Further tighten AArch32 push/pop selection.

Notably:

- Check IT block status for pops that branch (by loading to pc).
- Enforce register selection rules regarding sp, lr and pc. These differ
  between T32 and A32, and sometimes between encodings.
- Relax restrictions for A1 pop (multiple); a single register is
  permitted here.

Change-Id: Ic5fe640c957652dfe2deb9d244de07a9460f1c1c
diff --git a/src/aarch32/assembler-aarch32.cc b/src/aarch32/assembler-aarch32.cc
index 9c2aaca..fb153cd 100644
--- a/src/aarch32/assembler-aarch32.cc
+++ b/src/aarch32/assembler-aarch32.cc
@@ -8473,27 +8473,33 @@
   CheckIT(cond);
   if (!registers.IsEmpty() || AllowUnpredictable()) {
     if (IsUsingT32()) {
-      // POP{<c>}{<q>} <registers> ; T1
-      if (!size.IsWide() && registers.IsR0toR7orPC()) {
-        EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
-                   GetRegisterListEncoding(registers, 0, 8));
-        AdvanceIT();
-        return;
-      }
-      // POP{<c>}{<q>} <registers> ; T2
-      if (!size.IsNarrow() && !registers.Includes(sp) &&
-          (!registers.IsSingleRegister() || AllowUnpredictable())) {
-        EmitT32_32(0xe8bd0000U |
-                   (GetRegisterListEncoding(registers, 15, 1) << 15) |
-                   (GetRegisterListEncoding(registers, 14, 1) << 14) |
-                   GetRegisterListEncoding(registers, 0, 13));
-        AdvanceIT();
-        return;
+      // A branch out of an IT block should be the last instruction in the
+      // block.
+      if (!registers.Includes(pc) || OutsideITBlockAndAlOrLast(cond) ||
+          AllowUnpredictable()) {
+        // POP{<c>}{<q>} <registers> ; T1
+        if (!size.IsWide() && registers.IsR0toR7orPC()) {
+          EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
+                     GetRegisterListEncoding(registers, 0, 8));
+          AdvanceIT();
+          return;
+        }
+        // POP{<c>}{<q>} <registers> ; T2
+        // Alias of: LDM{<c>}{<q>} SP!, <registers> ; T2
+        if (!size.IsNarrow() &&
+            ((!registers.Includes(sp) && (registers.GetCount() > 1) &&
+              !(registers.Includes(pc) && registers.Includes(lr))) ||
+             AllowUnpredictable())) {
+          EmitT32_32(0xe8bd0000U | GetRegisterListEncoding(registers, 0, 16));
+          AdvanceIT();
+          return;
+        }
       }
     } else {
       // POP{<c>}{<q>} <registers> ; A1
+      // Alias of: LDM{<c>}{<q>} SP!, <registers> ; A1
       if (cond.IsNotNever() &&
-          (!registers.IsSingleRegister() || AllowUnpredictable())) {
+          (!registers.Includes(sp) || AllowUnpredictable())) {
         EmitA32(0x08bd0000U | (cond.GetCondition() << 28) |
                 GetRegisterListEncoding(registers, 0, 16));
         return;
@@ -8506,19 +8512,24 @@
 void Assembler::pop(Condition cond, EncodingSize size, Register rt) {
   VIXL_ASSERT(AllowAssembler());
   CheckIT(cond);
-  if (IsUsingT32()) {
-    // POP{<c>}{<q>} <single_register_list> ; T4
-    if (!size.IsNarrow() && ((!rt.IsPC() || OutsideITBlockAndAlOrLast(cond)) ||
-                             AllowUnpredictable())) {
-      EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12));
-      AdvanceIT();
-      return;
-    }
-  } else {
-    // POP{<c>}{<q>} <single_register_list> ; A1
-    if (cond.IsNotNever()) {
-      EmitA32(0x049d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
-      return;
+  if (!rt.IsSP() || AllowUnpredictable()) {
+    if (IsUsingT32()) {
+      // POP{<c>}{<q>} <single_register_list> ; T4
+      // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T4
+      if (!size.IsNarrow() && (!rt.IsPC() || OutsideITBlockAndAlOrLast(cond) ||
+                               AllowUnpredictable())) {
+        EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12));
+        AdvanceIT();
+        return;
+      }
+    } else {
+      // POP{<c>}{<q>} <single_register_list> ; A1
+      // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T1
+      if (cond.IsNotNever()) {
+        EmitA32(0x049d0004U | (cond.GetCondition() << 28) |
+                (rt.GetCode() << 12));
+        return;
+      }
     }
   }
   Delegate(kPop, &Assembler::pop, cond, size, rt);
@@ -8539,18 +8550,23 @@
         return;
       }
       // PUSH{<c>}{<q>} <registers> ; T1
-      if (!size.IsNarrow() && !registers.Includes(sp) &&
-          !registers.Includes(pc) &&
-          (!registers.IsSingleRegister() || AllowUnpredictable())) {
-        EmitT32_32(0xe92d0000U |
-                   (GetRegisterListEncoding(registers, 14, 1) << 14) |
-                   GetRegisterListEncoding(registers, 0, 13));
+      // Alias of: STMDB SP!, <registers> ; T1
+      if (!size.IsNarrow() && !registers.Includes(pc) &&
+          ((!registers.Includes(sp) && (registers.GetCount() > 1)) ||
+           AllowUnpredictable())) {
+        EmitT32_32(0xe92d0000U | GetRegisterListEncoding(registers, 0, 15));
         AdvanceIT();
         return;
       }
     } else {
       // PUSH{<c>}{<q>} <registers> ; A1
-      if (cond.IsNotNever()) {
+      // Alias of: STMDB SP!, <registers> ; A1
+      if (cond.IsNotNever() &&
+          // For A32, sp can appear in the list, but stores an UNKNOWN value if
+          // it is not the lowest-valued register.
+          (!registers.Includes(sp) ||
+           registers.GetFirstAvailableRegister().IsSP() ||
+           AllowUnpredictable())) {
         EmitA32(0x092d0000U | (cond.GetCondition() << 28) |
                 GetRegisterListEncoding(registers, 0, 16));
         return;
@@ -8565,14 +8581,17 @@
   CheckIT(cond);
   if (IsUsingT32()) {
     // PUSH{<c>}{<q>} <single_register_list> ; T4
-    if (!size.IsNarrow() && (!rt.IsPC() || AllowUnpredictable())) {
+    // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! ; T4
+    if (!size.IsNarrow() &&
+        ((!rt.IsPC() && !rt.IsSP()) || AllowUnpredictable())) {
       EmitT32_32(0xf84d0d04U | (rt.GetCode() << 12));
       AdvanceIT();
       return;
     }
   } else {
     // PUSH{<c>}{<q>} <single_register_list> ; A1
-    if (cond.IsNotNever() && (!rt.IsPC() || AllowUnpredictable())) {
+    // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! ; A1
+    if (cond.IsNotNever() && (!rt.IsSP() || AllowUnpredictable())) {
       EmitA32(0x052d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
       return;
     }
diff --git a/src/aarch32/instructions-aarch32.h b/src/aarch32/instructions-aarch32.h
index 2927226..ff97d55 100644
--- a/src/aarch32/instructions-aarch32.h
+++ b/src/aarch32/instructions-aarch32.h
@@ -492,6 +492,7 @@
   Register GetFirstAvailableRegister() const;
   bool IsEmpty() const { return list_ == 0; }
   bool IsSingleRegister() const { return IsPowerOf2(list_); }
+  int GetCount() const { return CountSetBits(list_); }
   static RegisterList Union(const RegisterList& list_1,
                             const RegisterList& list_2) {
     return RegisterList(list_1.list_ | list_2.list_);
diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h
index 62c116d..59b25a5 100644
--- a/src/aarch32/macro-assembler-aarch32.h
+++ b/src/aarch32/macro-assembler-aarch32.h
@@ -2922,7 +2922,7 @@
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
-    if (registers.IsSingleRegister() &&
+    if (registers.IsSingleRegister() && !registers.Includes(sp) &&
         (!IsUsingT32() || !registers.IsR0toR7orLR())) {
       push(cond, registers.GetFirstAvailableRegister());
     } else if (!registers.IsEmpty()) {
@@ -2937,7 +2937,12 @@
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
-    push(cond, rt);
+    if (IsUsingA32() && rt.IsSP()) {
+      // Only the A32 multiple-register form can push sp.
+      push(cond, RegisterList(rt));
+    } else {
+      push(cond, rt);
+    }
   }
   void Push(Register rt) { Push(al, rt); }
 
diff --git a/test/aarch32/test-disasm-a32.cc b/test/aarch32/test-disasm-a32.cc
index cb3a376..cda3de4 100644
--- a/test/aarch32/test-disasm-a32.cc
+++ b/test/aarch32/test-disasm-a32.cc
@@ -2507,22 +2507,31 @@
               "beq 0x00000006\n"
               "push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ip}\n");
 
-  COMPARE_A32(Push(RegisterList(sp)), "push {sp}\n");
+  // Narrow form, T1.
+  COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n");
+  // <single_register_list> form, T4
+  COMPARE_T32(Pop(RegisterList(r10)), "pop {r10}\n");
 
-  // TODO: Clarify behaviour of MacroAssembler vs Assembler with respect to
-  //       deprecated and unpredictable instructions. The tests reflect the
-  //       current behaviour and will need to be updated.
+  // It is usually UNPREDICTABLE to push sp.
+  MUST_FAIL_TEST_BOTH(Push(RegisterList(r0, sp)),
+                      "Unpredictable instruction.\n");
+  MUST_FAIL_TEST_T32(Push(RegisterList(sp)), "Unpredictable instruction.\n");
+  MUST_FAIL_TEST_T32(Push(sp), "Unpredictable instruction.\n");
+  // A32 can push sp if it is the first register in the list.
+  COMPARE_A32(Push(sp), "stmdb sp!, {sp}\n");
+  COMPARE_A32(Push(RegisterList(sp)), "stmdb sp!, {sp}\n");
+  COMPARE_A32(Push(RegisterList(sp, lr)), "push {sp,lr}\n");
 
-  // We don't accept the single-register version of this.
-  MUST_FAIL_TEST_BOTH(Push(pc), "Unpredictable instruction.\n");
-  // The macro assembler translates a single-register register list into a
-  // single-register push.
-  MUST_FAIL_TEST_A32(Push(RegisterList(pc)), "Unpredictable instruction.\n");
   // Deprecated, but accepted:
+  SHOULD_FAIL_TEST_A32(Push(pc));
+  SHOULD_FAIL_TEST_A32(Push(RegisterList(pc)));
   SHOULD_FAIL_TEST_A32(Push(RegisterList(r0, pc)));
 
-  // Accepted, but stores UNKNOWN value for the SP:
-  SHOULD_FAIL_TEST_A32(Push(RegisterList(r0, sp)));
+  MUST_FAIL_TEST_T32(Push(pc), "Unpredictable instruction.\n");
+  MUST_FAIL_TEST_T32(Push(RegisterList(pc)), "Unpredictable instruction.\n");
+  // The multiple-register T32 push can't encode PC at all.
+  MUST_FAIL_TEST_T32(Push(RegisterList(r0, pc)),
+                     "Ill-formed 'push' instruction.\n");
 
   // The following use the PUSH (T1) and PUSH (single register) (A1) encodings
   // for T32 and A32 respectively:
@@ -2533,12 +2542,6 @@
   // PUSH (single register), T4 and A1 encodings:
   COMPARE_BOTH(Push(RegisterList(r8)), "push {r8}\n");
 
-  // Cannot push the sp and pc in T32 when using a register list.
-  MUST_FAIL_TEST_T32(Push(RegisterList(r0, sp)),
-                     "Ill-formed 'push' instruction.\n");
-  MUST_FAIL_TEST_T32(Push(RegisterList(r0, pc)),
-                     "Ill-formed 'push' instruction.\n");
-
   // Pushing zero registers should produce no instructions.
   COMPARE_BOTH(Push(RegisterList()), "");
 
@@ -2567,13 +2570,16 @@
               "beq 0x00000006\n"
               "pop {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ip}\n");
 
-  // Popping a single register when using a register list becomes a
-  // single-register pop.
-  COMPARE_T32(Pop(RegisterList(sp)), "pop {sp}\n");
+  // Narrow form, T1.
+  COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n");
+  // <single_register_list> form, T4.
+  COMPARE_T32(Pop(RegisterList(r10)), "pop {r10}\n");
 
-  // Cannot pop the sp in T32 when using a register list.
-  MUST_FAIL_TEST_T32(Pop(RegisterList(r0, sp)),
-                     "Ill-formed 'pop' instruction.\n");
+  // It is UNPREDICTABLE to pop sp.
+  MUST_FAIL_TEST_BOTH(Pop(RegisterList(r0, sp)),
+                      "Unpredictable instruction.\n");
+  MUST_FAIL_TEST_BOTH(Pop(RegisterList(sp)), "Unpredictable instruction.\n");
+  MUST_FAIL_TEST_BOTH(Pop(sp), "Unpredictable instruction.\n");
 
   // The following use the POP (T1) and POP (single register) (A1) encodings for
   // T32 and A32 respectively:
@@ -2585,10 +2591,9 @@
   COMPARE_BOTH(Pop(RegisterList(r8)), "pop {r8}\n");
   COMPARE_BOTH(Pop(RegisterList(lr)), "pop {lr}\n");
 
-  // TODO: Pushing both the lr and pc should not be allowed by the
-  //       MacroAssembler (deprecated for A32, for T32 they shouldn't both
-  //       be in the list).
-  SHOULD_FAIL_TEST_BOTH(Pop(RegisterList(lr, pc)));
+  MUST_FAIL_TEST_T32(Pop(RegisterList(lr, pc)), "Unpredictable instruction.\n");
+  // Deprecated, but allowed.
+  COMPARE_A32(Pop(RegisterList(lr, pc)), "pop {lr,pc}\n");
 
   // Popping zero registers should produce no instructions.
   COMPARE_BOTH(Pop(RegisterList()), "");