git: 93a0c2d46a28 - main - devel/llvm-morello: Fix stack register selection

From: Brooks Davis <brooks_at_FreeBSD.org>
Date: Wed, 18 May 2022 18:34:27 UTC
The branch main has been updated by brooks:

URL: https://cgit.FreeBSD.org/ports/commit/?id=93a0c2d46a28d0929d75182583ca2fc63bf8898b

commit 93a0c2d46a28d0929d75182583ca2fc63bf8898b
Author:     Brooks Davis <brooks@FreeBSD.org>
AuthorDate: 2022-05-18 18:30:16 +0000
Commit:     Brooks Davis <brooks@FreeBSD.org>
CommitDate: 2022-05-18 18:34:14 +0000

    devel/llvm-morello: Fix stack register selection
    
    Apply a patch to fix a bug where the varargs register was improperly
    reused as a scratch register.
    
    Merge Request:  https://git.morello-project.org/morello/llvm-project/-/merge_requests/185
---
 devel/llvm-cheri/Makefile                      |   2 +-
 devel/llvm-morello/Makefile                    |   2 +
 devel/llvm-morello/files/patch-scratch-reg-fix | 135 +++++++++++++++++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/devel/llvm-cheri/Makefile b/devel/llvm-cheri/Makefile
index 596fe3afdaba..c731a081a9ac 100644
--- a/devel/llvm-cheri/Makefile
+++ b/devel/llvm-cheri/Makefile
@@ -1,6 +1,6 @@
 PORTNAME=	llvm
 PORTVERSION=	${LLVM_MAJOR}.0.d${SNAPDATE}
-PORTREVISION=	1
+PORTREVISION=	2
 CATEGORIES=	devel lang
 PKGNAMESUFFIX=	${LLVM_SUFFIX}
 
diff --git a/devel/llvm-morello/Makefile b/devel/llvm-morello/Makefile
index 0b77d2c69606..d4a504ca85d5 100644
--- a/devel/llvm-morello/Makefile
+++ b/devel/llvm-morello/Makefile
@@ -17,6 +17,8 @@ GL_COMMIT=	${LLVM_COMMIT}
 # Regularly tested targets as part of Android development
 LLVM_TARGETS=	AArch64;ARM;BPF;X86
 
+EXTRA_PATCHES=	${.CURDIR}/files
+
 .include "${.CURDIR}/Makefile.snapshot"
 
 MASTERDIR=	${.CURDIR}/../llvm-cheri
diff --git a/devel/llvm-morello/files/patch-scratch-reg-fix b/devel/llvm-morello/files/patch-scratch-reg-fix
new file mode 100644
index 000000000000..2a6ef90705a7
--- /dev/null
+++ b/devel/llvm-morello/files/patch-scratch-reg-fix
@@ -0,0 +1,135 @@
+commit a7d0053c29e0275a7d920170fe686ba3b6d61cbf
+Author: Silviu Baranga <silviu.baranga@arm.com>
+Date:   Wed May 18 14:31:24 2022 +0100
+
+    [Morello] Don't use a scratch register for re-aligning the stack.
+    
+    Purecap can use alignd which can use csp.
+    
+    This avoids using C9 as a scratch register which can cause issues
+    with the new varargs PCS.
+
+diff --git llvm/lib/Target/AArch64/AArch64FrameLowering.cpp llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+index 42944b417f49..205a7576fe06 100644
+--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
++++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+@@ -593,32 +593,15 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
+ // but we would then have to make sure that we were in fact saving at least one
+ // callee-save register in the prologue, which is additional complexity that
+ // doesn't seem worth the benefit.
+-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+-    unsigned OldScratch = AArch64::NoRegister) {
+-
+-  static const MCPhysReg ScratchCapReg1[2] = {AArch64::C6, AArch64::C9};
+-  static const MCPhysReg ScratchCapReg2[2] = {AArch64::C7, AArch64::C10};
+-
++static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+   MachineFunction *MF = MBB->getParent();
+-
+   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+-  bool HasPureCap = Subtarget.hasPureCap();
+-  bool Use32CapRegs = !Subtarget.use16CapRegs();
+-  // We can use an intra-procedural registers here since aligning the
+-  // prologue doesn't require having this live across branches.
+-  unsigned DefaultScratch = HasPureCap ? ScratchCapReg1[Use32CapRegs] : AArch64::X9;
+-  if (OldScratch == DefaultScratch)
+-    DefaultScratch = HasPureCap ? ScratchCapReg2[Use32CapRegs] : AArch64::X10;
+-
+-  const TargetRegisterClass ScratchRegClass =
+-      HasPureCap ? AArch64::CapRegClass : AArch64::GPR64RegClass;
++  assert(!Subtarget.hasPureCap() &&
++      "Purecap doesn't need a scratch register");
+ 
+   // If MBB is an entry block, use X9 as the scratch register
+-  if (&MF->front() == MBB) {
+-    assert(DefaultScratch != OldScratch &&
+-           "Should not reuse scratch register");
+-    return DefaultScratch;
+-  }
++  if (&MF->front() == MBB)
++    return AArch64::X9;
+ 
+   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
+   LivePhysRegs LiveRegs(TRI);
+@@ -629,15 +612,12 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+   for (unsigned i = 0; CSRegs[i]; ++i)
+     LiveRegs.addReg(CSRegs[i]);
+ 
+-  if (OldScratch != AArch64::NoRegister)
+-    LiveRegs.addReg(OldScratch);
+-
+-  // Prefer X9/C6 since it was historically used for the prologue scratch reg.
++  // Prefer X9 since it was historically used for the prologue scratch reg.
+   const MachineRegisterInfo &MRI = MF->getRegInfo();
+-  if (LiveRegs.available(MRI, DefaultScratch))
+-    return DefaultScratch;
++  if (LiveRegs.available(MRI, AArch64::X9))
++    return AArch64::X9;
+ 
+-  for (unsigned Reg : ScratchRegClass) {
++  for (unsigned Reg : AArch64::GPR64RegClass) {
+     if (LiveRegs.available(MRI, Reg))
+       return Reg;
+   }
+@@ -650,9 +630,11 @@ bool AArch64FrameLowering::canUseAsPrologue(
+   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
++  bool HasPureCap = Subtarget.hasPureCap();
+ 
+   // Don't need a scratch register if we're not going to re-align the stack.
+-  if (!RegInfo->hasStackRealignment(*MF))
+  // Purecap doesn't need a scratch register to re-align the stack.
++  if (HasPureCap || !RegInfo->hasStackRealignment(*MF))
+     return true;
+   // Otherwise, we can use any block as long as it has a scratch register
+   // available.
+@@ -1542,7 +1524,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
+         !IsFunclet && RegInfo->hasStackRealignment(MF);
+     unsigned scratchSPReg = SP;
+ 
+-    if (NeedsRealignment) {
++    if (NeedsRealignment && !HasPureCap) {
+       scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
+       assert(scratchSPReg != AArch64::NoRegister);
+     }
+@@ -1559,7 +1541,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
+     if (NeedsRealignment) {
+       const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
+       assert(NrBitsToZero > 1);
+-      assert(scratchSPReg != SP);
++      assert((scratchSPReg != SP) != HasPureCap);
+ 
+       // SUB X9, SP, NumBytes
+       //   -- X9 is temporary register, so shouldn't contain any live data here,
+diff --git llvm/test/CodeGen/AArch64/morello-sandbox-align-and-pad.ll llvm/test/CodeGen/AArch64/morello-sandbox-align-and-pad.ll
+index b0e8f567e331..cb9fef678cea 100644
+--- llvm/test/CodeGen/AArch64/morello-sandbox-align-and-pad.ll
++++ llvm/test/CodeGen/AArch64/morello-sandbox-align-and-pad.ll
+@@ -11,8 +11,8 @@ target datalayout = "e-m:e-pf200:128:128-i8:8:32-i16:16:32-i64:64-i128:128-n32:6
+ define i32 @fun1() addrspace(200) {
+ entry:
+ ; Allocate extra memory. CSP still needs to be 16 bytes aligned.
+-; PCS16:	sub c6, csp, #1024, lsl #12
+-; PCS16-NEXT: sub c6, c6, #4032
++; PCS16:	sub csp, csp, #1024, lsl #12
++; PCS16-NEXT: sub csp, csp, #4032
+ 
+ ; PCS16: mov w[[REG:[0-9]+]], #2048
+ ; PCS16-NEXT: movk w8, #64, lsl #16
+@@ -34,12 +34,12 @@ declare i32 @g(i32 addrspace(200)*) local_unnamed_addr addrspace(200) #2
+ ; CHECK-LABEL: fun2
+ define i32 @fun2() addrspace(200) {
+ entry:
+-; Here we need more alignment than the stack alignment. We know that the scratch register is c6.
+-; PCS16: alignd	csp, c6, #14
++; Here we need more alignment than the stack alignment.
++; PCS16: alignd	csp, csp, #14
+ ; PCS16: mov w[[REG:[0-9]+]], #33554432
+ ; PCS16: scbndse c{{.*}}, c{{.*}}, x[[REG]]
+ ;
+-; PCS32: alignd	csp, c9, #11
++; PCS32: alignd	csp, csp, #14
+ ; PCS32: mov w[[REG:[0-9]+]], #33554432
+ ; PCS32: scbndse c{{.*}}, c{{.*}}, x[[REG]]
+