summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch')
-rw-r--r--meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch3093
1 files changed, 0 insertions, 3093 deletions
diff --git a/meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch b/meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch
deleted file mode 100644
index 81b5067c33..0000000000
--- a/meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch
+++ /dev/null
@@ -1,3093 +0,0 @@
-From: Richard Sandiford <richard.sandiford@arm.com>
-Subject: [PATCH 00/19] aarch64: Fix -fstack-protector issue
-Date: Tue, 12 Sep 2023 16:25:10 +0100
-
-This series of patches fixes deficiencies in GCC's -fstack-protector
-implementation for AArch64 when using dynamically allocated stack space.
-This is CVE-2023-4039. See:
-
-https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
-https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf
-
-for more details.
-
-The fix is to put the saved registers above the locals area when
--fstack-protector is used.
-
-The series also fixes a stack-clash problem that I found while working
-on the CVE. In unpatched sources, the stack-clash problem would only
-trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an
-equivalent). But it would be a more significant issue with the new
--fstack-protector frame layout. It's therefore important that both
-problems are fixed together.
-
-Some reorganisation of the code seemed necessary to fix the problems in a
-cleanish way. The series is therefore quite long, but only a handful of
-patches should have any effect on code generation.
-
-See the individual patches for a detailed description.
-
-Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches.
-I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039.
-
-CVE: CVE-2023-4039
-Upstream-Status: Backport
-Signed-off-by: Ross Burton <ross.burton@arm.com>
-
-
-From 71a2aa2127283f450c623d3604dbcabe0e14a8d4 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:12 +0100
-Subject: [PATCH 01/19] aarch64: Use local frame vars in shrink-wrapping code
-
-aarch64_layout_frame uses a shorthand for referring to
-cfun->machine->frame:
-
- aarch64_frame &frame = cfun->machine->frame;
-
-This patch does the same for some other heavy users of the structure.
-No functional change intended.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
- a local shorthand for cfun->machine->frame.
- (aarch64_restore_callee_saves, aarch64_get_separate_components):
- (aarch64_process_components): Likewise.
- (aarch64_allocate_and_probe_stack_space): Likewise.
- (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
- (aarch64_layout_frame): Use existing shorthand for one more case.
----
- gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++----------------
- 1 file changed, 64 insertions(+), 59 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 822a2b49a46..5d473d161d9 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8612,7 +8612,7 @@ aarch64_layout_frame (void)
- frame.is_scs_enabled
- = (!crtl->calls_eh_return
- && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK)
-- && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0));
-+ && known_ge (frame.reg_offset[LR_REGNUM], 0));
-
- /* When shadow call stack is enabled, the scs_pop in the epilogue will
- restore x30, and we don't need to pop x30 again in the traditional
-@@ -9078,6 +9078,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- unsigned start, unsigned limit, bool skip_wb,
- bool hard_fp_valid_p)
- {
-+ aarch64_frame &frame = cfun->machine->frame;
- rtx_insn *insn;
- unsigned regno;
- unsigned regno2;
-@@ -9092,8 +9093,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
-
- if (skip_wb
-- && (regno == cfun->machine->frame.wb_push_candidate1
-- || regno == cfun->machine->frame.wb_push_candidate2))
-+ && (regno == frame.wb_push_candidate1
-+ || regno == frame.wb_push_candidate2))
- continue;
-
- if (cfun->machine->reg_is_wrapped_separately[regno])
-@@ -9101,7 +9102,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + cfun->machine->frame.reg_offset[regno];
-+ offset = start_offset + frame.reg_offset[regno];
- rtx base_rtx = stack_pointer_rtx;
- poly_int64 sp_offset = offset;
-
-@@ -9114,7 +9115,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- {
- gcc_assert (known_eq (start_offset, 0));
- poly_int64 fp_offset
-- = cfun->machine->frame.below_hard_fp_saved_regs_size;
-+ = frame.below_hard_fp_saved_regs_size;
- if (hard_fp_valid_p)
- base_rtx = hard_frame_pointer_rtx;
- else
-@@ -9136,8 +9137,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
- && !cfun->machine->reg_is_wrapped_separately[regno2]
- && known_eq (GET_MODE_SIZE (mode),
-- cfun->machine->frame.reg_offset[regno2]
-- - cfun->machine->frame.reg_offset[regno]))
-+ frame.reg_offset[regno2] - frame.reg_offset[regno]))
- {
- rtx reg2 = gen_rtx_REG (mode, regno2);
- rtx mem2;
-@@ -9187,6 +9187,7 @@ static void
- aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
- unsigned limit, bool skip_wb, rtx *cfi_ops)
- {
-+ aarch64_frame &frame = cfun->machine->frame;
- unsigned regno;
- unsigned regno2;
- poly_int64 offset;
-@@ -9203,13 +9204,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
- rtx reg, mem;
-
- if (skip_wb
-- && (regno == cfun->machine->frame.wb_pop_candidate1
-- || regno == cfun->machine->frame.wb_pop_candidate2))
-+ && (regno == frame.wb_pop_candidate1
-+ || regno == frame.wb_pop_candidate2))
- continue;
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + cfun->machine->frame.reg_offset[regno];
-+ offset = start_offset + frame.reg_offset[regno];
- rtx base_rtx = stack_pointer_rtx;
- if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
-@@ -9220,8 +9221,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
- && !cfun->machine->reg_is_wrapped_separately[regno2]
- && known_eq (GET_MODE_SIZE (mode),
-- cfun->machine->frame.reg_offset[regno2]
-- - cfun->machine->frame.reg_offset[regno]))
-+ frame.reg_offset[regno2] - frame.reg_offset[regno]))
- {
- rtx reg2 = gen_rtx_REG (mode, regno2);
- rtx mem2;
-@@ -9326,6 +9326,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
- static sbitmap
- aarch64_get_separate_components (void)
- {
-+ aarch64_frame &frame = cfun->machine->frame;
- sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
- bitmap_clear (components);
-
-@@ -9342,18 +9343,18 @@ aarch64_get_separate_components (void)
- if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- continue;
-
-- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
-+ poly_int64 offset = frame.reg_offset[regno];
-
- /* If the register is saved in the first SVE save slot, we use
- it as a stack probe for -fstack-clash-protection. */
- if (flag_stack_clash_protection
-- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
-+ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
- && known_eq (offset, 0))
- continue;
-
- /* Get the offset relative to the register we'll use. */
- if (frame_pointer_needed)
-- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
-+ offset -= frame.below_hard_fp_saved_regs_size;
- else
- offset += crtl->outgoing_args_size;
-
-@@ -9372,11 +9373,11 @@ aarch64_get_separate_components (void)
- /* If the spare predicate register used by big-endian SVE code
- is call-preserved, it must be saved in the main prologue
- before any saves that use it. */
-- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
-- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
-+ if (frame.spare_pred_reg != INVALID_REGNUM)
-+ bitmap_clear_bit (components, frame.spare_pred_reg);
-
-- unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
-- unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
-+ unsigned reg1 = frame.wb_push_candidate1;
-+ unsigned reg2 = frame.wb_push_candidate2;
- /* If registers have been chosen to be stored/restored with
- writeback don't interfere with them to avoid having to output explicit
- stack adjustment instructions. */
-@@ -9485,6 +9486,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
- static void
- aarch64_process_components (sbitmap components, bool prologue_p)
- {
-+ aarch64_frame &frame = cfun->machine->frame;
- rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
- ? HARD_FRAME_POINTER_REGNUM
- : STACK_POINTER_REGNUM);
-@@ -9499,9 +9501,9 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- machine_mode mode = aarch64_reg_save_mode (regno);
-
- rtx reg = gen_rtx_REG (mode, regno);
-- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
-+ poly_int64 offset = frame.reg_offset[regno];
- if (frame_pointer_needed)
-- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
-+ offset -= frame.below_hard_fp_saved_regs_size;
- else
- offset += crtl->outgoing_args_size;
-
-@@ -9526,14 +9528,14 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- break;
- }
-
-- poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
-+ poly_int64 offset2 = frame.reg_offset[regno2];
- /* The next register is not of the same class or its offset is not
- mergeable with the current one into a pair. */
- if (aarch64_sve_mode_p (mode)
- || !satisfies_constraint_Ump (mem)
- || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
- || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
-- || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
-+ || maybe_ne ((offset2 - frame.reg_offset[regno]),
- GET_MODE_SIZE (mode)))
- {
- insn = emit_insn (set);
-@@ -9555,7 +9557,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- /* REGNO2 can be saved/restored in a pair with REGNO. */
- rtx reg2 = gen_rtx_REG (mode, regno2);
- if (frame_pointer_needed)
-- offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
-+ offset2 -= frame.below_hard_fp_saved_regs_size;
- else
- offset2 += crtl->outgoing_args_size;
- rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
-@@ -9650,6 +9652,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- bool frame_related_p,
- bool final_adjustment_p)
- {
-+ aarch64_frame &frame = cfun->machine->frame;
- HOST_WIDE_INT guard_size
- = 1 << param_stack_clash_protection_guard_size;
- HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
-@@ -9670,25 +9673,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- register as a probe. We can't assume that LR was saved at position 0
- though, so treat any space below it as unprobed. */
- if (final_adjustment_p
-- && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
-+ && known_eq (frame.below_hard_fp_saved_regs_size, 0))
- {
-- poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
-+ poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
- if (known_ge (lr_offset, 0))
- min_probe_threshold -= lr_offset.to_constant ();
- else
- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
- }
-
-- poly_int64 frame_size = cfun->machine->frame.frame_size;
-+ poly_int64 frame_size = frame.frame_size;
-
- /* We should always have a positive probe threshold. */
- gcc_assert (min_probe_threshold > 0);
-
- if (flag_stack_clash_protection && !final_adjustment_p)
- {
-- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
-- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
-- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
-+ poly_int64 initial_adjust = frame.initial_adjust;
-+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-+ poly_int64 final_adjust = frame.final_adjust;
-
- if (known_eq (frame_size, 0))
- {
-@@ -9977,17 +9980,18 @@ aarch64_epilogue_uses (int regno)
- void
- aarch64_expand_prologue (void)
- {
-- poly_int64 frame_size = cfun->machine->frame.frame_size;
-- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
-- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
-- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
-- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
-- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
-+ aarch64_frame &frame = cfun->machine->frame;
-+ poly_int64 frame_size = frame.frame_size;
-+ poly_int64 initial_adjust = frame.initial_adjust;
-+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
-+ poly_int64 final_adjust = frame.final_adjust;
-+ poly_int64 callee_offset = frame.callee_offset;
-+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
- poly_int64 below_hard_fp_saved_regs_size
-- = cfun->machine->frame.below_hard_fp_saved_regs_size;
-- unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
-- unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
-- bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
-+ = frame.below_hard_fp_saved_regs_size;
-+ unsigned reg1 = frame.wb_push_candidate1;
-+ unsigned reg2 = frame.wb_push_candidate2;
-+ bool emit_frame_chain = frame.emit_frame_chain;
- rtx_insn *insn;
-
- if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
-@@ -10018,7 +10022,7 @@ aarch64_expand_prologue (void)
- }
-
- /* Push return address to shadow call stack. */
-- if (cfun->machine->frame.is_scs_enabled)
-+ if (frame.is_scs_enabled)
- emit_insn (gen_scs_push ());
-
- if (flag_stack_usage_info)
-@@ -10057,7 +10061,7 @@ aarch64_expand_prologue (void)
-
- /* The offset of the frame chain record (if any) from the current SP. */
- poly_int64 chain_offset = (initial_adjust + callee_adjust
-- - cfun->machine->frame.hard_fp_offset);
-+ - frame.hard_fp_offset);
- gcc_assert (known_ge (chain_offset, 0));
-
- /* The offset of the bottom of the save area from the current SP. */
-@@ -10160,16 +10164,17 @@ aarch64_use_return_insn_p (void)
- void
- aarch64_expand_epilogue (bool for_sibcall)
- {
-- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
-- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
-- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
-- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
-- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
-+ aarch64_frame &frame = cfun->machine->frame;
-+ poly_int64 initial_adjust = frame.initial_adjust;
-+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
-+ poly_int64 final_adjust = frame.final_adjust;
-+ poly_int64 callee_offset = frame.callee_offset;
-+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
- poly_int64 below_hard_fp_saved_regs_size
-- = cfun->machine->frame.below_hard_fp_saved_regs_size;
-- unsigned reg1 = cfun->machine->frame.wb_pop_candidate1;
-- unsigned reg2 = cfun->machine->frame.wb_pop_candidate2;
-- unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled
-+ = frame.below_hard_fp_saved_regs_size;
-+ unsigned reg1 = frame.wb_pop_candidate1;
-+ unsigned reg2 = frame.wb_pop_candidate2;
-+ unsigned int last_gpr = (frame.is_scs_enabled
- ? R29_REGNUM : R30_REGNUM);
- rtx cfi_ops = NULL;
- rtx_insn *insn;
-@@ -10203,7 +10208,7 @@ aarch64_expand_epilogue (bool for_sibcall)
- /* We need to add memory barrier to prevent read from deallocated stack. */
- bool need_barrier_p
- = maybe_ne (get_frame_size ()
-- + cfun->machine->frame.saved_varargs_size, 0);
-+ + frame.saved_varargs_size, 0);
-
- /* Emit a barrier to prevent loads from a deallocated stack. */
- if (maybe_gt (final_adjust, crtl->outgoing_args_size)
-@@ -10284,7 +10289,7 @@ aarch64_expand_epilogue (bool for_sibcall)
- }
-
- /* Pop return address from shadow call stack. */
-- if (cfun->machine->frame.is_scs_enabled)
-+ if (frame.is_scs_enabled)
- {
- machine_mode mode = aarch64_reg_save_mode (R30_REGNUM);
- rtx reg = gen_rtx_REG (mode, R30_REGNUM);
-@@ -12740,24 +12745,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
- poly_int64
- aarch64_initial_elimination_offset (unsigned from, unsigned to)
- {
-+ aarch64_frame &frame = cfun->machine->frame;
-+
- if (to == HARD_FRAME_POINTER_REGNUM)
- {
- if (from == ARG_POINTER_REGNUM)
-- return cfun->machine->frame.hard_fp_offset;
-+ return frame.hard_fp_offset;
-
- if (from == FRAME_POINTER_REGNUM)
-- return cfun->machine->frame.hard_fp_offset
-- - cfun->machine->frame.locals_offset;
-+ return frame.hard_fp_offset - frame.locals_offset;
- }
-
- if (to == STACK_POINTER_REGNUM)
- {
- if (from == FRAME_POINTER_REGNUM)
-- return cfun->machine->frame.frame_size
-- - cfun->machine->frame.locals_offset;
-+ return frame.frame_size - frame.locals_offset;
- }
-
-- return cfun->machine->frame.frame_size;
-+ return frame.frame_size;
- }
-
-
---
-2.34.1
-
-
-From 89a9fa287706c5011f61926eaf65e7b996b963a3 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:12 +0100
-Subject: [PATCH 02/19] aarch64: Avoid a use of callee_offset
-
-When we emit the frame chain, i.e. when we reach Here in this statement
-of aarch64_expand_prologue:
-
- if (emit_frame_chain)
- {
- // Here
- ...
- }
-
-the stack is in one of two states:
-
-- We've allocated up to the frame chain, but no more.
-
-- We've allocated the whole frame, and the frame chain is within easy
- reach of the new SP.
-
-The offset of the frame chain from the current SP is available
-in aarch64_frame as callee_offset. It is also available as the
-chain_offset local variable, where the latter is calculated from other
-data. (However, chain_offset is not always equal to callee_offset when
-!emit_frame_chain, so chain_offset isn't redundant.)
-
-In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using
-chain_offset for the initialisation of the hard frame pointer:
-
- aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
-- stack_pointer_rtx, callee_offset,
-+ stack_pointer_rtx, chain_offset,
- tmp1_rtx, tmp0_rtx, frame_pointer_needed);
-
-But the later REG_CFA_ADJUST_CFA handling still used callee_offset.
-
-I think the difference is harmless, but it's more logical for the
-CFA note to be in sync, and it's more convenient for later patches
-if it uses chain_offset.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use
- chain_offset rather than callee_offset.
----
- gcc/config/aarch64/aarch64.cc | 4 +---
- 1 file changed, 1 insertion(+), 3 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 5d473d161d9..4f233c95140 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9985,7 +9985,6 @@ aarch64_expand_prologue (void)
- poly_int64 initial_adjust = frame.initial_adjust;
- HOST_WIDE_INT callee_adjust = frame.callee_adjust;
- poly_int64 final_adjust = frame.final_adjust;
-- poly_int64 callee_offset = frame.callee_offset;
- poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
- poly_int64 below_hard_fp_saved_regs_size
- = frame.below_hard_fp_saved_regs_size;
-@@ -10098,8 +10097,7 @@ aarch64_expand_prologue (void)
- implicit. */
- if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
- {
-- rtx src = plus_constant (Pmode, stack_pointer_rtx,
-- callee_offset);
-+ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset);
- add_reg_note (insn, REG_CFA_ADJUST_CFA,
- gen_rtx_SET (hard_frame_pointer_rtx, src));
- }
---
-2.34.1
-
-
-From b36a2a78040722dab6124366c5d6baf8eaf80aef Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:13 +0100
-Subject: [PATCH 03/19] aarch64: Explicitly handle frames with no saved
- registers
-
-If a frame has no saved registers, it can be allocated in one go.
-There is no need to treat the areas below and above the saved
-registers as separate.
-
-And if we allocate the frame in one go, it should be allocated
-as the initial_adjust rather than the final_adjust. This allows the
-frame size to grow to guard_size - guard_used_by_caller before a stack
-probe is needed. (A frame with no register saves is necessarily a
-leaf frame.)
-
-This is a no-op as thing stand, since a leaf function will have
-no outgoing arguments, and so all the frame will be above where
-the saved registers normally go.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly
- allocate the frame in one go if there are no saved registers.
----
- gcc/config/aarch64/aarch64.cc | 8 +++++---
- 1 file changed, 5 insertions(+), 3 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 4f233c95140..37643041ffb 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8639,9 +8639,11 @@ aarch64_layout_frame (void)
-
- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
- HOST_WIDE_INT const_saved_regs_size;
-- if (frame.frame_size.is_constant (&const_size)
-- && const_size < max_push_offset
-- && known_eq (frame.hard_fp_offset, const_size))
-+ if (known_eq (frame.saved_regs_size, 0))
-+ frame.initial_adjust = frame.frame_size;
-+ else if (frame.frame_size.is_constant (&const_size)
-+ && const_size < max_push_offset
-+ && known_eq (frame.hard_fp_offset, const_size))
- {
- /* Simple, small frame with no outgoing arguments:
-
---
-2.34.1
-
-
-From ada2ab0093596be707f23a3466ac82cff59fcffe Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:13 +0100
-Subject: [PATCH 04/19] aarch64: Add bytes_below_saved_regs to frame info
-
-The frame layout code currently hard-codes the assumption that
-the number of bytes below the saved registers is equal to the
-size of the outgoing arguments. This patch abstracts that
-value into a new field of aarch64_frame.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New
- field.
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it,
- and use it instead of crtl->outgoing_args_size.
- (aarch64_get_separate_components): Use bytes_below_saved_regs instead
- of outgoing_args_size.
- (aarch64_process_components): Likewise.
----
- gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++-----------------
- gcc/config/aarch64/aarch64.h | 5 +++
- 2 files changed, 41 insertions(+), 35 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 37643041ffb..dacc2b0e4dd 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8478,6 +8478,8 @@ aarch64_layout_frame (void)
- gcc_assert (crtl->is_leaf
- || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
-
-+ frame.bytes_below_saved_regs = crtl->outgoing_args_size;
-+
- /* Now assign stack slots for the registers. Start with the predicate
- registers, since predicate LDR and STR have a relatively small
- offset range. These saves happen below the hard frame pointer. */
-@@ -8582,18 +8584,18 @@ aarch64_layout_frame (void)
-
- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
-
-- poly_int64 above_outgoing_args
-+ poly_int64 saved_regs_and_above
- = aligned_upper_bound (varargs_and_saved_regs_size
- + get_frame_size (),
- STACK_BOUNDARY / BITS_PER_UNIT);
-
- frame.hard_fp_offset
-- = above_outgoing_args - frame.below_hard_fp_saved_regs_size;
-+ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
-
- /* Both these values are already aligned. */
-- gcc_assert (multiple_p (crtl->outgoing_args_size,
-+ gcc_assert (multiple_p (frame.bytes_below_saved_regs,
- STACK_BOUNDARY / BITS_PER_UNIT));
-- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
-+ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
-
- frame.locals_offset = frame.saved_varargs_size;
-
-@@ -8637,7 +8639,7 @@ aarch64_layout_frame (void)
- else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
- max_push_offset = 256;
-
-- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
-+ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
- HOST_WIDE_INT const_saved_regs_size;
- if (known_eq (frame.saved_regs_size, 0))
- frame.initial_adjust = frame.frame_size;
-@@ -8645,31 +8647,31 @@ aarch64_layout_frame (void)
- && const_size < max_push_offset
- && known_eq (frame.hard_fp_offset, const_size))
- {
-- /* Simple, small frame with no outgoing arguments:
-+ /* Simple, small frame with no data below the saved registers.
-
- stp reg1, reg2, [sp, -frame_size]!
- stp reg3, reg4, [sp, 16] */
- frame.callee_adjust = const_size;
- }
-- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size)
-+ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
- && frame.saved_regs_size.is_constant (&const_saved_regs_size)
-- && const_outgoing_args_size + const_saved_regs_size < 512
-- /* We could handle this case even with outgoing args, provided
-- that the number of args left us with valid offsets for all
-- predicate and vector save slots. It's such a rare case that
-- it hardly seems worth the effort though. */
-- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
-+ && const_below_saved_regs + const_saved_regs_size < 512
-+ /* We could handle this case even with data below the saved
-+ registers, provided that that data left us with valid offsets
-+ for all predicate and vector save slots. It's such a rare
-+ case that it hardly seems worth the effort though. */
-+ && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
- && !(cfun->calls_alloca
- && frame.hard_fp_offset.is_constant (&const_fp_offset)
- && const_fp_offset < max_push_offset))
- {
-- /* Frame with small outgoing arguments:
-+ /* Frame with small area below the saved registers:
-
- sub sp, sp, frame_size
-- stp reg1, reg2, [sp, outgoing_args_size]
-- stp reg3, reg4, [sp, outgoing_args_size + 16] */
-+ stp reg1, reg2, [sp, bytes_below_saved_regs]
-+ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
- frame.initial_adjust = frame.frame_size;
-- frame.callee_offset = const_outgoing_args_size;
-+ frame.callee_offset = const_below_saved_regs;
- }
- else if (saves_below_hard_fp_p
- && known_eq (frame.saved_regs_size,
-@@ -8679,30 +8681,29 @@ aarch64_layout_frame (void)
-
- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
- save SVE registers relative to SP
-- sub sp, sp, outgoing_args_size */
-+ sub sp, sp, bytes_below_saved_regs */
- frame.initial_adjust = (frame.hard_fp_offset
- + frame.below_hard_fp_saved_regs_size);
-- frame.final_adjust = crtl->outgoing_args_size;
-+ frame.final_adjust = frame.bytes_below_saved_regs;
- }
- else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
- && const_fp_offset < max_push_offset)
- {
-- /* Frame with large outgoing arguments or SVE saves, but with
-- a small local area:
-+ /* Frame with large area below the saved registers, or with SVE saves,
-+ but with a small area above:
-
- stp reg1, reg2, [sp, -hard_fp_offset]!
- stp reg3, reg4, [sp, 16]
- [sub sp, sp, below_hard_fp_saved_regs_size]
- [save SVE registers relative to SP]
-- sub sp, sp, outgoing_args_size */
-+ sub sp, sp, bytes_below_saved_regs */
- frame.callee_adjust = const_fp_offset;
- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
-- frame.final_adjust = crtl->outgoing_args_size;
-+ frame.final_adjust = frame.bytes_below_saved_regs;
- }
- else
- {
-- /* Frame with large local area and outgoing arguments or SVE saves,
-- using frame pointer:
-+ /* General case:
-
- sub sp, sp, hard_fp_offset
- stp x29, x30, [sp, 0]
-@@ -8710,10 +8711,10 @@ aarch64_layout_frame (void)
- stp reg3, reg4, [sp, 16]
- [sub sp, sp, below_hard_fp_saved_regs_size]
- [save SVE registers relative to SP]
-- sub sp, sp, outgoing_args_size */
-+ sub sp, sp, bytes_below_saved_regs */
- frame.initial_adjust = frame.hard_fp_offset;
- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
-- frame.final_adjust = crtl->outgoing_args_size;
-+ frame.final_adjust = frame.bytes_below_saved_regs;
- }
-
- /* Make sure the individual adjustments add up to the full frame size. */
-@@ -9358,7 +9359,7 @@ aarch64_get_separate_components (void)
- if (frame_pointer_needed)
- offset -= frame.below_hard_fp_saved_regs_size;
- else
-- offset += crtl->outgoing_args_size;
-+ offset += frame.bytes_below_saved_regs;
-
- /* Check that we can access the stack slot of the register with one
- direct load with no adjustments needed. */
-@@ -9507,7 +9508,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- if (frame_pointer_needed)
- offset -= frame.below_hard_fp_saved_regs_size;
- else
-- offset += crtl->outgoing_args_size;
-+ offset += frame.bytes_below_saved_regs;
-
- rtx addr = plus_constant (Pmode, ptr_reg, offset);
- rtx mem = gen_frame_mem (mode, addr);
-@@ -9561,7 +9562,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- if (frame_pointer_needed)
- offset2 -= frame.below_hard_fp_saved_regs_size;
- else
-- offset2 += crtl->outgoing_args_size;
-+ offset2 += frame.bytes_below_saved_regs;
- rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
- rtx mem2 = gen_frame_mem (mode, addr2);
- rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
-@@ -9635,10 +9636,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
- registers. If POLY_SIZE is not large enough to require a probe this function
- will only adjust the stack. When allocating the stack space
- FRAME_RELATED_P is then used to indicate if the allocation is frame related.
-- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
-- arguments. If we are then we ensure that any allocation larger than the ABI
-- defined buffer needs a probe so that the invariant of having a 1KB buffer is
-- maintained.
-+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
-+ the saved registers. If we are then we ensure that any allocation
-+ larger than the ABI defined buffer needs a probe so that the
-+ invariant of having a 1KB buffer is maintained.
-
- We emit barriers after each stack adjustment to prevent optimizations from
- breaking the invariant that we never drop the stack more than a page. This
-@@ -9847,7 +9848,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
- be probed. This maintains the requirement that each page is probed at
- least once. For initial probing we probe only if the allocation is
-- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
-+ more than GUARD_SIZE - buffer, and below the saved registers we probe
- if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
- GUARD_SIZE. This works that for any allocation that is large enough to
- trigger a probe here, we'll have at least one, and if they're not large
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 73b09e20508..0b6faa3ddf1 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -777,6 +777,11 @@ struct GTY (()) aarch64_frame
- /* The size of the callee-save registers with a slot in REG_OFFSET. */
- poly_int64 saved_regs_size;
-
-+ /* The number of bytes between the bottom of the static frame (the bottom
-+ of the outgoing arguments) and the bottom of the register save area.
-+ This value is always a multiple of STACK_BOUNDARY. */
-+ poly_int64 bytes_below_saved_regs;
-+
- /* The size of the callee-save registers with a slot in REG_OFFSET that
- are saved below the hard frame pointer. */
- poly_int64 below_hard_fp_saved_regs_size;
---
-2.34.1
-
-
-From 82f6b3e1b596ef0f4e3ac3bb9c6e88fb4458f402 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:14 +0100
-Subject: [PATCH 05/19] aarch64: Add bytes_below_hard_fp to frame info
-
-Following on from the previous bytes_below_saved_regs patch, this one
-records the number of bytes that are below the hard frame pointer.
-This eventually replaces below_hard_fp_saved_regs_size.
-
-If a frame pointer is not needed, the epilogue adds final_adjust
-to the stack pointer before restoring registers:
-
- aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);
-
-Therefore, if the epilogue needs to restore the stack pointer from
-the hard frame pointer, the directly corresponding offset is:
-
- -bytes_below_hard_fp + final_adjust
-
-i.e. go from the hard frame pointer to the bottom of the frame,
-then add the same amount as if we were using the stack pointer
-from the outset.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
- field.
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it.
- (aarch64_expand_epilogue): Use it instead of
- below_hard_fp_saved_regs_size.
----
- gcc/config/aarch64/aarch64.cc | 6 +++---
- gcc/config/aarch64/aarch64.h | 5 +++++
- 2 files changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index dacc2b0e4dd..a3f7aabcc59 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8530,6 +8530,7 @@ aarch64_layout_frame (void)
- of the callee save area. */
- bool saves_below_hard_fp_p = maybe_ne (offset, 0);
- frame.below_hard_fp_saved_regs_size = offset;
-+ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
- if (frame.emit_frame_chain)
- {
- /* FP and LR are placed in the linkage record. */
-@@ -10171,8 +10172,7 @@ aarch64_expand_epilogue (bool for_sibcall)
- poly_int64 final_adjust = frame.final_adjust;
- poly_int64 callee_offset = frame.callee_offset;
- poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-- poly_int64 below_hard_fp_saved_regs_size
-- = frame.below_hard_fp_saved_regs_size;
-+ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
- unsigned reg1 = frame.wb_pop_candidate1;
- unsigned reg2 = frame.wb_pop_candidate2;
- unsigned int last_gpr = (frame.is_scs_enabled
-@@ -10230,7 +10230,7 @@ aarch64_expand_epilogue (bool for_sibcall)
- is restored on the instruction doing the writeback. */
- aarch64_add_offset (Pmode, stack_pointer_rtx,
- hard_frame_pointer_rtx,
-- -callee_offset - below_hard_fp_saved_regs_size,
-+ -bytes_below_hard_fp + final_adjust,
- tmp1_rtx, tmp0_rtx, callee_adjust == 0);
- else
- /* The case where we need to re-use the register here is very rare, so
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 0b6faa3ddf1..4263d29d29d 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -786,6 +786,11 @@ struct GTY (()) aarch64_frame
- are saved below the hard frame pointer. */
- poly_int64 below_hard_fp_saved_regs_size;
-
-+ /* The number of bytes between the bottom of the static frame (the bottom
-+ of the outgoing arguments) and the hard frame pointer. This value is
-+ always a multiple of STACK_BOUNDARY. */
-+ poly_int64 bytes_below_hard_fp;
-+
- /* Offset from the base of the frame (incomming SP) to the
- top of the locals area. This value is always a multiple of
- STACK_BOUNDARY. */
---
-2.34.1
-
-
-From 86fa43e9fe4a8bf954f2919f07cbe3646d1d1df3 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:14 +0100
-Subject: [PATCH 06/19] aarch64: Tweak aarch64_save/restore_callee_saves
-
-aarch64_save_callee_saves and aarch64_restore_callee_saves took
-a parameter called start_offset that gives the offset of the
-bottom of the saved register area from the current stack pointer.
-However, it's more convenient for later patches if we use the
-bottom of the entire frame as the reference point, rather than
-the bottom of the saved registers.
-
-Doing that removes the need for the callee_offset field.
-Other than that, this is not a win on its own. It only really
-makes sense in combination with the follow-on patches.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete.
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove
- callee_offset handling.
- (aarch64_save_callee_saves): Replace the start_offset parameter
- with a bytes_below_sp parameter.
- (aarch64_restore_callee_saves): Likewise.
- (aarch64_expand_prologue): Update accordingly.
- (aarch64_expand_epilogue): Likewise.
----
- gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------
- gcc/config/aarch64/aarch64.h | 4 ---
- 2 files changed, 28 insertions(+), 32 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index a3f7aabcc59..46ae5cf7673 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8604,7 +8604,6 @@ aarch64_layout_frame (void)
- frame.final_adjust = 0;
- frame.callee_adjust = 0;
- frame.sve_callee_adjust = 0;
-- frame.callee_offset = 0;
-
- frame.wb_pop_candidate1 = frame.wb_push_candidate1;
- frame.wb_pop_candidate2 = frame.wb_push_candidate2;
-@@ -8672,7 +8671,6 @@ aarch64_layout_frame (void)
- stp reg1, reg2, [sp, bytes_below_saved_regs]
- stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
- frame.initial_adjust = frame.frame_size;
-- frame.callee_offset = const_below_saved_regs;
- }
- else if (saves_below_hard_fp_p
- && known_eq (frame.saved_regs_size,
-@@ -9073,12 +9071,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
- }
-
- /* Emit code to save the callee-saved registers from register number START
-- to LIMIT to the stack at the location starting at offset START_OFFSET,
-- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P
-- is true if the hard frame pointer has been set up. */
-+ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP
-+ bytes above the bottom of the static frame. Skip any write-back
-+ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard
-+ frame pointer has been set up. */
-
- static void
--aarch64_save_callee_saves (poly_int64 start_offset,
-+aarch64_save_callee_saves (poly_int64 bytes_below_sp,
- unsigned start, unsigned limit, bool skip_wb,
- bool hard_fp_valid_p)
- {
-@@ -9106,7 +9105,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + frame.reg_offset[regno];
-+ offset = (frame.reg_offset[regno]
-+ + frame.bytes_below_saved_regs
-+ - bytes_below_sp);
- rtx base_rtx = stack_pointer_rtx;
- poly_int64 sp_offset = offset;
-
-@@ -9117,9 +9118,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- else if (GP_REGNUM_P (regno)
- && (!offset.is_constant (&const_offset) || const_offset >= 512))
- {
-- gcc_assert (known_eq (start_offset, 0));
-- poly_int64 fp_offset
-- = frame.below_hard_fp_saved_regs_size;
-+ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp;
- if (hard_fp_valid_p)
- base_rtx = hard_frame_pointer_rtx;
- else
-@@ -9183,12 +9182,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- }
-
- /* Emit code to restore the callee registers from register number START
-- up to and including LIMIT. Restore from the stack offset START_OFFSET,
-- skipping any write-back candidates if SKIP_WB is true. Write the
-- appropriate REG_CFA_RESTORE notes into CFI_OPS. */
-+ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
-+ bytes above the bottom of the static frame. Skip any write-back
-+ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
-+ notes into CFI_OPS. */
-
- static void
--aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
-+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
- unsigned limit, bool skip_wb, rtx *cfi_ops)
- {
- aarch64_frame &frame = cfun->machine->frame;
-@@ -9214,7 +9214,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = start_offset + frame.reg_offset[regno];
-+ offset = (frame.reg_offset[regno]
-+ + frame.bytes_below_saved_regs
-+ - bytes_below_sp);
- rtx base_rtx = stack_pointer_rtx;
- if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
-@@ -9990,8 +9992,6 @@ aarch64_expand_prologue (void)
- HOST_WIDE_INT callee_adjust = frame.callee_adjust;
- poly_int64 final_adjust = frame.final_adjust;
- poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-- poly_int64 below_hard_fp_saved_regs_size
-- = frame.below_hard_fp_saved_regs_size;
- unsigned reg1 = frame.wb_push_candidate1;
- unsigned reg2 = frame.wb_push_candidate2;
- bool emit_frame_chain = frame.emit_frame_chain;
-@@ -10067,8 +10067,8 @@ aarch64_expand_prologue (void)
- - frame.hard_fp_offset);
- gcc_assert (known_ge (chain_offset, 0));
-
-- /* The offset of the bottom of the save area from the current SP. */
-- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
-+ /* The offset of the current SP from the bottom of the static frame. */
-+ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
-
- if (emit_frame_chain)
- {
-@@ -10076,7 +10076,7 @@ aarch64_expand_prologue (void)
- {
- reg1 = R29_REGNUM;
- reg2 = R30_REGNUM;
-- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2,
-+ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
- false, false);
- }
- else
-@@ -10116,7 +10116,7 @@ aarch64_expand_prologue (void)
- emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
- }
-
-- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
-+ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
- callee_adjust != 0 || emit_frame_chain,
- emit_frame_chain);
- if (maybe_ne (sve_callee_adjust, 0))
-@@ -10126,16 +10126,17 @@ aarch64_expand_prologue (void)
- aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
- sve_callee_adjust,
- !frame_pointer_needed, false);
-- saved_regs_offset += sve_callee_adjust;
-+ bytes_below_sp -= sve_callee_adjust;
- }
-- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM,
-+ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
- false, emit_frame_chain);
-- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM,
-+ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
- callee_adjust != 0 || emit_frame_chain,
- emit_frame_chain);
-
- /* We may need to probe the final adjustment if it is larger than the guard
- that is assumed by the called. */
-+ gcc_assert (known_eq (bytes_below_sp, final_adjust));
- aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
- !frame_pointer_needed, true);
- }
-@@ -10170,7 +10171,6 @@ aarch64_expand_epilogue (bool for_sibcall)
- poly_int64 initial_adjust = frame.initial_adjust;
- HOST_WIDE_INT callee_adjust = frame.callee_adjust;
- poly_int64 final_adjust = frame.final_adjust;
-- poly_int64 callee_offset = frame.callee_offset;
- poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
- poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
- unsigned reg1 = frame.wb_pop_candidate1;
-@@ -10240,9 +10240,9 @@ aarch64_expand_epilogue (bool for_sibcall)
-
- /* Restore the vector registers before the predicate registers,
- so that we can use P4 as a temporary for big-endian SVE frames. */
-- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM,
-+ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
- callee_adjust != 0, &cfi_ops);
-- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM,
-+ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
- false, &cfi_ops);
- if (maybe_ne (sve_callee_adjust, 0))
- aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
-@@ -10250,7 +10250,7 @@ aarch64_expand_epilogue (bool for_sibcall)
- /* When shadow call stack is enabled, the scs_pop in the epilogue will
- restore x30, we don't need to restore x30 again in the traditional
- way. */
-- aarch64_restore_callee_saves (callee_offset - sve_callee_adjust,
-+ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
- R0_REGNUM, last_gpr,
- callee_adjust != 0, &cfi_ops);
-
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 4263d29d29d..fd820b1be4e 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -813,10 +813,6 @@ struct GTY (()) aarch64_frame
- It is zero when no push is used. */
- HOST_WIDE_INT callee_adjust;
-
-- /* The offset from SP to the callee-save registers after initial_adjust.
-- It may be non-zero if no push is used (ie. callee_adjust == 0). */
-- poly_int64 callee_offset;
--
- /* The size of the stack adjustment before saving or after restoring
- SVE registers. */
- poly_int64 sve_callee_adjust;
---
-2.34.1
-
-
-From 8ae9181426f2700c2e5a2909487fa630e6fa406b Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:15 +0100
-Subject: [PATCH 07/19] aarch64: Only calculate chain_offset if there is a
- chain
-
-After previous patches, it is no longer necessary to calculate
-a chain_offset in cases where there is no chain record.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the
- calculation of chain_offset into the emit_frame_chain block.
----
- gcc/config/aarch64/aarch64.cc | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 46ae5cf7673..0e9b9717c08 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -10062,16 +10062,16 @@ aarch64_expand_prologue (void)
- if (callee_adjust != 0)
- aarch64_push_regs (reg1, reg2, callee_adjust);
-
-- /* The offset of the frame chain record (if any) from the current SP. */
-- poly_int64 chain_offset = (initial_adjust + callee_adjust
-- - frame.hard_fp_offset);
-- gcc_assert (known_ge (chain_offset, 0));
--
- /* The offset of the current SP from the bottom of the static frame. */
- poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
-
- if (emit_frame_chain)
- {
-+ /* The offset of the frame chain record (if any) from the current SP. */
-+ poly_int64 chain_offset = (initial_adjust + callee_adjust
-+ - frame.hard_fp_offset);
-+ gcc_assert (known_ge (chain_offset, 0));
-+
- if (callee_adjust == 0)
- {
- reg1 = R29_REGNUM;
---
-2.34.1
-
-
-From 375794feb614cee1f41b710b9cc1b6f25da6c1cb Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:15 +0100
-Subject: [PATCH 08/19] aarch64: Rename locals_offset to bytes_above_locals
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-locals_offset was described as:
-
- /* Offset from the base of the frame (incomming SP) to the
- top of the locals area. This value is always a multiple of
- STACK_BOUNDARY. */
-
-This is implicitly an “upside down” view of the frame: the incoming
-SP is at offset 0, and anything N bytes below the incoming SP is at
-offset N (rather than -N).
-
-However, reg_offset instead uses a “right way up” view; that is,
-it views offsets in address terms. Something above X is at a
-positive offset from X and something below X is at a negative
-offset from X.
-
-Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
-target-independent code views offsets in address terms too:
-locals are allocated at negative offsets to virtual_stack_vars.
-
-It seems confusing to have *_offset fields of the same structure
-using different polarities like this. This patch tries to avoid
-that by renaming locals_offset to bytes_above_locals.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
- (aarch64_frame::bytes_above_locals): ...this.
- * config/aarch64/aarch64.cc (aarch64_layout_frame)
- (aarch64_initial_elimination_offset): Update accordingly.
----
- gcc/config/aarch64/aarch64.cc | 6 +++---
- gcc/config/aarch64/aarch64.h | 6 +++---
- 2 files changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 0e9b9717c08..0a22f91520e 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8598,7 +8598,7 @@ aarch64_layout_frame (void)
- STACK_BOUNDARY / BITS_PER_UNIT));
- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
-
-- frame.locals_offset = frame.saved_varargs_size;
-+ frame.bytes_above_locals = frame.saved_varargs_size;
-
- frame.initial_adjust = 0;
- frame.final_adjust = 0;
-@@ -12754,13 +12754,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
- return frame.hard_fp_offset;
-
- if (from == FRAME_POINTER_REGNUM)
-- return frame.hard_fp_offset - frame.locals_offset;
-+ return frame.hard_fp_offset - frame.bytes_above_locals;
- }
-
- if (to == STACK_POINTER_REGNUM)
- {
- if (from == FRAME_POINTER_REGNUM)
-- return frame.frame_size - frame.locals_offset;
-+ return frame.frame_size - frame.bytes_above_locals;
- }
-
- return frame.frame_size;
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index fd820b1be4e..7ae12d13e2b 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -791,10 +791,10 @@ struct GTY (()) aarch64_frame
- always a multiple of STACK_BOUNDARY. */
- poly_int64 bytes_below_hard_fp;
-
-- /* Offset from the base of the frame (incomming SP) to the
-- top of the locals area. This value is always a multiple of
-+ /* The number of bytes between the top of the locals area and the top
-+ of the frame (the incomming SP). This value is always a multiple of
- STACK_BOUNDARY. */
-- poly_int64 locals_offset;
-+ poly_int64 bytes_above_locals;
-
- /* Offset from the base of the frame (incomming SP) to the
- hard_frame_pointer. This value is always a multiple of
---
-2.34.1
-
-
-From 1a9ea1c45c75615ffbfabe652b3598a1d7be2168 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:16 +0100
-Subject: [PATCH 09/19] aarch64: Rename hard_fp_offset to bytes_above_hard_fp
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Similarly to the previous locals_offset patch, hard_fp_offset
-was described as:
-
- /* Offset from the base of the frame (incomming SP) to the
- hard_frame_pointer. This value is always a multiple of
- STACK_BOUNDARY. */
- poly_int64 hard_fp_offset;
-
-which again took an “upside-down” view: higher offsets meant lower
-addresses. This patch renames the field to bytes_above_hard_fp instead.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename
- to...
- (aarch64_frame::bytes_above_hard_fp): ...this.
- * config/aarch64/aarch64.cc (aarch64_layout_frame)
- (aarch64_expand_prologue): Update accordingly.
- (aarch64_initial_elimination_offset): Likewise.
----
- gcc/config/aarch64/aarch64.cc | 26 +++++++++++++-------------
- gcc/config/aarch64/aarch64.h | 6 +++---
- 2 files changed, 16 insertions(+), 16 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 0a22f91520e..95499ae49ba 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8590,7 +8590,7 @@ aarch64_layout_frame (void)
- + get_frame_size (),
- STACK_BOUNDARY / BITS_PER_UNIT);
-
-- frame.hard_fp_offset
-+ frame.bytes_above_hard_fp
- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
-
- /* Both these values are already aligned. */
-@@ -8639,13 +8639,13 @@ aarch64_layout_frame (void)
- else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
- max_push_offset = 256;
-
-- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
-+ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
- HOST_WIDE_INT const_saved_regs_size;
- if (known_eq (frame.saved_regs_size, 0))
- frame.initial_adjust = frame.frame_size;
- else if (frame.frame_size.is_constant (&const_size)
- && const_size < max_push_offset
-- && known_eq (frame.hard_fp_offset, const_size))
-+ && known_eq (frame.bytes_above_hard_fp, const_size))
- {
- /* Simple, small frame with no data below the saved registers.
-
-@@ -8662,8 +8662,8 @@ aarch64_layout_frame (void)
- case that it hardly seems worth the effort though. */
- && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
- && !(cfun->calls_alloca
-- && frame.hard_fp_offset.is_constant (&const_fp_offset)
-- && const_fp_offset < max_push_offset))
-+ && frame.bytes_above_hard_fp.is_constant (&const_above_fp)
-+ && const_above_fp < max_push_offset))
- {
- /* Frame with small area below the saved registers:
-
-@@ -8681,12 +8681,12 @@ aarch64_layout_frame (void)
- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
- save SVE registers relative to SP
- sub sp, sp, bytes_below_saved_regs */
-- frame.initial_adjust = (frame.hard_fp_offset
-+ frame.initial_adjust = (frame.bytes_above_hard_fp
- + frame.below_hard_fp_saved_regs_size);
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
-- else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
-- && const_fp_offset < max_push_offset)
-+ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
-+ && const_above_fp < max_push_offset)
- {
- /* Frame with large area below the saved registers, or with SVE saves,
- but with a small area above:
-@@ -8696,7 +8696,7 @@ aarch64_layout_frame (void)
- [sub sp, sp, below_hard_fp_saved_regs_size]
- [save SVE registers relative to SP]
- sub sp, sp, bytes_below_saved_regs */
-- frame.callee_adjust = const_fp_offset;
-+ frame.callee_adjust = const_above_fp;
- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
-@@ -8711,7 +8711,7 @@ aarch64_layout_frame (void)
- [sub sp, sp, below_hard_fp_saved_regs_size]
- [save SVE registers relative to SP]
- sub sp, sp, bytes_below_saved_regs */
-- frame.initial_adjust = frame.hard_fp_offset;
-+ frame.initial_adjust = frame.bytes_above_hard_fp;
- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
-@@ -10069,7 +10069,7 @@ aarch64_expand_prologue (void)
- {
- /* The offset of the frame chain record (if any) from the current SP. */
- poly_int64 chain_offset = (initial_adjust + callee_adjust
-- - frame.hard_fp_offset);
-+ - frame.bytes_above_hard_fp);
- gcc_assert (known_ge (chain_offset, 0));
-
- if (callee_adjust == 0)
-@@ -12751,10 +12751,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
- if (to == HARD_FRAME_POINTER_REGNUM)
- {
- if (from == ARG_POINTER_REGNUM)
-- return frame.hard_fp_offset;
-+ return frame.bytes_above_hard_fp;
-
- if (from == FRAME_POINTER_REGNUM)
-- return frame.hard_fp_offset - frame.bytes_above_locals;
-+ return frame.bytes_above_hard_fp - frame.bytes_above_locals;
- }
-
- if (to == STACK_POINTER_REGNUM)
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 7ae12d13e2b..3808f49e9ca 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -796,10 +796,10 @@ struct GTY (()) aarch64_frame
- STACK_BOUNDARY. */
- poly_int64 bytes_above_locals;
-
-- /* Offset from the base of the frame (incomming SP) to the
-- hard_frame_pointer. This value is always a multiple of
-+ /* The number of bytes between the hard_frame_pointer and the top of
-+ the frame (the incomming SP). This value is always a multiple of
- STACK_BOUNDARY. */
-- poly_int64 hard_fp_offset;
-+ poly_int64 bytes_above_hard_fp;
-
- /* The size of the frame. This value is the offset from base of the
- frame (incomming SP) to the stack_pointer. This value is always
---
-2.34.1
-
-
-From d202ce1ecf60a36a3e1009917dd76109248ce9be Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:16 +0100
-Subject: [PATCH 10/19] aarch64: Tweak frame_size comment
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This patch fixes another case in which a value was described with
-an “upside-down” view.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
----
- gcc/config/aarch64/aarch64.h | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 3808f49e9ca..108a5731b0d 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -801,8 +801,8 @@ struct GTY (()) aarch64_frame
- STACK_BOUNDARY. */
- poly_int64 bytes_above_hard_fp;
-
-- /* The size of the frame. This value is the offset from base of the
-- frame (incomming SP) to the stack_pointer. This value is always
-+ /* The size of the frame, i.e. the number of bytes between the bottom
-+ of the outgoing arguments and the incoming SP. This value is always
- a multiple of STACK_BOUNDARY. */
- poly_int64 frame_size;
-
---
-2.34.1
-
-
-From f2b585375205b0a1802d79c682ba33766ecd1f0f Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:17 +0100
-Subject: [PATCH 11/19] aarch64: Measure reg_offset from the bottom of the
- frame
-
-reg_offset was measured from the bottom of the saved register area.
-This made perfect sense with the original layout, since the bottom
-of the saved register area was also the hard frame pointer address.
-It became slightly less obvious with SVE, since we save SVE
-registers below the hard frame pointer, but it still made sense.
-
-However, if we want to allow different frame layouts, it's more
-convenient and obvious to measure reg_offset from the bottom of
-the frame. After previous patches, it's also a slight simplification
-in its own right.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame): Add comment above
- reg_offset.
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets
- from the bottom of the frame, rather than the bottom of the saved
- register area. Measure reg_offset from the bottom of the frame
- rather than the bottom of the saved register area.
- (aarch64_save_callee_saves): Update accordingly.
- (aarch64_restore_callee_saves): Likewise.
- (aarch64_get_separate_components): Likewise.
- (aarch64_process_components): Likewise.
----
- gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++-------------------
- gcc/config/aarch64/aarch64.h | 3 ++
- 2 files changed, 27 insertions(+), 29 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 95499ae49ba..af99807ef8a 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8400,7 +8400,6 @@ aarch64_needs_frame_chain (void)
- static void
- aarch64_layout_frame (void)
- {
-- poly_int64 offset = 0;
- int regno, last_fp_reg = INVALID_REGNUM;
- machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
- poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
-@@ -8478,7 +8477,9 @@ aarch64_layout_frame (void)
- gcc_assert (crtl->is_leaf
- || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
-
-- frame.bytes_below_saved_regs = crtl->outgoing_args_size;
-+ poly_int64 offset = crtl->outgoing_args_size;
-+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
-+ frame.bytes_below_saved_regs = offset;
-
- /* Now assign stack slots for the registers. Start with the predicate
- registers, since predicate LDR and STR have a relatively small
-@@ -8490,7 +8491,8 @@ aarch64_layout_frame (void)
- offset += BYTES_PER_SVE_PRED;
- }
-
-- if (maybe_ne (offset, 0))
-+ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs;
-+ if (maybe_ne (saved_prs_size, 0))
- {
- /* If we have any vector registers to save above the predicate registers,
- the offset of the vector register save slots need to be a multiple
-@@ -8508,10 +8510,10 @@ aarch64_layout_frame (void)
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
- else
- {
-- if (known_le (offset, vector_save_size))
-- offset = vector_save_size;
-- else if (known_le (offset, vector_save_size * 2))
-- offset = vector_save_size * 2;
-+ if (known_le (saved_prs_size, vector_save_size))
-+ offset = frame.bytes_below_saved_regs + vector_save_size;
-+ else if (known_le (saved_prs_size, vector_save_size * 2))
-+ offset = frame.bytes_below_saved_regs + vector_save_size * 2;
- else
- gcc_unreachable ();
- }
-@@ -8528,9 +8530,10 @@ aarch64_layout_frame (void)
-
- /* OFFSET is now the offset of the hard frame pointer from the bottom
- of the callee save area. */
-- bool saves_below_hard_fp_p = maybe_ne (offset, 0);
-- frame.below_hard_fp_saved_regs_size = offset;
-- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
-+ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
-+ bool saves_below_hard_fp_p
-+ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
-+ frame.bytes_below_hard_fp = offset;
- if (frame.emit_frame_chain)
- {
- /* FP and LR are placed in the linkage record. */
-@@ -8581,9 +8584,10 @@ aarch64_layout_frame (void)
-
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-
-- frame.saved_regs_size = offset;
-+ frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
-
-- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
-+ poly_int64 varargs_and_saved_regs_size
-+ = frame.saved_regs_size + frame.saved_varargs_size;
-
- poly_int64 saved_regs_and_above
- = aligned_upper_bound (varargs_and_saved_regs_size
-@@ -9105,9 +9109,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = (frame.reg_offset[regno]
-- + frame.bytes_below_saved_regs
-- - bytes_below_sp);
-+ offset = frame.reg_offset[regno] - bytes_below_sp;
- rtx base_rtx = stack_pointer_rtx;
- poly_int64 sp_offset = offset;
-
-@@ -9214,9 +9216,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
-
- machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
-- offset = (frame.reg_offset[regno]
-- + frame.bytes_below_saved_regs
-- - bytes_below_sp);
-+ offset = frame.reg_offset[regno] - bytes_below_sp;
- rtx base_rtx = stack_pointer_rtx;
- if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
-@@ -9355,14 +9355,12 @@ aarch64_get_separate_components (void)
- it as a stack probe for -fstack-clash-protection. */
- if (flag_stack_clash_protection
- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
-- && known_eq (offset, 0))
-+ && known_eq (offset, frame.bytes_below_saved_regs))
- continue;
-
- /* Get the offset relative to the register we'll use. */
- if (frame_pointer_needed)
-- offset -= frame.below_hard_fp_saved_regs_size;
-- else
-- offset += frame.bytes_below_saved_regs;
-+ offset -= frame.bytes_below_hard_fp;
-
- /* Check that we can access the stack slot of the register with one
- direct load with no adjustments needed. */
-@@ -9509,9 +9507,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- rtx reg = gen_rtx_REG (mode, regno);
- poly_int64 offset = frame.reg_offset[regno];
- if (frame_pointer_needed)
-- offset -= frame.below_hard_fp_saved_regs_size;
-- else
-- offset += frame.bytes_below_saved_regs;
-+ offset -= frame.bytes_below_hard_fp;
-
- rtx addr = plus_constant (Pmode, ptr_reg, offset);
- rtx mem = gen_frame_mem (mode, addr);
-@@ -9563,9 +9559,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- /* REGNO2 can be saved/restored in a pair with REGNO. */
- rtx reg2 = gen_rtx_REG (mode, regno2);
- if (frame_pointer_needed)
-- offset2 -= frame.below_hard_fp_saved_regs_size;
-- else
-- offset2 += frame.bytes_below_saved_regs;
-+ offset2 -= frame.bytes_below_hard_fp;
- rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
- rtx mem2 = gen_frame_mem (mode, addr2);
- rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
-@@ -9681,7 +9675,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- if (final_adjustment_p
- && known_eq (frame.below_hard_fp_saved_regs_size, 0))
- {
-- poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
-+ poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
-+ - frame.bytes_below_saved_regs);
- if (known_ge (lr_offset, 0))
- min_probe_threshold -= lr_offset.to_constant ();
- else
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index 108a5731b0d..c8becb098c8 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -766,6 +766,9 @@ extern enum aarch64_processor aarch64_tune;
- #ifdef HAVE_POLY_INT_H
- struct GTY (()) aarch64_frame
- {
-+ /* The offset from the bottom of the static frame (the bottom of the
-+ outgoing arguments) of each register save slot, or -2 if no save is
-+ needed. */
- poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
-
- /* The number of extra stack bytes taken up by register varargs.
---
-2.34.1
-
-
-From 79faabda181d0d9fd29a3cf5726ba65bdee945b5 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:17 +0100
-Subject: [PATCH 12/19] aarch64: Simplify top of frame allocation
-
-After previous patches, it no longer really makes sense to allocate
-the top of the frame in terms of varargs_and_saved_regs_size and
-saved_regs_and_above.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify
- the allocation of the top of the frame.
----
- gcc/config/aarch64/aarch64.cc | 23 ++++++++---------------
- 1 file changed, 8 insertions(+), 15 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index af99807ef8a..31b00094c2a 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8586,23 +8586,16 @@ aarch64_layout_frame (void)
-
- frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
-
-- poly_int64 varargs_and_saved_regs_size
-- = frame.saved_regs_size + frame.saved_varargs_size;
--
-- poly_int64 saved_regs_and_above
-- = aligned_upper_bound (varargs_and_saved_regs_size
-- + get_frame_size (),
-- STACK_BOUNDARY / BITS_PER_UNIT);
--
-- frame.bytes_above_hard_fp
-- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
-+ offset += get_frame_size ();
-+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-+ auto top_of_locals = offset;
-
-- /* Both these values are already aligned. */
-- gcc_assert (multiple_p (frame.bytes_below_saved_regs,
-- STACK_BOUNDARY / BITS_PER_UNIT));
-- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
-+ offset += frame.saved_varargs_size;
-+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
-+ frame.frame_size = offset;
-
-- frame.bytes_above_locals = frame.saved_varargs_size;
-+ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
-+ frame.bytes_above_locals = frame.frame_size - top_of_locals;
-
- frame.initial_adjust = 0;
- frame.final_adjust = 0;
---
-2.34.1
-
-
-From 4e62049e403b141e6f916176160dac8cbd65fe47 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:18 +0100
-Subject: [PATCH 13/19] aarch64: Minor initial adjustment tweak
-
-This patch just changes a calculation of initial_adjust
-to one that makes it slightly more obvious that the total
-adjustment is frame.frame_size.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak
- calculation of initial_adjust for frames in which all saves
- are SVE saves.
----
- gcc/config/aarch64/aarch64.cc | 5 ++---
- 1 file changed, 2 insertions(+), 3 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 31b00094c2a..1aa79da0673 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8675,11 +8675,10 @@ aarch64_layout_frame (void)
- {
- /* Frame in which all saves are SVE saves:
-
-- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
-+ sub sp, sp, frame_size - bytes_below_saved_regs
- save SVE registers relative to SP
- sub sp, sp, bytes_below_saved_regs */
-- frame.initial_adjust = (frame.bytes_above_hard_fp
-- + frame.below_hard_fp_saved_regs_size);
-+ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs;
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
- else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
---
-2.34.1
-
-
-From aaa1a0a5912d9e5d571e5f1c6f09ceac99544ab5 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:18 +0100
-Subject: [PATCH 14/19] aarch64: Tweak stack clash boundary condition
-
-The AArch64 ABI says that, when stack clash protection is used,
-there can be a maximum of 1KiB of unprobed space at sp on entry
-to a function. Therefore, we need to probe when allocating
->= guard_size - 1KiB of data (>= rather than >). This is what
-GCC does.
-
-If an allocation is exactly guard_size bytes, it is enough to allocate
-those bytes and probe once at offset 1024. It isn't possible to use a
-single probe at any other offset: higher would conmplicate later code,
-by leaving more unprobed space than usual, while lower would risk
-leaving an entire page unprobed. For simplicity, the code probes all
-allocations at offset 1024.
-
-Some register saves also act as probes. If we need to allocate
-more space below the last such register save probe, we need to
-probe the allocation if it is > 1KiB. Again, this allocation is
-then sometimes (but not always) probed at offset 1024. This sort of
-allocation is currently only used for outgoing arguments, which are
-rarely this big.
-
-However, the code also probed if this final outgoing-arguments
-allocation was == 1KiB, rather than just > 1KiB. This isn't
-necessary, since the register save then probes at offset 1024
-as required. Continuing to probe allocations of exactly 1KiB
-would complicate later patches.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
- Don't probe final allocations that are exactly 1KiB in size (after
- unprobed space above the final allocation has been deducted).
-
-gcc/testsuite/
- * gcc.target/aarch64/stack-check-prologue-17.c: New test.
----
- gcc/config/aarch64/aarch64.cc | 4 +-
- .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++
- 2 files changed, 58 insertions(+), 1 deletion(-)
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 1aa79da0673..5cad847977a 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9648,9 +9648,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- HOST_WIDE_INT guard_size
- = 1 << param_stack_clash_protection_guard_size;
- HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
-+ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
-+ gcc_assert (multiple_p (poly_size, byte_sp_alignment));
- HOST_WIDE_INT min_probe_threshold
- = (final_adjustment_p
-- ? guard_used_by_caller
-+ ? guard_used_by_caller + byte_sp_alignment
- : guard_size - guard_used_by_caller);
- /* When doing the final adjustment for the outgoing arguments, take into
- account any unprobed space there is above the current SP. There are
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-new file mode 100644
-index 00000000000..0d8a25d73a2
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-@@ -0,0 +1,55 @@
-+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+void f(int, ...);
-+void g();
-+
-+/*
-+** test1:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #1024
-+** cbnz w0, .*
-+** bl g
-+** ...
-+*/
-+int test1(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+ }
-+ g();
-+ return 1;
-+}
-+
-+/*
-+** test2:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #1040
-+** str xzr, \[sp\]
-+** cbnz w0, .*
-+** bl g
-+** ...
-+*/
-+int test2(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x);
-+ }
-+ g();
-+ return 1;
-+}
---
-2.34.1
-
-
-From 8433953434a7b58c0923140d39eb3c5988c1d097 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:19 +0100
-Subject: [PATCH 15/19] aarch64: Put LR save probe in first 16 bytes
-
--fstack-clash-protection uses the save of LR as a probe for the next
-allocation. The next allocation could be:
-
-* another part of the static frame, e.g. when allocating SVE save slots
- or outgoing arguments
-
-* an alloca in the same function
-
-* an allocation made by a callee function
-
-However, when -fomit-frame-pointer is used, the LR save slot is placed
-above the other GPR save slots. It could therefore be up to 80 bytes
-above the base of the GPR save area (which is also the hard fp address).
-
-aarch64_allocate_and_probe_stack_space took this into account when
-deciding how much subsequent space could be allocated without needing
-a probe. However, it interacted badly with:
-
- /* If doing a small final adjustment, we always probe at offset 0.
- This is done to avoid issues when LR is not at position 0 or when
- the final adjustment is smaller than the probing offset. */
- else if (final_adjustment_p && rounded_size == 0)
- residual_probe_offset = 0;
-
-which forces any allocation that is smaller than the guard page size
-to be probed at offset 0 rather than the usual offset 1024. It was
-therefore possible to construct cases in which we had:
-
-* a probe using LR at SP + 80 bytes (or some other value >= 16)
-* an allocation of the guard page size - 16 bytes
-* a probe at SP + 0
-
-which allocates guard page size + 64 consecutive unprobed bytes.
-
-This patch requires the LR probe to be in the first 16 bytes of the
-save area when stack clash protection is active. Doing it
-unconditionally would cause code-quality regressions.
-
-Putting LR before other registers prevents push/pop allocation
-when shadow call stacks are enabled, since LR is restored
-separately from the other callee-saved registers.
-
-The new comment doesn't say that the probe register is required
-to be LR, since a later patch removes that restriction.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that
- the LR save slot is in the first 16 bytes of the register save area.
- Only form STP/LDP push/pop candidates if both registers are valid.
- (aarch64_allocate_and_probe_stack_space): Remove workaround for
- when LR was not in the first 16 bytes.
-
-gcc/testsuite/
- * gcc.target/aarch64/stack-check-prologue-18.c: New test.
- * gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
- * gcc.target/aarch64/stack-check-prologue-20.c: Likewise.
----
- gcc/config/aarch64/aarch64.cc | 72 ++++++-------
- .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++
- .../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++
- .../aarch64/stack-check-prologue-20.c | 3 +
- 4 files changed, 233 insertions(+), 42 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 5cad847977a..a765f92329d 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8534,26 +8534,34 @@ aarch64_layout_frame (void)
- bool saves_below_hard_fp_p
- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
- frame.bytes_below_hard_fp = offset;
-+
-+ auto allocate_gpr_slot = [&](unsigned int regno)
-+ {
-+ frame.reg_offset[regno] = offset;
-+ if (frame.wb_push_candidate1 == INVALID_REGNUM)
-+ frame.wb_push_candidate1 = regno;
-+ else if (frame.wb_push_candidate2 == INVALID_REGNUM)
-+ frame.wb_push_candidate2 = regno;
-+ offset += UNITS_PER_WORD;
-+ };
-+
- if (frame.emit_frame_chain)
- {
- /* FP and LR are placed in the linkage record. */
-- frame.reg_offset[R29_REGNUM] = offset;
-- frame.wb_push_candidate1 = R29_REGNUM;
-- frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
-- frame.wb_push_candidate2 = R30_REGNUM;
-- offset += 2 * UNITS_PER_WORD;
-+ allocate_gpr_slot (R29_REGNUM);
-+ allocate_gpr_slot (R30_REGNUM);
- }
-+ else if (flag_stack_clash_protection
-+ && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
-+ /* Put the LR save slot first, since it makes a good choice of probe
-+ for stack clash purposes. The idea is that the link register usually
-+ has to be saved before a call anyway, and so we lose little by
-+ stopping it from being individually shrink-wrapped. */
-+ allocate_gpr_slot (R30_REGNUM);
-
- for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
- if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
-- {
-- frame.reg_offset[regno] = offset;
-- if (frame.wb_push_candidate1 == INVALID_REGNUM)
-- frame.wb_push_candidate1 = regno;
-- else if (frame.wb_push_candidate2 == INVALID_REGNUM)
-- frame.wb_push_candidate2 = regno;
-- offset += UNITS_PER_WORD;
-- }
-+ allocate_gpr_slot (regno);
-
- poly_int64 max_int_offset = offset;
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-@@ -8631,10 +8639,13 @@ aarch64_layout_frame (void)
- max_push_offset to 0, because no registers are popped at this time,
- so callee_adjust cannot be adjusted. */
- HOST_WIDE_INT max_push_offset = 0;
-- if (frame.wb_pop_candidate2 != INVALID_REGNUM)
-- max_push_offset = 512;
-- else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
-- max_push_offset = 256;
-+ if (frame.wb_pop_candidate1 != INVALID_REGNUM)
-+ {
-+ if (frame.wb_pop_candidate2 != INVALID_REGNUM)
-+ max_push_offset = 512;
-+ else
-+ max_push_offset = 256;
-+ }
-
- HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
- HOST_WIDE_INT const_saved_regs_size;
-@@ -9654,29 +9665,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- = (final_adjustment_p
- ? guard_used_by_caller + byte_sp_alignment
- : guard_size - guard_used_by_caller);
-- /* When doing the final adjustment for the outgoing arguments, take into
-- account any unprobed space there is above the current SP. There are
-- two cases:
--
-- - When saving SVE registers below the hard frame pointer, we force
-- the lowest save to take place in the prologue before doing the final
-- adjustment (i.e. we don't allow the save to be shrink-wrapped).
-- This acts as a probe at SP, so there is no unprobed space.
--
-- - When there are no SVE register saves, we use the store of the link
-- register as a probe. We can't assume that LR was saved at position 0
-- though, so treat any space below it as unprobed. */
-- if (final_adjustment_p
-- && known_eq (frame.below_hard_fp_saved_regs_size, 0))
-- {
-- poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
-- - frame.bytes_below_saved_regs);
-- if (known_ge (lr_offset, 0))
-- min_probe_threshold -= lr_offset.to_constant ();
-- else
-- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
-- }
--
- poly_int64 frame_size = frame.frame_size;
-
- /* We should always have a positive probe threshold. */
-@@ -9856,8 +9844,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- if (final_adjustment_p && rounded_size != 0)
- min_probe_threshold = 0;
- /* If doing a small final adjustment, we always probe at offset 0.
-- This is done to avoid issues when LR is not at position 0 or when
-- the final adjustment is smaller than the probing offset. */
-+ This is done to avoid issues when the final adjustment is smaller
-+ than the probing offset. */
- else if (final_adjustment_p && rounded_size == 0)
- residual_probe_offset = 0;
-
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-new file mode 100644
-index 00000000000..82447d20fff
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-@@ -0,0 +1,100 @@
-+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+void f(int, ...);
-+void g();
-+
-+/*
-+** test1:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #4064
-+** str xzr, \[sp\]
-+** cbnz w0, .*
-+** bl g
-+** ...
-+** str x26, \[sp, #?4128\]
-+** ...
-+*/
-+int test1(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ asm volatile ("" :::
-+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+ }
-+ g();
-+ return 1;
-+}
-+
-+/*
-+** test2:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #1040
-+** str xzr, \[sp\]
-+** cbnz w0, .*
-+** bl g
-+** ...
-+*/
-+int test2(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ asm volatile ("" :::
-+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x);
-+ }
-+ g();
-+ return 1;
-+}
-+
-+/*
-+** test3:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #1024
-+** cbnz w0, .*
-+** bl g
-+** ...
-+*/
-+int test3(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ asm volatile ("" :::
-+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+ }
-+ g();
-+ return 1;
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-new file mode 100644
-index 00000000000..73ac3e4e4eb
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-@@ -0,0 +1,100 @@
-+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+void f(int, ...);
-+void g();
-+
-+/*
-+** test1:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #4064
-+** str xzr, \[sp\]
-+** cbnz w0, .*
-+** bl g
-+** ...
-+** str x26, \[sp, #?4128\]
-+** ...
-+*/
-+int test1(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ asm volatile ("" :::
-+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+ }
-+ g();
-+ return 1;
-+}
-+
-+/*
-+** test2:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #1040
-+** str xzr, \[sp\]
-+** cbnz w0, .*
-+** bl g
-+** ...
-+*/
-+int test2(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ asm volatile ("" :::
-+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x);
-+ }
-+ g();
-+ return 1;
-+}
-+
-+/*
-+** test3:
-+** ...
-+** str x30, \[sp\]
-+** sub sp, sp, #1024
-+** cbnz w0, .*
-+** bl g
-+** ...
-+*/
-+int test3(int z) {
-+ __uint128_t x = 0;
-+ int y[0x400];
-+ if (z)
-+ {
-+ asm volatile ("" :::
-+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+ f(0, 0, 0, 0, 0, 0, 0, &y,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+ }
-+ g();
-+ return 1;
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
-new file mode 100644
-index 00000000000..690aae8dfd5
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
-@@ -0,0 +1,3 @@
-+/* { dg-options "-O2 -fstack-protector-all -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
-+
-+#include "stack-check-prologue-19.c"
---
-2.34.1
-
-
-From eea1759073e09dd1aefbc9a881601ab1eebfdd18 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:19 +0100
-Subject: [PATCH 16/19] aarch64: Simplify probe of final frame allocation
-
-Previous patches ensured that the final frame allocation only needs
-a probe when the size is strictly greater than 1KiB. It's therefore
-safe to use the normal 1024 probe offset in all cases.
-
-The main motivation for doing this is to simplify the code and
-remove the number of special cases.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
- Always probe the residual allocation at offset 1024, asserting
- that that is in range.
-
-gcc/testsuite/
- * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
- to be at offset 1024 rather than offset 0.
- * gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
- * gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
----
- gcc/config/aarch64/aarch64.cc | 12 ++++--------
- .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +-
- .../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++--
- .../gcc.target/aarch64/stack-check-prologue-19.c | 4 ++--
- 4 files changed, 9 insertions(+), 13 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index a765f92329d..37809a306f7 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9838,16 +9838,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- are still safe. */
- if (residual)
- {
-- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
-+ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
-+
- /* If we're doing final adjustments, and we've done any full page
- allocations then any residual needs to be probed. */
- if (final_adjustment_p && rounded_size != 0)
- min_probe_threshold = 0;
-- /* If doing a small final adjustment, we always probe at offset 0.
-- This is done to avoid issues when the final adjustment is smaller
-- than the probing offset. */
-- else if (final_adjustment_p && rounded_size == 0)
-- residual_probe_offset = 0;
-
- aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
- if (residual >= min_probe_threshold)
-@@ -9858,8 +9854,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
- "\n", residual);
-
-- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-- residual_probe_offset));
-+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-+ guard_used_by_caller));
- emit_insn (gen_blockage ());
- }
- }
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-index 0d8a25d73a2..f0ec1389771 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-@@ -33,7 +33,7 @@ int test1(int z) {
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #1040
--** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-index 82447d20fff..6383bec5ebc 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-@@ -9,7 +9,7 @@ void g();
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #4064
--** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-@@ -50,7 +50,7 @@ int test1(int z) {
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #1040
--** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-index 73ac3e4e4eb..562039b5e9b 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-@@ -9,7 +9,7 @@ void g();
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #4064
--** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-@@ -50,7 +50,7 @@ int test1(int z) {
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #1040
--** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
---
-2.34.1
-
-
-From 96d85187c3b9c9a7efc2fd698c3d452e80d8aa47 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:20 +0100
-Subject: [PATCH 17/19] aarch64: Explicitly record probe registers in frame
- info
-
-The stack frame is currently divided into three areas:
-
-A: the area above the hard frame pointer
-B: the SVE saves below the hard frame pointer
-C: the outgoing arguments
-
-If the stack frame is allocated in one chunk, the allocation needs a
-probe if the frame size is >= guard_size - 1KiB. In addition, if the
-function is not a leaf function, it must probe an address no more than
-1KiB above the outgoing SP. We ensured the second condition by
-
-(1) using single-chunk allocations for non-leaf functions only if
- the link register save slot is within 512 bytes of the bottom
- of the frame; and
-
-(2) using the link register save as a probe (meaning, for instance,
- that it can't be individually shrink wrapped)
-
-If instead the stack is allocated in multiple chunks, then:
-
-* an allocation involving only the outgoing arguments (C above) requires
- a probe if the allocation size is > 1KiB
-
-* any other allocation requires a probe if the allocation size
- is >= guard_size - 1KiB
-
-* second and subsequent allocations require the previous allocation
- to probe at the bottom of the allocated area, regardless of the size
- of that previous allocation
-
-The final point means that, unlike for single allocations,
-it can be necessary to have both a non-SVE register probe and
-an SVE register probe. For example:
-
-* allocate A, probe using a non-SVE register save
-* allocate B, probe using an SVE register save
-* allocate C
-
-The non-SVE register used in this case was again the link register.
-It was previously used even if the link register save slot was some
-bytes above the bottom of the non-SVE register saves, but an earlier
-patch avoided that by putting the link register save slot first.
-
-As a belt-and-braces fix, this patch explicitly records which
-probe registers we're using and allows the non-SVE probe to be
-whichever register comes first (as for SVE).
-
-The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe)
- (aarch64_frame::hard_fp_save_and_probe): New fields.
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them.
- Rather than asserting that a leaf function saves LR, instead assert
- that a leaf function saves something.
- (aarch64_get_separate_components): Prevent the chosen probe
- registers from being individually shrink-wrapped.
- (aarch64_allocate_and_probe_stack_space): Remove workaround for
- probe registers that aren't at the bottom of the previous allocation.
-
-gcc/testsuite/
- * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes.
----
- gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++----
- gcc/config/aarch64/aarch64.h | 8 +++
- .../aarch64/sve/pcs/stack_clash_3.c | 6 +-
- 3 files changed, 64 insertions(+), 18 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 37809a306f7..6c59c39a639 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8471,15 +8471,11 @@ aarch64_layout_frame (void)
- && !crtl->abi->clobbers_full_reg_p (regno))
- frame.reg_offset[regno] = SLOT_REQUIRED;
-
-- /* With stack-clash, LR must be saved in non-leaf functions. The saving of
-- LR counts as an implicit probe which allows us to maintain the invariant
-- described in the comment at expand_prologue. */
-- gcc_assert (crtl->is_leaf
-- || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
-
- poly_int64 offset = crtl->outgoing_args_size;
- gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
- frame.bytes_below_saved_regs = offset;
-+ frame.sve_save_and_probe = INVALID_REGNUM;
-
- /* Now assign stack slots for the registers. Start with the predicate
- registers, since predicate LDR and STR have a relatively small
-@@ -8487,6 +8483,8 @@ aarch64_layout_frame (void)
- for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
- if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
- {
-+ if (frame.sve_save_and_probe == INVALID_REGNUM)
-+ frame.sve_save_and_probe = regno;
- frame.reg_offset[regno] = offset;
- offset += BYTES_PER_SVE_PRED;
- }
-@@ -8524,6 +8522,8 @@ aarch64_layout_frame (void)
- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
- if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
- {
-+ if (frame.sve_save_and_probe == INVALID_REGNUM)
-+ frame.sve_save_and_probe = regno;
- frame.reg_offset[regno] = offset;
- offset += vector_save_size;
- }
-@@ -8533,10 +8533,18 @@ aarch64_layout_frame (void)
- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
- bool saves_below_hard_fp_p
- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
-+ gcc_assert (!saves_below_hard_fp_p
-+ || (frame.sve_save_and_probe != INVALID_REGNUM
-+ && known_eq (frame.reg_offset[frame.sve_save_and_probe],
-+ frame.bytes_below_saved_regs)));
-+
- frame.bytes_below_hard_fp = offset;
-+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
-
- auto allocate_gpr_slot = [&](unsigned int regno)
- {
-+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
-+ frame.hard_fp_save_and_probe = regno;
- frame.reg_offset[regno] = offset;
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
- frame.wb_push_candidate1 = regno;
-@@ -8570,6 +8578,8 @@ aarch64_layout_frame (void)
- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
- if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
- {
-+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
-+ frame.hard_fp_save_and_probe = regno;
- /* If there is an alignment gap between integer and fp callee-saves,
- allocate the last fp register to it if possible. */
- if (regno == last_fp_reg
-@@ -8593,6 +8603,17 @@ aarch64_layout_frame (void)
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-
- frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
-+ gcc_assert (known_eq (frame.saved_regs_size,
-+ frame.below_hard_fp_saved_regs_size)
-+ || (frame.hard_fp_save_and_probe != INVALID_REGNUM
-+ && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
-+ frame.bytes_below_hard_fp)));
-+
-+ /* With stack-clash, a register must be saved in non-leaf functions.
-+ The saving of the bottommost register counts as an implicit probe,
-+ which allows us to maintain the invariant described in the comment
-+ at expand_prologue. */
-+ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
-
- offset += get_frame_size ();
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-@@ -8723,6 +8744,25 @@ aarch64_layout_frame (void)
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
-
-+ /* The frame is allocated in pieces, with each non-final piece
-+ including a register save at offset 0 that acts as a probe for
-+ the following piece. In addition, the save of the bottommost register
-+ acts as a probe for callees and allocas. Roll back any probes that
-+ aren't needed.
-+
-+ A probe isn't needed if it is associated with the final allocation
-+ (including callees and allocas) that happens before the epilogue is
-+ executed. */
-+ if (crtl->is_leaf
-+ && !cfun->calls_alloca
-+ && known_eq (frame.final_adjust, 0))
-+ {
-+ if (maybe_ne (frame.sve_callee_adjust, 0))
-+ frame.sve_save_and_probe = INVALID_REGNUM;
-+ else
-+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
-+ }
-+
- /* Make sure the individual adjustments add up to the full frame size. */
- gcc_assert (known_eq (frame.initial_adjust
- + frame.callee_adjust
-@@ -9354,13 +9394,6 @@ aarch64_get_separate_components (void)
-
- poly_int64 offset = frame.reg_offset[regno];
-
-- /* If the register is saved in the first SVE save slot, we use
-- it as a stack probe for -fstack-clash-protection. */
-- if (flag_stack_clash_protection
-- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
-- && known_eq (offset, frame.bytes_below_saved_regs))
-- continue;
--
- /* Get the offset relative to the register we'll use. */
- if (frame_pointer_needed)
- offset -= frame.bytes_below_hard_fp;
-@@ -9395,6 +9428,13 @@ aarch64_get_separate_components (void)
-
- bitmap_clear_bit (components, LR_REGNUM);
- bitmap_clear_bit (components, SP_REGNUM);
-+ if (flag_stack_clash_protection)
-+ {
-+ if (frame.sve_save_and_probe != INVALID_REGNUM)
-+ bitmap_clear_bit (components, frame.sve_save_and_probe);
-+ if (frame.hard_fp_save_and_probe != INVALID_REGNUM)
-+ bitmap_clear_bit (components, frame.hard_fp_save_and_probe);
-+ }
-
- return components;
- }
-@@ -9931,8 +9971,8 @@ aarch64_epilogue_uses (int regno)
- When probing is needed, we emit a probe at the start of the prologue
- and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
-
-- We have to track how much space has been allocated and the only stores
-- to the stack we track as implicit probes are the FP/LR stores.
-+ We can also use register saves as probes. These are stored in
-+ sve_save_and_probe and hard_fp_save_and_probe.
-
- For outgoing arguments we probe if the size is larger than 1KB, such that
- the ABI specified buffer is maintained for the next callee.
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index c8becb098c8..fbfb73545ba 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -863,6 +863,14 @@ struct GTY (()) aarch64_frame
- This is the register they should use. */
- unsigned spare_pred_reg;
-
-+ /* An SVE register that is saved below the hard frame pointer and that acts
-+ as a probe for later allocations, or INVALID_REGNUM if none. */
-+ unsigned sve_save_and_probe;
-+
-+ /* A register that is saved at the hard frame pointer and that acts
-+ as a probe for later allocations, or INVALID_REGNUM if none. */
-+ unsigned hard_fp_save_and_probe;
-+
- bool laid_out;
-
- /* True if shadow call stack should be enabled for the current function. */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
-index 3e01ec36c3a..3530a0d504b 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
-@@ -11,11 +11,10 @@
- ** mov x11, sp
- ** ...
- ** sub sp, sp, x13
--** str p4, \[sp\]
- ** cbz w0, [^\n]*
-+** str p4, \[sp\]
- ** ...
- ** ptrue p0\.b, all
--** ldr p4, \[sp\]
- ** addvl sp, sp, #1
- ** ldr x24, \[sp\], 32
- ** ret
-@@ -39,13 +38,12 @@ test_1 (int n)
- ** mov x11, sp
- ** ...
- ** sub sp, sp, x13
--** str p4, \[sp\]
- ** cbz w0, [^\n]*
-+** str p4, \[sp\]
- ** str p5, \[sp, #1, mul vl\]
- ** str p6, \[sp, #2, mul vl\]
- ** ...
- ** ptrue p0\.b, all
--** ldr p4, \[sp\]
- ** addvl sp, sp, #1
- ** ldr x24, \[sp\], 32
- ** ret
---
-2.34.1
-
-
-From 56df065080950bb30dda9c260f71be54269bdda5 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:20 +0100
-Subject: [PATCH 18/19] aarch64: Remove below_hard_fp_saved_regs_size
-
-After previous patches, it's no longer necessary to store
-saved_regs_size and below_hard_fp_saved_regs_size in the frame info.
-All measurements instead use the top or bottom of the frame as
-reference points.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size)
- (aarch64_frame::below_hard_fp_saved_regs_size): Delete.
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly.
----
- gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++-------------------
- gcc/config/aarch64/aarch64.h | 7 ------
- 2 files changed, 21 insertions(+), 31 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 6c59c39a639..b95e805a8cc 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8530,9 +8530,8 @@ aarch64_layout_frame (void)
-
- /* OFFSET is now the offset of the hard frame pointer from the bottom
- of the callee save area. */
-- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
-- bool saves_below_hard_fp_p
-- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
-+ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
-+ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0);
- gcc_assert (!saves_below_hard_fp_p
- || (frame.sve_save_and_probe != INVALID_REGNUM
- && known_eq (frame.reg_offset[frame.sve_save_and_probe],
-@@ -8602,9 +8601,8 @@ aarch64_layout_frame (void)
-
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-
-- frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
-- gcc_assert (known_eq (frame.saved_regs_size,
-- frame.below_hard_fp_saved_regs_size)
-+ auto saved_regs_size = offset - frame.bytes_below_saved_regs;
-+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
- || (frame.hard_fp_save_and_probe != INVALID_REGNUM
- && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
- frame.bytes_below_hard_fp)));
-@@ -8613,7 +8611,7 @@ aarch64_layout_frame (void)
- The saving of the bottommost register counts as an implicit probe,
- which allows us to maintain the invariant described in the comment
- at expand_prologue. */
-- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
-+ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
-
- offset += get_frame_size ();
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-@@ -8670,7 +8668,7 @@ aarch64_layout_frame (void)
-
- HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
- HOST_WIDE_INT const_saved_regs_size;
-- if (known_eq (frame.saved_regs_size, 0))
-+ if (known_eq (saved_regs_size, 0))
- frame.initial_adjust = frame.frame_size;
- else if (frame.frame_size.is_constant (&const_size)
- && const_size < max_push_offset
-@@ -8683,7 +8681,7 @@ aarch64_layout_frame (void)
- frame.callee_adjust = const_size;
- }
- else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
-- && frame.saved_regs_size.is_constant (&const_saved_regs_size)
-+ && saved_regs_size.is_constant (&const_saved_regs_size)
- && const_below_saved_regs + const_saved_regs_size < 512
- /* We could handle this case even with data below the saved
- registers, provided that that data left us with valid offsets
-@@ -8702,8 +8700,7 @@ aarch64_layout_frame (void)
- frame.initial_adjust = frame.frame_size;
- }
- else if (saves_below_hard_fp_p
-- && known_eq (frame.saved_regs_size,
-- frame.below_hard_fp_saved_regs_size))
-+ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size))
- {
- /* Frame in which all saves are SVE saves:
-
-@@ -8725,7 +8722,7 @@ aarch64_layout_frame (void)
- [save SVE registers relative to SP]
- sub sp, sp, bytes_below_saved_regs */
- frame.callee_adjust = const_above_fp;
-- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
-+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
- else
-@@ -8740,7 +8737,7 @@ aarch64_layout_frame (void)
- [save SVE registers relative to SP]
- sub sp, sp, bytes_below_saved_regs */
- frame.initial_adjust = frame.bytes_above_hard_fp;
-- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
-+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
-
-@@ -9936,17 +9933,17 @@ aarch64_epilogue_uses (int regno)
- | local variables | <-- frame_pointer_rtx
- | |
- +-------------------------------+
-- | padding | \
-- +-------------------------------+ |
-- | callee-saved registers | | frame.saved_regs_size
-- +-------------------------------+ |
-- | LR' | |
-- +-------------------------------+ |
-- | FP' | |
-- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned)
-- | SVE vector registers | | \
-- +-------------------------------+ | | below_hard_fp_saved_regs_size
-- | SVE predicate registers | / /
-+ | padding |
-+ +-------------------------------+
-+ | callee-saved registers |
-+ +-------------------------------+
-+ | LR' |
-+ +-------------------------------+
-+ | FP' |
-+ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned)
-+ | SVE vector registers |
-+ +-------------------------------+
-+ | SVE predicate registers |
- +-------------------------------+
- | dynamic allocation |
- +-------------------------------+
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index fbfb73545ba..cfeaf4657ab 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -777,18 +777,11 @@ struct GTY (()) aarch64_frame
- STACK_BOUNDARY. */
- HOST_WIDE_INT saved_varargs_size;
-
-- /* The size of the callee-save registers with a slot in REG_OFFSET. */
-- poly_int64 saved_regs_size;
--
- /* The number of bytes between the bottom of the static frame (the bottom
- of the outgoing arguments) and the bottom of the register save area.
- This value is always a multiple of STACK_BOUNDARY. */
- poly_int64 bytes_below_saved_regs;
-
-- /* The size of the callee-save registers with a slot in REG_OFFSET that
-- are saved below the hard frame pointer. */
-- poly_int64 below_hard_fp_saved_regs_size;
--
- /* The number of bytes between the bottom of the static frame (the bottom
- of the outgoing arguments) and the hard frame pointer. This value is
- always a multiple of STACK_BOUNDARY. */
---
-2.34.1
-
-
-From b96e66fd4ef3e36983969fb8cdd1956f551a074b Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:07:21 +0100
-Subject: [PATCH 19/19] aarch64: Make stack smash canary protect saved
- registers
-
-AArch64 normally puts the saved registers near the bottom of the frame,
-immediately above any dynamic allocations. But this means that a
-stack-smash attack on those dynamic allocations could overwrite the
-saved registers without needing to reach as far as the stack smash
-canary.
-
-The same thing could also happen for variable-sized arguments that are
-passed by value, since those are allocated before a call and popped on
-return.
-
-This patch avoids that by putting the locals (and thus the canary) below
-the saved registers when stack smash protection is active.
-
-The patch fixes CVE-2023-4039.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p):
- New function.
- (aarch64_layout_frame): Use it to decide whether locals should
- go above or below the saved registers.
- (aarch64_expand_prologue): Update stack layout comment.
- Emit a stack tie after the final adjustment.
-
-gcc/testsuite/
- * gcc.target/aarch64/stack-protector-8.c: New test.
- * gcc.target/aarch64/stack-protector-9.c: Likewise.
----
- gcc/config/aarch64/aarch64.cc | 46 +++++++--
- .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++
- .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++
- 3 files changed, 168 insertions(+), 6 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index b95e805a8cc..389c0e29353 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8394,6 +8394,20 @@ aarch64_needs_frame_chain (void)
- return aarch64_use_frame_pointer;
- }
-
-+/* Return true if the current function should save registers above
-+ the locals area, rather than below it. */
-+
-+static bool
-+aarch64_save_regs_above_locals_p ()
-+{
-+ /* When using stack smash protection, make sure that the canary slot
-+ comes between the locals and the saved registers. Otherwise,
-+ it would be possible for a carefully sized smash attack to change
-+ the saved registers (particularly LR and FP) without reaching the
-+ canary. */
-+ return crtl->stack_protect_guard;
-+}
-+
- /* Mark the registers that need to be saved by the callee and calculate
- the size of the callee-saved registers area and frame record (both FP
- and LR may be omitted). */
-@@ -8405,6 +8419,7 @@ aarch64_layout_frame (void)
- poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
- bool frame_related_fp_reg_p = false;
- aarch64_frame &frame = cfun->machine->frame;
-+ poly_int64 top_of_locals = -1;
-
- frame.emit_frame_chain = aarch64_needs_frame_chain ();
-
-@@ -8471,9 +8486,16 @@ aarch64_layout_frame (void)
- && !crtl->abi->clobbers_full_reg_p (regno))
- frame.reg_offset[regno] = SLOT_REQUIRED;
-
-+ bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
-
- poly_int64 offset = crtl->outgoing_args_size;
- gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
-+ if (regs_at_top_p)
-+ {
-+ offset += get_frame_size ();
-+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-+ top_of_locals = offset;
-+ }
- frame.bytes_below_saved_regs = offset;
- frame.sve_save_and_probe = INVALID_REGNUM;
-
-@@ -8613,15 +8635,18 @@ aarch64_layout_frame (void)
- at expand_prologue. */
- gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
-
-- offset += get_frame_size ();
-- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-- auto top_of_locals = offset;
--
-+ if (!regs_at_top_p)
-+ {
-+ offset += get_frame_size ();
-+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-+ top_of_locals = offset;
-+ }
- offset += frame.saved_varargs_size;
- gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
- frame.frame_size = offset;
-
- frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
-+ gcc_assert (known_ge (top_of_locals, 0));
- frame.bytes_above_locals = frame.frame_size - top_of_locals;
-
- frame.initial_adjust = 0;
-@@ -9930,10 +9955,10 @@ aarch64_epilogue_uses (int regno)
- | for register varargs |
- | |
- +-------------------------------+
-- | local variables | <-- frame_pointer_rtx
-+ | local variables (1) | <-- frame_pointer_rtx
- | |
- +-------------------------------+
-- | padding |
-+ | padding (1) |
- +-------------------------------+
- | callee-saved registers |
- +-------------------------------+
-@@ -9945,6 +9970,10 @@ aarch64_epilogue_uses (int regno)
- +-------------------------------+
- | SVE predicate registers |
- +-------------------------------+
-+ | local variables (2) |
-+ +-------------------------------+
-+ | padding (2) |
-+ +-------------------------------+
- | dynamic allocation |
- +-------------------------------+
- | padding |
-@@ -9954,6 +9983,9 @@ aarch64_epilogue_uses (int regno)
- +-------------------------------+
- | | <-- stack_pointer_rtx (aligned)
-
-+ The regions marked (1) and (2) are mutually exclusive. (2) is used
-+ when aarch64_save_regs_above_locals_p is true.
-+
- Dynamic stack allocations via alloca() decrease stack_pointer_rtx
- but leave frame_pointer_rtx and hard_frame_pointer_rtx
- unchanged.
-@@ -10149,6 +10181,8 @@ aarch64_expand_prologue (void)
- gcc_assert (known_eq (bytes_below_sp, final_adjust));
- aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
- !frame_pointer_needed, true);
-+ if (emit_frame_chain && maybe_ne (final_adjust, 0))
-+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
- }
-
- /* Return TRUE if we can use a simple_return insn.
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
-new file mode 100644
-index 00000000000..e71d820e365
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
-@@ -0,0 +1,95 @@
-+/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+void g(void *);
-+__SVBool_t *h(void *);
-+
-+/*
-+** test1:
-+** sub sp, sp, #288
-+** stp x29, x30, \[sp, #?272\]
-+** add x29, sp, #?272
-+** mrs (x[0-9]+), tpidr2_el0
-+** ldr (x[0-9]+), \[\1, #?16\]
-+** str \2, \[sp, #?264\]
-+** mov \2, #?0
-+** add x0, sp, #?8
-+** bl g
-+** ...
-+** mrs .*
-+** ...
-+** bne .*
-+** ...
-+** ldp x29, x30, \[sp, #?272\]
-+** add sp, sp, #?288
-+** ret
-+** bl __stack_chk_fail
-+*/
-+int test1() {
-+ int y[0x40];
-+ g(y);
-+ return 1;
-+}
-+
-+/*
-+** test2:
-+** stp x29, x30, \[sp, #?-16\]!
-+** mov x29, sp
-+** sub sp, sp, #1040
-+** mrs (x[0-9]+), tpidr2_el0
-+** ldr (x[0-9]+), \[\1, #?16\]
-+** str \2, \[sp, #?1032\]
-+** mov \2, #?0
-+** add x0, sp, #?8
-+** bl g
-+** ...
-+** mrs .*
-+** ...
-+** bne .*
-+** ...
-+** add sp, sp, #?1040
-+** ldp x29, x30, \[sp\], #?16
-+** ret
-+** bl __stack_chk_fail
-+*/
-+int test2() {
-+ int y[0x100];
-+ g(y);
-+ return 1;
-+}
-+
-+#pragma GCC target "+sve"
-+
-+/*
-+** test3:
-+** stp x29, x30, \[sp, #?-16\]!
-+** mov x29, sp
-+** addvl sp, sp, #-18
-+** ...
-+** str p4, \[sp\]
-+** ...
-+** sub sp, sp, #272
-+** mrs (x[0-9]+), tpidr2_el0
-+** ldr (x[0-9]+), \[\1, #?16\]
-+** str \2, \[sp, #?264\]
-+** mov \2, #?0
-+** add x0, sp, #?8
-+** bl h
-+** ...
-+** mrs .*
-+** ...
-+** bne .*
-+** ...
-+** add sp, sp, #?272
-+** ...
-+** ldr p4, \[sp\]
-+** ...
-+** addvl sp, sp, #18
-+** ldp x29, x30, \[sp\], #?16
-+** ret
-+** bl __stack_chk_fail
-+*/
-+__SVBool_t test3() {
-+ int y[0x40];
-+ return *h(y);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
-new file mode 100644
-index 00000000000..58f322aa480
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
-@@ -0,0 +1,33 @@
-+/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+/*
-+** main:
-+** ...
-+** stp x29, x30, \[sp, #?-[0-9]+\]!
-+** ...
-+** sub sp, sp, #[0-9]+
-+** ...
-+** str x[0-9]+, \[x29, #?-8\]
-+** ...
-+*/
-+int f(const char *);
-+void g(void *);
-+int main(int argc, char* argv[])
-+{
-+ int a;
-+ int b;
-+ char c[2+f(argv[1])];
-+ int d[0x100];
-+ char y;
-+
-+ y=42; a=4; b=10;
-+ c[0] = 'h'; c[1] = '\0';
-+
-+ c[f(argv[2])] = '\0';
-+
-+ __builtin_printf("%d %d\n%s\n", a, b, c);
-+ g(d);
-+
-+ return 0;
-+}
---
-2.34.1
-