From 5e067562e2acfd3c3d3313494ad0495492738de3 Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Fri, 9 Jul 2010 15:14:45 -0700 Subject: gcc-4.4.4: Use CP15 register for TLS access on armv7-a. * ARMv7 was using -mtp=soft whereas the CP15 register for TLS is available and should be used. This should improve the performance of TLS access. Signed-off-by: Khem Raj --- recipes/gcc/gcc-4.4.4.inc | 3 +- .../gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch | 217 +++++++++++++++++++++ 2 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 recipes/gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch diff --git a/recipes/gcc/gcc-4.4.4.inc b/recipes/gcc/gcc-4.4.4.inc index 37c8cd0001..d1987f92c8 100644 --- a/recipes/gcc/gcc-4.4.4.inc +++ b/recipes/gcc/gcc-4.4.4.inc @@ -8,7 +8,7 @@ LICENSE = "GPLv3" DEPENDS = "mpfr gmp" NATIVEDEPS = "mpfr-native gmp-native" -INC_PR = "r2" +INC_PR = "r3" FILESPATHPKG .= ":gcc-$PV" @@ -27,6 +27,7 @@ SRC_URI = "${GNU_MIRROR}/gcc/gcc-${PV}/gcc-${PV}.tar.bz2;name=gcc444tarbz2 \ file://gcc-armv4-pass-fix-v4bx-to-ld.patch \ file://gcc-add-t-slibgcc-libgcc.patch \ file://gcc-4.3.3-fix-EXTRA_BUILD.patch \ + file://gcc-arm-cp15-tpreg-for-TLS.patch \ " SRC_URI[gcc444tarbz2.md5sum] = "7ff5ce9e5f0b088ab48720bbd7203530" SRC_URI[gcc444tarbz2.sha256sum] = "e1c13696b45752ad3f652304fab5120a43a8a5c0f438d3bda78cf16b620c0c58" diff --git a/recipes/gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch b/recipes/gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch new file mode 100644 index 0000000000..a3fbdcee3c --- /dev/null +++ b/recipes/gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch @@ -0,0 +1,217 @@ +Backport the below patch from trunk. This will let gcc use the +hard TLS register on ARMv7; so far it has been using soft access. +This should help improve performance. + +2010-02-01 Richard Earnshaw + + * arm.c (FL_FOR_ARCH_7A): is also a superset of ARMv6K. + (arm_override_options): Allow automatic selection of the thread + pointer register if thumb2. 
+ (legitimize_pic_address): Improve code sequences for Thumb2. + (arm_call_tls_get_addr): Likewise. + (legitimize_tls_address): Likewise. + * arm.md (pic_load_addr_arm): Delete. Replace with ... + (pic_load_addr_32bit): ... this. New named pattern. + * thumb2.md (pic_load_addr_thumb2): Delete. + (pic_load_dot_plus_four): Delete. + (tls_load_dot_plus_four): New named pattern. + + +Index: gcc-4.4.4/gcc/config/arm/arm.c +=================================================================== +--- gcc-4.4.4.orig/gcc/config/arm/arm.c 2010-02-18 05:13:03.000000000 -0800 ++++ gcc-4.4.4/gcc/config/arm/arm.c 2010-07-09 15:07:03.829739455 -0700 +@@ -495,7 +495,7 @@ static int thumb_call_reg_needed; + #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) + #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) + #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) +-#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) ++#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) + #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) + #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) + +@@ -1549,7 +1549,7 @@ arm_override_options (void) + /* Use the cp15 method if it is available. 
*/ + if (target_thread_pointer == TP_AUTO) + { +- if (arm_arch6k && !TARGET_THUMB) ++ if (arm_arch6k && !TARGET_THUMB1) + target_thread_pointer = TP_CP15; + else + target_thread_pointer = TP_SOFT; +@@ -3634,10 +3634,8 @@ legitimize_pic_address (rtx orig, enum m + else + address = reg; + +- if (TARGET_ARM) +- emit_insn (gen_pic_load_addr_arm (address, orig)); +- else if (TARGET_THUMB2) +- emit_insn (gen_pic_load_addr_thumb2 (address, orig)); ++ if (TARGET_32BIT) ++ emit_insn (gen_pic_load_addr_32bit (address, orig)); + else /* TARGET_THUMB1 */ + emit_insn (gen_pic_load_addr_thumb1 (address, orig)); + +@@ -3814,7 +3812,7 @@ arm_load_pic_register (unsigned long sav + { + pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); +- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); ++ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); + + emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); + +@@ -3837,29 +3835,13 @@ arm_load_pic_register (unsigned long sav + UNSPEC_GOTSYM_OFF); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); + +- if (TARGET_ARM) +- { +- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); +- emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); +- } +- else if (TARGET_THUMB2) ++ if (TARGET_32BIT) + { +- /* Thumb-2 only allows very limited access to the PC. Calculate the +- address in a temporary register. 
*/ +- if (arm_pic_register != INVALID_REGNUM) +- { +- pic_tmp = gen_rtx_REG (SImode, +- thumb_find_work_register (saved_regs)); +- } ++ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); ++ if (TARGET_ARM) ++ emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); + else +- { +- gcc_assert (can_create_pseudo_p ()); +- pic_tmp = gen_reg_rtx (Pmode); +- } +- +- emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx)); +- emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno)); +- emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp)); ++ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); + } + else /* TARGET_THUMB1 */ + { +@@ -4499,14 +4481,7 @@ arm_call_tls_get_addr (rtx x, rtx reg, r + if (TARGET_ARM) + emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); + else if (TARGET_THUMB2) +- { +- rtx tmp; +- /* Thumb-2 only allows very limited access to the PC. Calculate +- the address in a temporary register. */ +- tmp = gen_reg_rtx (SImode); +- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno)); +- emit_insn (gen_addsi3(reg, reg, tmp)); +- } ++ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + else /* TARGET_THUMB1 */ + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + +@@ -4562,15 +4537,7 @@ legitimize_tls_address (rtx x, rtx reg) + if (TARGET_ARM) + emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); + else if (TARGET_THUMB2) +- { +- rtx tmp; +- /* Thumb-2 only allows very limited access to the PC. Calculate +- the address in a temporary register. 
*/ +- tmp = gen_reg_rtx (SImode); +- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno)); +- emit_insn (gen_addsi3(reg, reg, tmp)); +- emit_move_insn (reg, gen_const_mem (SImode, reg)); +- } ++ emit_insn (gen_tls_load_dot_plus_four (reg, reg, labelno)); + else + { + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); +Index: gcc-4.4.4/gcc/config/arm/arm.md +=================================================================== +--- gcc-4.4.4.orig/gcc/config/arm/arm.md 2010-02-18 05:13:03.000000000 -0800 ++++ gcc-4.4.4/gcc/config/arm/arm.md 2010-07-09 15:07:03.833742490 -0700 +@@ -5091,14 +5091,17 @@ + ;; the insn alone, and to force the minipool generation pass to then move + ;; the GOT symbol to memory. + +-(define_insn "pic_load_addr_arm" ++(define_insn "pic_load_addr_32bit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] +- "TARGET_ARM && flag_pic" ++ "TARGET_32BIT && flag_pic" + "ldr%?\\t%0, %1" + [(set_attr "type" "load1") +- (set (attr "pool_range") (const_int 4096)) +- (set (attr "neg_pool_range") (const_int 4084))] ++ (set_attr "pool_range" "4096") ++ (set (attr "neg_pool_range") ++ (if_then_else (eq_attr "is_thumb" "no") ++ (const_int 4084) ++ (const_int 0)))] + ) + + (define_insn "pic_load_addr_thumb1" +@@ -5116,7 +5119,7 @@ + (const_int 4) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE))] +- "TARGET_THUMB1" ++ "TARGET_THUMB" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); +Index: gcc-4.4.4/gcc/config/arm/thumb2.md +=================================================================== +--- gcc-4.4.4.orig/gcc/config/arm/thumb2.md 2010-02-24 06:50:43.000000000 -0800 ++++ gcc-4.4.4/gcc/config/arm/thumb2.md 2010-07-09 15:07:03.829739455 -0700 +@@ -243,37 +243,19 @@ + (set_attr "neg_pool_range" "*,*,*,*,0,*")] + ) + +-;; ??? 
We can probably do better with thumb2 +-(define_insn "pic_load_addr_thumb2" +- [(set (match_operand:SI 0 "s_register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] +- "TARGET_THUMB2 && flag_pic" +- "ldr%?\\t%0, %1" +- [(set_attr "type" "load1") +- (set_attr "pool_range" "4096") +- (set_attr "neg_pool_range" "0")] +-) +- +-;; Set reg to the address of this instruction plus four. The low two +-;; bits of the PC are always read as zero, so ensure the instructions is +-;; word aligned. +-(define_insn "pic_load_dot_plus_four" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(const_int 4) +- (match_operand 1 "" "")] +- UNSPEC_PIC_BASE))] ++(define_insn "tls_load_dot_plus_four" ++ [(set (match_operand:SI 0 "register_operand" "=l,r") ++ (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "+l,r") ++ (const_int 4) ++ (match_operand 2 "" "")] ++ UNSPEC_PIC_BASE)))] + "TARGET_THUMB2" + "* +- assemble_align(BITS_PER_WORD); + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", +- INTVAL (operands[1])); +- /* We use adr because some buggy gas assemble add r8, pc, #0 +- to add.w r8, pc, #0, not addw r8, pc, #0. */ +- asm_fprintf (asm_out_file, \"\\tadr\\t%r, %LLPIC%d + 4\\n\", +- REGNO(operands[0]), (int)INTVAL (operands[1])); +- return \"\"; ++ INTVAL (operands[2])); ++ return \"add\\t%1, %|pc\;ldr%?\\t%0, [%1]\"; + " +- [(set_attr "length" "6")] ++ [(set_attr "length" "4,6")] + ) + + ;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot -- cgit 1.2.3-korg