diff options
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99397.patch')
-rw-r--r-- | toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99397.patch | 3565 |
1 files changed, 3565 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99397.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99397.patch new file mode 100644 index 0000000000..e795d54e0f --- /dev/null +++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99397.patch @@ -0,0 +1,3565 @@ +2010-09-16 Andrew Stubbs <ams@codesourcery.com> + + Backport from FSF: + + 2010-09-01 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + * config/arm/neon-schedgen.ml (core): New type. + (allCores): List of supported cores. + (availability_table): Add supported cores. + (collate_bypasses): Accept core as a parameter. + (worst_case_latencies_and_bypasses): Accept core as a + parameter. + (emit_insn_reservations): Accept core as a parameter. + Use tuneStr and coreStr to get tune attribute and prefix + for functional units. + (emit_bypasses): Accept core name and use it. + (calculate_per_core_availability_table): New. + (filter_core): New. + (calculate_core_availability_table): New. + (main): Use calculate_core_availability_table. + * config/arm/cortex-a8-neon.md: Update copyright year. + Regenerated from ml file and merged in. + (neon_mrrc, neon_mrc): Rename to cortex_a8_neon_mrrc and + cortex_a8_neon_mrc. + + 2010-09-10 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + * config/arm/neon-schedgen.ml (allCores): Add support for + Cortex-A9. + * config/arm/cortex-a9-neon.md: New and partially generated. + * config/arm/cortex-a9.md (cortex_a9_dp): Adjust for Neon. + + 2010-09-15 Chung-Lin Tang <cltang@codesourcery.com> + + Issue #9441 + +=== modified file 'gcc/config/arm/cortex-a8-neon.md' +--- old/gcc/config/arm/cortex-a8-neon.md 2009-02-20 15:20:38 +0000 ++++ new/gcc/config/arm/cortex-a8-neon.md 2010-09-16 09:47:44 +0000 +@@ -182,12 +182,12 @@ + + ;; NEON -> core transfers. 
+ +-(define_insn_reservation "neon_mrc" 20 ++(define_insn_reservation "cortex_a8_neon_mrc" 20 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mrc")) + "cortex_a8_neon_ls") + +-(define_insn_reservation "neon_mrrc" 21 ++(define_insn_reservation "cortex_a8_neon_mrrc" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mrrc")) + "cortex_a8_neon_ls_2") +@@ -196,48 +196,48 @@ + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N3. +-(define_insn_reservation "neon_int_1" 3 ++(define_insn_reservation "cortex_a8_neon_int_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_1")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their (D|Q)m operands at N1, + ;; their (D|Q)n operands at N2, and produce a result at N3. +-(define_insn_reservation "neon_int_2" 3 ++(define_insn_reservation "cortex_a8_neon_int_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_2")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N3. +-(define_insn_reservation "neon_int_3" 3 ++(define_insn_reservation "cortex_a8_neon_int_3" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_3")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N4. +-(define_insn_reservation "neon_int_4" 4 ++(define_insn_reservation "cortex_a8_neon_int_4" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_4")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their (D|Q)m operands at N1, + ;; their (D|Q)n operands at N2, and produce a result at N4. 
+-(define_insn_reservation "neon_int_5" 4 ++(define_insn_reservation "cortex_a8_neon_int_5" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_5")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N4. +-(define_insn_reservation "neon_vqneg_vqabs" 4 ++(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vqneg_vqabs")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation produce a result at N3. +-(define_insn_reservation "neon_vmov" 3 ++(define_insn_reservation "cortex_a8_neon_vmov" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vmov")) + "cortex_a8_neon_dp") +@@ -245,7 +245,7 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and + ;; produce a result at N6. +-(define_insn_reservation "neon_vaba" 6 ++(define_insn_reservation "cortex_a8_neon_vaba" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vaba")) + "cortex_a8_neon_dp") +@@ -253,35 +253,35 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and + ;; produce a result at N6 on cycle 2. +-(define_insn_reservation "neon_vaba_qqq" 7 ++(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vaba_qqq")) + "cortex_a8_neon_dp_2") + + ;; Instructions using this reservation read their (D|Q)m operands at N1, + ;; their (D|Q)d operands at N3, and produce a result at N6. +-(define_insn_reservation "neon_vsma" 6 ++(define_insn_reservation "cortex_a8_neon_vsma" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vsma")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N6. 
+-(define_insn_reservation "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 ++(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N6 on cycle 2. +-(define_insn_reservation "neon_mul_qqq_8_16_32_ddd_32" 7 ++(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a8_neon_dp_2") + + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +-(define_insn_reservation "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 ++(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a8_neon_dp_2") +@@ -289,7 +289,7 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and + ;; produce a result at N6. +-(define_insn_reservation "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 ++(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") +@@ -297,7 +297,7 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and + ;; produce a result at N6 on cycle 2. 
+-(define_insn_reservation "neon_mla_qqq_8_16" 7 ++(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_qqq_8_16")) + "cortex_a8_neon_dp_2") +@@ -305,7 +305,7 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and + ;; produce a result at N6 on cycle 2. +-(define_insn_reservation "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 ++(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a8_neon_dp_2") +@@ -313,21 +313,21 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and + ;; produce a result at N6 on cycle 4. +-(define_insn_reservation "neon_mla_qqq_32_qqd_32_scalar" 9 ++(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, and produce a result at N6. +-(define_insn_reservation "neon_mul_ddd_16_scalar_32_16_long_scalar" 6 ++(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. 
+-(define_insn_reservation "neon_mul_qqd_32_scalar" 9 ++(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a8_neon_dp_4") +@@ -335,84 +335,84 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and + ;; produce a result at N6. +-(define_insn_reservation "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 ++(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N3. +-(define_insn_reservation "neon_shift_1" 3 ++(define_insn_reservation "cortex_a8_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_1")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N4. +-(define_insn_reservation "neon_shift_2" 4 ++(define_insn_reservation "cortex_a8_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_2")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N3 on cycle 2. +-(define_insn_reservation "neon_shift_3" 4 ++(define_insn_reservation "cortex_a8_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_3")) + "cortex_a8_neon_dp_2") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N1. 
+-(define_insn_reservation "neon_vshl_ddd" 1 ++(define_insn_reservation "cortex_a8_neon_vshl_ddd" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vshl_ddd")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N4 on cycle 2. +-(define_insn_reservation "neon_vqshl_vrshl_vqrshl_qqq" 5 ++(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a8_neon_dp_2") + + ;; Instructions using this reservation read their (D|Q)m operands at N1, + ;; their (D|Q)d operands at N3, and produce a result at N6. +-(define_insn_reservation "neon_vsra_vrsra" 6 ++(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vsra_vrsra")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N5. +-(define_insn_reservation "neon_fp_vadd_ddd_vabs_dd" 5 ++(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a8_neon_fadd") + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N5 on cycle 2. +-(define_insn_reservation "neon_fp_vadd_qqq_vabs_qq" 6 ++(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a8_neon_fadd_2") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N5. 
+-(define_insn_reservation "neon_fp_vsum" 5 ++(define_insn_reservation "cortex_a8_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vsum")) + "cortex_a8_neon_fadd") + + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, and produce a result at N5. +-(define_insn_reservation "neon_fp_vmul_ddd" 5 ++(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmul_ddd")) + "cortex_a8_neon_dp") + + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +-(define_insn_reservation "neon_fp_vmul_qqd" 6 ++(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmul_qqd")) + "cortex_a8_neon_dp_2") +@@ -420,7 +420,7 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and + ;; produce a result at N9. +-(define_insn_reservation "neon_fp_vmla_ddd" 9 ++(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_ddd")) + "cortex_a8_neon_fmul_then_fadd") +@@ -428,7 +428,7 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and + ;; produce a result at N9 on cycle 2. +-(define_insn_reservation "neon_fp_vmla_qqq" 10 ++(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") +@@ -436,7 +436,7 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and + ;; produce a result at N9. 
+-(define_insn_reservation "neon_fp_vmla_ddd_scalar" 9 ++(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a8_neon_fmul_then_fadd") +@@ -444,869 +444,869 @@ + ;; Instructions using this reservation read their (D|Q)n operands at N2, + ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and + ;; produce a result at N9 on cycle 2. +-(define_insn_reservation "neon_fp_vmla_qqq_scalar" 10 ++(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a8_neon_fmul_then_fadd_2") + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N9. +-(define_insn_reservation "neon_fp_vrecps_vrsqrts_ddd" 9 ++(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a8_neon_fmul_then_fadd") + + ;; Instructions using this reservation read their source operands at N2, and + ;; produce a result at N9 on cycle 2. +-(define_insn_reservation "neon_fp_vrecps_vrsqrts_qqq" 10 ++(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N2. +-(define_insn_reservation "neon_bp_simple" 2 ++(define_insn_reservation "cortex_a8_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_simple")) + "cortex_a8_neon_perm") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N2 on cycle 2. 
+-(define_insn_reservation "neon_bp_2cycle" 3 ++(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_2cycle")) + "cortex_a8_neon_perm_2") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N2 on cycle 3. +-(define_insn_reservation "neon_bp_3cycle" 4 ++(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_3cycle")) + "cortex_a8_neon_perm_3") + + ;; Instructions using this reservation produce a result at N1. +-(define_insn_reservation "neon_ldr" 1 ++(define_insn_reservation "cortex_a8_neon_ldr" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_ldr")) + "cortex_a8_neon_ls") + + ;; Instructions using this reservation read their source operands at N1. +-(define_insn_reservation "neon_str" 0 ++(define_insn_reservation "cortex_a8_neon_str" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_str")) + "cortex_a8_neon_ls") + + ;; Instructions using this reservation produce a result at N1 on cycle 2. +-(define_insn_reservation "neon_vld1_1_2_regs" 2 ++(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_1_2_regs")) + "cortex_a8_neon_ls_2") + + ;; Instructions using this reservation produce a result at N1 on cycle 3. +-(define_insn_reservation "neon_vld1_3_4_regs" 3 ++(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_3_4_regs")) + "cortex_a8_neon_ls_3") + + ;; Instructions using this reservation produce a result at N2 on cycle 2. 
+-(define_insn_reservation "neon_vld2_2_regs_vld1_vld2_all_lanes" 3 ++(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a8_neon_ls_2") + + ;; Instructions using this reservation produce a result at N2 on cycle 3. +-(define_insn_reservation "neon_vld2_4_regs" 4 ++(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld2_4_regs")) + "cortex_a8_neon_ls_3") + + ;; Instructions using this reservation produce a result at N2 on cycle 4. +-(define_insn_reservation "neon_vld3_vld4" 5 ++(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4")) + "cortex_a8_neon_ls_4") + + ;; Instructions using this reservation read their source operands at N1. +-(define_insn_reservation "neon_vst1_1_2_regs_vst2_2_regs" 0 ++(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a8_neon_ls_2") + + ;; Instructions using this reservation read their source operands at N1. +-(define_insn_reservation "neon_vst1_3_4_regs" 0 ++(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_3_4_regs")) + "cortex_a8_neon_ls_3") + + ;; Instructions using this reservation read their source operands at N1. +-(define_insn_reservation "neon_vst2_4_regs_vst3_vst4" 0 ++(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a8_neon_ls_4") + + ;; Instructions using this reservation read their source operands at N1. 
+-(define_insn_reservation "neon_vst3_vst4" 0 ++(define_insn_reservation "cortex_a8_neon_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst3_vst4")) + "cortex_a8_neon_ls_4") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N2 on cycle 3. +-(define_insn_reservation "neon_vld1_vld2_lane" 4 ++(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_vld2_lane")) + "cortex_a8_neon_ls_3") + + ;; Instructions using this reservation read their source operands at N1, and + ;; produce a result at N2 on cycle 5. +-(define_insn_reservation "neon_vld3_vld4_lane" 6 ++(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4_lane")) + "cortex_a8_neon_ls_5") + + ;; Instructions using this reservation read their source operands at N1. +-(define_insn_reservation "neon_vst1_vst2_lane" 0 ++(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_vst2_lane")) + "cortex_a8_neon_ls_2") + + ;; Instructions using this reservation read their source operands at N1. +-(define_insn_reservation "neon_vst3_vst4_lane" 0 ++(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst3_vst4_lane")) + "cortex_a8_neon_ls_3") + + ;; Instructions using this reservation produce a result at N2 on cycle 2. +-(define_insn_reservation "neon_vld3_vld4_all_lanes" 3 ++(define_insn_reservation "cortex_a8_neon_vld3_vld4_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + "cortex_a8_neon_ls_3") + + ;; Instructions using this reservation produce a result at N2. 
+-(define_insn_reservation "neon_mcr" 2 ++(define_insn_reservation "cortex_a8_neon_mcr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mcr")) + "cortex_a8_neon_perm") + + ;; Instructions using this reservation produce a result at N2. +-(define_insn_reservation "neon_mcr_2_mcrr" 2 ++(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mcr_2_mcrr")) + "cortex_a8_neon_perm_2") + + ;; Exceptions to the default latencies. + +-(define_bypass 1 "neon_mcr_2_mcrr" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 1 "neon_mcr" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_vld3_vld4_all_lanes" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_vld3_vld4_lane" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- 
neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_vld1_vld2_lane" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 4 "neon_vld3_vld4" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_vld2_4_regs" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_vld2_2_regs_vld1_vld2_all_lanes" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_vld1_3_4_regs" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- 
neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 1 "neon_vld1_1_2_regs" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 0 "neon_ldr" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_bp_3cycle" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_bp_2cycle" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 1 "neon_bp_simple" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- 
neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 9 "neon_fp_vrecps_vrsqrts_qqq" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 8 "neon_fp_vrecps_vrsqrts_ddd" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 9 "neon_fp_vmla_qqq_scalar" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 8 "neon_fp_vmla_ddd_scalar" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 9 "neon_fp_vmla_qqq" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- 
neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 8 "neon_fp_vmla_ddd" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_fp_vmul_qqd" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 4 "neon_fp_vmul_ddd" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 4 "neon_fp_vsum" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_fp_vadd_qqq_vabs_qq" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- 
neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 4 "neon_fp_vadd_ddd_vabs_dd" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_vsra_vrsra" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 4 "neon_vqshl_vrshl_vqrshl_qqq" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 0 "neon_vshl_ddd" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_shift_3" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- 
neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_shift_2" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_shift_1" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 8 "neon_mul_qqd_32_scalar" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_mul_ddd_16_scalar_32_16_long_scalar" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- 
neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 8 "neon_mla_qqq_32_qqd_32_scalar" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 6 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 6 "neon_mla_qqq_8_16" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 6 "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- 
neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 6 "neon_mul_qqq_8_16_32_ddd_32" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_vsma" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 6 "neon_vaba_qqq" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 5 "neon_vaba" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- 
neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_vmov" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_vqneg_vqabs" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_int_5" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 3 "neon_int_4" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_int_3" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- 
neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_int_2" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") +- +-(define_bypass 2 "neon_int_1" +- "neon_int_1,\ +- neon_int_4,\ +- neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mul_qqq_8_16_32_ddd_32,\ +- neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ +- neon_mla_qqq_8_16,\ +- neon_fp_vadd_ddd_vabs_dd,\ +- neon_fp_vadd_qqq_vabs_qq,\ +- neon_fp_vmla_ddd,\ +- neon_fp_vmla_qqq,\ +- neon_fp_vrecps_vrsqrts_ddd,\ +- neon_fp_vrecps_vrsqrts_qqq") ++(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 1 "cortex_a8_neon_mcr" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") 
++ ++(define_bypass 2 "cortex_a8_neon_vld3_vld4_all_lanes" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a8_neon_vld3_vld4" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ 
cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_vld2_4_regs" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ 
cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 0 "cortex_a8_neon_ldr" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_bp_3cycle" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_bp_2cycle" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 1 "cortex_a8_neon_bp_simple" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ 
cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 
"cortex_a8_neon_fp_vmla_ddd_scalar" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ 
cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a8_neon_fp_vsum" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ 
cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_vsra_vrsra" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 0 "cortex_a8_neon_vshl_ddd" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_shift_3" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ 
cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_shift_2" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_shift_1" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 
"cortex_a8_neon_mul_qqd_32_scalar" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ 
cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ 
cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_vsma" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a8_neon_vaba_qqq" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ 
cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a8_neon_vaba" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_vmov" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_vqneg_vqabs" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_int_5" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ 
++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a8_neon_int_4" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_int_3" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_int_2" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a8_neon_int_1" ++ "cortex_a8_neon_int_1,\ ++ cortex_a8_neon_int_4,\ ++ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a8_neon_mla_qqq_8_16,\ ++ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ 
++ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a8_neon_fp_vmla_ddd,\ ++ cortex_a8_neon_fp_vmla_qqq,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + + +=== added file 'gcc/config/arm/cortex-a9-neon.md' +--- old/gcc/config/arm/cortex-a9-neon.md 1970-01-01 00:00:00 +0000 ++++ new/gcc/config/arm/cortex-a9-neon.md 2010-09-16 09:47:44 +0000 +@@ -0,0 +1,1237 @@ ++;; ARM Cortex-A9 pipeline description ++;; Copyright (C) 2010 Free Software Foundation, Inc. ++;; ++;; Neon pipeline description contributed by ARM Ltd. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++(define_automaton "cortex_a9_neon") ++ ++;; Only one instruction can be issued per cycle. ++(define_cpu_unit "cortex_a9_neon_issue_perm" "cortex_a9_neon") ++ ++;; Only one data-processing instruction can be issued per cycle. ++(define_cpu_unit "cortex_a9_neon_issue_dp" "cortex_a9_neon") ++ ++;; We need a special mutual exclusion (to be used in addition to ++;; cortex_a9_neon_issue_dp) for the case when an instruction such as ++;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to ++;; E2 of the floating-point add pipeline. 
On the cycle previous to that ++;; forward we must prevent issue of any instruction to the floating-point ++;; add pipeline, but still allow issue of a data-processing instruction ++;; to any of the other pipelines. ++(define_cpu_unit "cortex_a9_neon_issue_fadd" "cortex_a9_neon") ++(define_cpu_unit "cortex_a9_neon_mcr" "cortex_a9_neon") ++ ++ ++;; Patterns of reservation. ++;; We model the NEON issue units as running in parallel with the core ones. ++;; We assume that multi-cycle NEON instructions get decomposed into ++;; micro-ops as they are issued into the NEON pipeline. ++ ++(define_reservation "cortex_a9_neon_dp" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp") ++(define_reservation "cortex_a9_neon_dp_2" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ ++ cortex_a9_neon_issue_dp") ++(define_reservation "cortex_a9_neon_dp_4" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ ++ cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp") ++ ++(define_reservation "cortex_a9_neon_fadd" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp + \ ++ cortex_a9_neon_issue_fadd") ++(define_reservation "cortex_a9_neon_fadd_2" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ ++ cortex_a9_neon_issue_fadd,\ ++ cortex_a9_neon_issue_dp") ++ ++(define_reservation "cortex_a9_neon_perm" ++ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm") ++(define_reservation "cortex_a9_neon_perm_2" ++ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm, \ ++ cortex_a9_neon_issue_perm") ++(define_reservation "cortex_a9_neon_perm_3" ++ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_perm") ++ ++(define_reservation "cortex_a9_neon_ls" ++ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm+cortex_a9_ls") ++(define_reservation "cortex_a9_neon_ls_2" ++ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_perm") ++(define_reservation 
"cortex_a9_neon_ls_3" ++ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_perm") ++(define_reservation "cortex_a9_neon_ls_4" ++ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_perm") ++(define_reservation "cortex_a9_neon_ls_5" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ ++ cortex_a9_neon_issue_perm") ++ ++(define_reservation "cortex_a9_neon_fmul_then_fadd" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ ++ nothing*3,\ ++ cortex_a9_neon_issue_fadd") ++(define_reservation "cortex_a9_neon_fmul_then_fadd_2" ++ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ ++ cortex_a9_neon_issue_dp,\ ++ nothing*2,\ ++ cortex_a9_neon_issue_fadd,\ ++ cortex_a9_neon_issue_fadd") ++ ++ ++;; NEON -> core transfers. ++(define_insn_reservation "ca9_neon_mrc" 1 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mrc")) ++ "ca9_issue_vfp_neon + cortex_a9_neon_mcr") ++ ++(define_insn_reservation "ca9_neon_mrrc" 1 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mrrc")) ++ "ca9_issue_vfp_neon + cortex_a9_neon_mcr") ++ ++;; The remainder of this file is auto-generated by neon-schedgen. ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N3. ++(define_insn_reservation "cortex_a9_neon_int_1" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_int_1")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their (D|Q)m operands at N1, ++;; their (D|Q)n operands at N2, and produce a result at N3. 
++(define_insn_reservation "cortex_a9_neon_int_2" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_int_2")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N3. ++(define_insn_reservation "cortex_a9_neon_int_3" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_int_3")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N4. ++(define_insn_reservation "cortex_a9_neon_int_4" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_int_4")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their (D|Q)m operands at N1, ++;; their (D|Q)n operands at N2, and produce a result at N4. ++(define_insn_reservation "cortex_a9_neon_int_5" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_int_5")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N4. ++(define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vqneg_vqabs")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation produce a result at N3. ++(define_insn_reservation "cortex_a9_neon_vmov" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vmov")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ++;; produce a result at N6. ++(define_insn_reservation "cortex_a9_neon_vaba" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vaba")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ++;; produce a result at N6 on cycle 2. 
++(define_insn_reservation "cortex_a9_neon_vaba_qqq" 7 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vaba_qqq")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their (D|Q)m operands at N1, ++;; their (D|Q)d operands at N3, and produce a result at N6. ++(define_insn_reservation "cortex_a9_neon_vsma" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vsma")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N6. ++(define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N6 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ++;; produce a result at N6. 
++(define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ++;; produce a result at N6 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mla_qqq_8_16")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ++;; produce a result at N6 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ++;; produce a result at N6 on cycle 4. ++(define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) ++ "cortex_a9_neon_dp_4") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, and produce a result at N6. ++(define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. 
++(define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) ++ "cortex_a9_neon_dp_4") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ++;; produce a result at N6. ++(define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N3. ++(define_insn_reservation "cortex_a9_neon_shift_1" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_shift_1")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N4. ++(define_insn_reservation "cortex_a9_neon_shift_2" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_shift_2")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N3 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_shift_3" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_shift_3")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N1. ++(define_insn_reservation "cortex_a9_neon_vshl_ddd" 1 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vshl_ddd")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N4 on cycle 2. 
++(define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their (D|Q)m operands at N1, ++;; their (D|Q)d operands at N3, and produce a result at N6. ++(define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vsra_vrsra")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N5. ++(define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) ++ "cortex_a9_neon_fadd") ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N5 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) ++ "cortex_a9_neon_fadd_2") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N5. ++(define_insn_reservation "cortex_a9_neon_fp_vsum" 5 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vsum")) ++ "cortex_a9_neon_fadd") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, and produce a result at N5. ++(define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vmul_ddd")) ++ "cortex_a9_neon_dp") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. 
++(define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vmul_qqd")) ++ "cortex_a9_neon_dp_2") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ++;; produce a result at N9. ++(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vmla_ddd")) ++ "cortex_a9_neon_fmul_then_fadd") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ++;; produce a result at N9 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vmla_qqq")) ++ "cortex_a9_neon_fmul_then_fadd_2") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ++;; produce a result at N9. ++(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) ++ "cortex_a9_neon_fmul_then_fadd") ++ ++;; Instructions using this reservation read their (D|Q)n operands at N2, ++;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ++;; produce a result at N9 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) ++ "cortex_a9_neon_fmul_then_fadd_2") ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N9. 
++(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) ++ "cortex_a9_neon_fmul_then_fadd") ++ ++;; Instructions using this reservation read their source operands at N2, and ++;; produce a result at N9 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) ++ "cortex_a9_neon_fmul_then_fadd_2") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N2. ++(define_insn_reservation "cortex_a9_neon_bp_simple" 2 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_bp_simple")) ++ "cortex_a9_neon_perm") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N2 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_bp_2cycle" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_bp_2cycle")) ++ "cortex_a9_neon_perm_2") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N2 on cycle 3. ++(define_insn_reservation "cortex_a9_neon_bp_3cycle" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_bp_3cycle")) ++ "cortex_a9_neon_perm_3") ++ ++;; Instructions using this reservation produce a result at N1. ++(define_insn_reservation "cortex_a9_neon_ldr" 1 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_ldr")) ++ "cortex_a9_neon_ls") ++ ++;; Instructions using this reservation read their source operands at N1. ++(define_insn_reservation "cortex_a9_neon_str" 0 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_str")) ++ "cortex_a9_neon_ls") ++ ++;; Instructions using this reservation produce a result at N1 on cycle 2. 
++(define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld1_1_2_regs")) ++ "cortex_a9_neon_ls_2") ++ ++;; Instructions using this reservation produce a result at N1 on cycle 3. ++(define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld1_3_4_regs")) ++ "cortex_a9_neon_ls_3") ++ ++;; Instructions using this reservation produce a result at N2 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) ++ "cortex_a9_neon_ls_2") ++ ++;; Instructions using this reservation produce a result at N2 on cycle 3. ++(define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld2_4_regs")) ++ "cortex_a9_neon_ls_3") ++ ++;; Instructions using this reservation produce a result at N2 on cycle 4. ++(define_insn_reservation "cortex_a9_neon_vld3_vld4" 5 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld3_vld4")) ++ "cortex_a9_neon_ls_4") ++ ++;; Instructions using this reservation read their source operands at N1. ++(define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) ++ "cortex_a9_neon_ls_2") ++ ++;; Instructions using this reservation read their source operands at N1. ++(define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vst1_3_4_regs")) ++ "cortex_a9_neon_ls_3") ++ ++;; Instructions using this reservation read their source operands at N1. 
++(define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) ++ "cortex_a9_neon_ls_4") ++ ++;; Instructions using this reservation read their source operands at N1. ++(define_insn_reservation "cortex_a9_neon_vst3_vst4" 0 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vst3_vst4")) ++ "cortex_a9_neon_ls_4") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N2 on cycle 3. ++(define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld1_vld2_lane")) ++ "cortex_a9_neon_ls_3") ++ ++;; Instructions using this reservation read their source operands at N1, and ++;; produce a result at N2 on cycle 5. ++(define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld3_vld4_lane")) ++ "cortex_a9_neon_ls_5") ++ ++;; Instructions using this reservation read their source operands at N1. ++(define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vst1_vst2_lane")) ++ "cortex_a9_neon_ls_2") ++ ++;; Instructions using this reservation read their source operands at N1. ++(define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vst3_vst4_lane")) ++ "cortex_a9_neon_ls_3") ++ ++;; Instructions using this reservation produce a result at N2 on cycle 2. ++(define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) ++ "cortex_a9_neon_ls_3") ++ ++;; Instructions using this reservation produce a result at N2. 
++(define_insn_reservation "cortex_a9_neon_mcr" 2 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mcr")) ++ "cortex_a9_neon_perm") ++ ++;; Instructions using this reservation produce a result at N2. ++(define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2 ++ (and (eq_attr "tune" "cortexa9") ++ (eq_attr "neon_type" "neon_mcr_2_mcrr")) ++ "cortex_a9_neon_perm_2") ++ ++;; Exceptions to the default latencies. ++ ++(define_bypass 1 "cortex_a9_neon_mcr_2_mcrr" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 1 "cortex_a9_neon_mcr" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_vld3_vld4_all_lanes" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") 
++ ++(define_bypass 5 "cortex_a9_neon_vld3_vld4_lane" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_vld1_vld2_lane" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a9_neon_vld3_vld4" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_vld2_4_regs" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ 
cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_vld1_3_4_regs" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 1 "cortex_a9_neon_vld1_1_2_regs" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 0 "cortex_a9_neon_ldr" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ 
cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_bp_3cycle" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_bp_2cycle" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 1 "cortex_a9_neon_bp_simple" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 9 "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ 
cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq_scalar" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd_scalar" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 9 
"cortex_a9_neon_fp_vmla_qqq" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_fp_vmul_qqd" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a9_neon_fp_vmul_ddd" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ 
cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a9_neon_fp_vsum" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_fp_vadd_qqq_vabs_qq" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a9_neon_fp_vadd_ddd_vabs_dd" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_vsra_vrsra" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ 
++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 4 "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 0 "cortex_a9_neon_vshl_ddd" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_shift_3" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_shift_2" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ 
cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_shift_1" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a9_neon_mul_qqd_32_scalar" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" ++ 
"cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 8 "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a9_neon_mla_qqq_8_16" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ 
cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ 
++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_vsma" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 6 "cortex_a9_neon_vaba_qqq" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 5 "cortex_a9_neon_vaba" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_vmov" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ 
cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_vqneg_vqabs" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_int_5" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 3 "cortex_a9_neon_int_4" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 
"cortex_a9_neon_int_3" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_int_2" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ ++(define_bypass 2 "cortex_a9_neon_int_1" ++ "cortex_a9_neon_int_1,\ ++ cortex_a9_neon_int_4,\ ++ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ ++ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ ++ cortex_a9_neon_mla_qqq_8_16,\ ++ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ ++ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ ++ cortex_a9_neon_fp_vmla_ddd,\ ++ cortex_a9_neon_fp_vmla_qqq,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ ++ cortex_a9_neon_fp_vrecps_vrsqrts_qqq") ++ + +=== modified file 'gcc/config/arm/cortex-a9.md' +--- old/gcc/config/arm/cortex-a9.md 2010-08-24 13:15:54 +0000 ++++ new/gcc/config/arm/cortex-a9.md 2010-09-16 09:47:44 +0000 +@@ -80,8 +80,9 @@ + (define_insn_reservation "cortex_a9_dp" 2 + (and (eq_attr "tune" "cortexa9") + (ior (eq_attr "type" "alu") +- (and (eq_attr "type" "alu_shift_reg, alu_shift") +- (eq_attr "insn" "mov")))) ++ (ior (and (eq_attr "type" "alu_shift_reg, alu_shift") ++ (eq_attr "insn" 
"mov")) ++ (eq_attr "neon_type" "none")))) + "cortex_a9_p0_default|cortex_a9_p1_default") + + ;; An instruction using the shifter will go down E1. + +=== modified file 'gcc/config/arm/neon-schedgen.ml' +--- old/gcc/config/arm/neon-schedgen.ml 2010-04-02 18:54:46 +0000 ++++ new/gcc/config/arm/neon-schedgen.ml 2010-09-16 09:47:44 +0000 +@@ -1,7 +1,6 @@ + (* Emission of the core of the Cortex-A8 NEON scheduling description. + Copyright (C) 2007, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. +- + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under +@@ -21,7 +20,14 @@ + + (* This scheduling description generator works as follows. + - Each group of instructions has source and destination requirements +- specified. The source requirements may be specified using ++ specified and a list of cores supported. This is then filtered ++ and per core scheduler descriptions are generated out. ++ The reservations generated are prefixed by the name of the ++ core and the check is performed on the basis of what the tuning ++ string is. Running this will generate Neon scheduler descriptions ++ for all cores supported. ++ ++ The source requirements may be specified using + Source (the stage at which all source operands not otherwise + described are read), Source_m (the stage at which Rm operands are + read), Source_n (likewise for Rn) and Source_d (likewise for Rd). +@@ -83,6 +89,17 @@ + | Ls of int + | Fmul_then_fadd | Fmul_then_fadd_2 + ++type core = CortexA8 | CortexA9 ++let allCores = [CortexA8; CortexA9] ++let coreStr = function ++ CortexA8 -> "cortex_a8" ++ | CortexA9 -> "cortex_a9" ++ ++let tuneStr = function ++ CortexA8 -> "cortexa8" ++ | CortexA9 -> "cortexa9" ++ ++ + (* This table must be kept as short as possible by conflating + entries with the same availability behavior. 
+ +@@ -90,129 +107,136 @@ + Second components: availability requirements, in the order in which + they should appear in the comments in the .md file. + Third components: reservation info ++ Fourth components: List of supported cores. + *) + let availability_table = [ + (* NEON integer ALU instructions. *) + (* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr + veor vbic vorn ddd qqq *) +- "neon_int_1", [Source n2; Dest n3], ALU; ++ "neon_int_1", [Source n2; Dest n3], ALU, allCores; + (* vadd vsub qqd vsub ddd qqq *) +- "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU; ++ "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU, allCores; + (* vsum vneg dd qq vadd vsub qdd *) +- "neon_int_3", [Source n1; Dest n3], ALU; ++ "neon_int_3", [Source n1; Dest n3], ALU, allCores; + (* vabs vceqz vcgez vcbtz vclez vcltz vadh vradh vsbh vrsbh dqq *) + (* vhadd vrhadd vqadd vtst ddd qqq *) +- "neon_int_4", [Source n2; Dest n4], ALU; ++ "neon_int_4", [Source n2; Dest n4], ALU, allCores; + (* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *) +- "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU; ++ "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU, allCores; + (* vqneg vqabs dd qq *) +- "neon_vqneg_vqabs", [Source n1; Dest n4], ALU; ++ "neon_vqneg_vqabs", [Source n1; Dest n4], ALU, allCores; + (* vmov vmvn *) +- "neon_vmov", [Dest n3], ALU; ++ "neon_vmov", [Dest n3], ALU, allCores; + (* vaba *) +- "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU; ++ "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU, allCores; + "neon_vaba_qqq", +- [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], ALU_2cycle; ++ [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], ++ ALU_2cycle, allCores; + (* vsma *) +- "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU; ++ "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU, allCores; + + (* NEON integer multiply instructions. 
*) + (* vmul, vqdmlh, vqrdmlh *) + (* vmul, vqdmul, qdd 16/8 long 32/16 long *) +- "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6], Mul; +- "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)], Mul_2cycle; ++ "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6], ++ Mul, allCores; ++ "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)], ++ Mul_2cycle, allCores; + (* vmul, vqdmul again *) + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar", +- [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle; ++ [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle, allCores; + (* vmla, vmls *) + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long", +- [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul; ++ [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul, allCores; + "neon_mla_qqq_8_16", +- [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle; ++ [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)], ++ Mul_2cycle, allCores; + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long", +- [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle; ++ [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], ++ Mul_2cycle, allCores; + "neon_mla_qqq_32_qqd_32_scalar", +- [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)], Mul_4cycle; ++ [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)], ++ Mul_4cycle, allCores; + (* vmul, vqdmulh, vqrdmulh *) + (* vmul, vqdmul *) + "neon_mul_ddd_16_scalar_32_16_long_scalar", +- [Source_n n2; Source_m n1; Dest n6], Mul; ++ [Source_n n2; Source_m n1; Dest n6], Mul, allCores; + "neon_mul_qqd_32_scalar", +- [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle; ++ [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle, allCores; + (* vmla, vmls *) + (* vmla, vmla, vqdmla, vqdmls *) + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar", +- [Source_n n2; Source_m n1; 
Source_d n3; Dest n6], Mul; ++ [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul, allCores; + + (* NEON integer shift instructions. *) + (* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *) +- "neon_shift_1", [Source n1; Dest n3], Shift; +- (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow; ++ "neon_shift_1", [Source n1; Dest n3], Shift, allCores; ++ (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow, allCores; + vqshl_vrshl_vqrshl_ddd *) +- "neon_shift_2", [Source n1; Dest n4], Shift; ++ "neon_shift_2", [Source n1; Dest n4], Shift, allCores; + (* vsli, vsri and vshl for qqq *) +- "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle; +- "neon_vshl_ddd", [Source n1; Dest n1], Shift; ++ "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle, allCores; ++ "neon_vshl_ddd", [Source n1; Dest n1], Shift, allCores; + "neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)], +- Shift_2cycle; +- "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift; ++ Shift_2cycle, allCores; ++ "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift, allCores; + + (* NEON floating-point instructions. 
*) + (* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *) + (* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *) +- "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd; ++ "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd, allCores; + "neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)], +- Fadd_2cycle; ++ Fadd_2cycle, allCores; + (* vsum, fvmx, vfmn *) +- "neon_fp_vsum", [Source n1; Dest n5], Fadd; +- "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul; ++ "neon_fp_vsum", [Source n1; Dest n5], Fadd, allCores; ++ "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul, allCores; + "neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)], +- Fmul_2cycle; ++ Fmul_2cycle, allCores; + (* vmla, vmls *) + "neon_fp_vmla_ddd", +- [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd; ++ [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd, allCores; + "neon_fp_vmla_qqq", + [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)], +- Fmul_then_fadd_2; ++ Fmul_then_fadd_2, allCores; + "neon_fp_vmla_ddd_scalar", +- [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd; ++ [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd, allCores; + "neon_fp_vmla_qqq_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)], +- Fmul_then_fadd_2; +- "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd; ++ Fmul_then_fadd_2, allCores; ++ "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd, allCores; + "neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)], +- Fmul_then_fadd_2; ++ Fmul_then_fadd_2, allCores; + + (* NEON byte permute instructions. 
*) + (* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev; vext for dd *) +- "neon_bp_simple", [Source n1; Dest n2], Permute 1; +- (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1}; ++ "neon_bp_simple", [Source n1; Dest n2], Permute 1, allCores; ++ (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1}, allCores; + similarly for vtbx *) +- "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2; ++ "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2, allCores; + (* all the rest *) +- "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3; ++ "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3, allCores; + + (* NEON load/store instructions. *) +- "neon_ldr", [Dest n1], Ls 1; +- "neon_str", [Source n1], Ls 1; +- "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2; +- "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3; +- "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2; +- "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3; +- "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4; +- "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2; +- "neon_vst1_3_4_regs", [Source n1], Ls 3; +- "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4; +- "neon_vst3_vst4", [Source n1], Ls 4; +- "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3; +- "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5; +- "neon_vst1_vst2_lane", [Source n1], Ls 2; +- "neon_vst3_vst4_lane", [Source n1], Ls 3; +- "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3; ++ "neon_ldr", [Dest n1], Ls 1, allCores; ++ "neon_str", [Source n1], Ls 1, allCores; ++ "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2, allCores; ++ "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3, allCores; ++ "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2, allCores; ++ "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3, allCores; ++ "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4, allCores; ++ "neon_vst1_1_2_regs_vst2_2_regs", 
[Source n1], Ls 2, allCores; ++ "neon_vst1_3_4_regs", [Source n1], Ls 3, allCores; ++ "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4, allCores; ++ "neon_vst3_vst4", [Source n1], Ls 4, allCores; ++ "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3, allCores; ++ "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5, allCores; ++ "neon_vst1_vst2_lane", [Source n1], Ls 2, allCores; ++ "neon_vst3_vst4_lane", [Source n1], Ls 3, allCores; ++ "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3, allCores; + + (* NEON register transfer instructions. *) +- "neon_mcr", [Dest n2], Permute 1; +- "neon_mcr_2_mcrr", [Dest n2], Permute 2; ++ "neon_mcr", [Dest n2], Permute 1, allCores; ++ "neon_mcr_2_mcrr", [Dest n2], Permute 2, allCores; + (* MRC instructions are in the .tpl file. *) + ] + +@@ -221,7 +245,7 @@ + required. (It is also possible that an entry in the table has no + source requirements.) *) + let calculate_sources = +- List.map (fun (name, avail, res) -> ++ List.map (fun (name, avail, res, cores) -> + let earliest_stage = + List.fold_left + (fun cur -> fun info -> +@@ -331,7 +355,7 @@ + of one bypass from this producer to any particular consumer listed + in LATENCIES.) Use a hash table to collate bypasses with the + same latency and guard. 
*) +-let collate_bypasses (producer_name, _, _, _) largest latencies = ++let collate_bypasses (producer_name, _, _, _) largest latencies core = + let ht = Hashtbl.create 42 in + let keys = ref [] in + List.iter ( +@@ -350,7 +374,7 @@ + (if (try ignore (Hashtbl.find ht (guard, latency)); false + with Not_found -> true) then + keys := (guard, latency) :: !keys); +- Hashtbl.add ht (guard, latency) consumer ++ Hashtbl.add ht (guard, latency) ((coreStr core) ^ "_" ^ consumer) + end + ) latencies; + (* The hash table now has bypasses collated so that ones with the +@@ -372,7 +396,7 @@ + the output in such a way that all bypasses with the same producer + and latency are together, and so that bypasses with the worst-case + latency are ignored. *) +-let worst_case_latencies_and_bypasses = ++let worst_case_latencies_and_bypasses core = + let rec f (worst_acc, bypasses_acc) prev xs = + match xs with + [] -> (worst_acc, bypasses_acc) +@@ -400,7 +424,7 @@ + (* Having got the largest latency, collect all bypasses for + this producer and filter out those with that larger + latency. Record the others for later emission. *) +- let bypasses = collate_bypasses producer largest latencies in ++ let bypasses = collate_bypasses producer largest latencies core in + (* Go on to process remaining producers, having noted + the result for this one. *) + f ((producer_name, producer_avail, largest, +@@ -444,14 +468,18 @@ + in + f avail 0 + ++ + (* Emit a define_insn_reservation for each producer. The latency + written in will be its worst-case latency. 
*) +-let emit_insn_reservations = +- List.iter ( ++let emit_insn_reservations core = ++ let corestring = coreStr core in ++ let tunestring = tuneStr core ++ in List.iter ( + fun (producer, avail, latency, reservation) -> + write_comment producer avail; +- Printf.printf "(define_insn_reservation \"%s\" %d\n" producer latency; +- Printf.printf " (and (eq_attr \"tune\" \"cortexa8\")\n"; ++ Printf.printf "(define_insn_reservation \"%s_%s\" %d\n" ++ corestring producer latency; ++ Printf.printf " (and (eq_attr \"tune\" \"%s\")\n" tunestring; + Printf.printf " (eq_attr \"neon_type\" \"%s\"))\n" producer; + let str = + match reservation with +@@ -467,7 +495,7 @@ + | Fmul_then_fadd -> "fmul_then_fadd" + | Fmul_then_fadd_2 -> "fmul_then_fadd_2" + in +- Printf.printf " \"cortex_a8_neon_%s\")\n\n" str ++ Printf.printf " \"%s_neon_%s\")\n\n" corestring str + ) + + (* Given a guard description, return the name of the C function to +@@ -480,10 +508,12 @@ + | Guard_none -> assert false + + (* Emit a define_bypass for each bypass. 
*) +-let emit_bypasses = ++let emit_bypasses core = + List.iter ( + fun (producer, consumers, latency, guard) -> +- Printf.printf "(define_bypass %d \"%s\"\n" latency producer; ++ Printf.printf "(define_bypass %d \"%s_%s\"\n" ++ latency (coreStr core) producer; ++ + if guard = Guard_none then + Printf.printf " \"%s\")\n\n" consumers + else +@@ -493,11 +523,21 @@ + end + ) + ++(* Emit the reservations and bypasses for CORE from AVAILABILITY_TABLE. *) ++let calculate_per_core_availability_table core availability_table = ++ let table = calculate_sources availability_table in ++ let worst_cases, bypasses = worst_case_latencies_and_bypasses core table in ++ emit_insn_reservations core (List.rev worst_cases); ++ Printf.printf ";; Exceptions to the default latencies.\n\n"; ++ emit_bypasses core bypasses ++ ++(* Keep only the AVAILABILITY_TABLE entries listing CORE, then emit them. *) ++let calculate_core_availability_table core availability_table = ++let filter_core = List.filter (fun (_, _, _, cores) ++ -> List.mem core cores) ++in calculate_per_core_availability_table core (filter_core availability_table) ++ + (* Program entry point. *) + let main = +- let table = calculate_sources availability_table in +- let worst_cases, bypasses = worst_case_latencies_and_bypasses table in +- emit_insn_reservations (List.rev worst_cases); +- Printf.printf ";; Exceptions to the default latencies.\n\n"; +- emit_bypasses bypasses +- ++ List.iter (fun core -> calculate_core_availability_table ++ core availability_table) allCores + |