2010-09-16  Andrew Stubbs

	Backport from FSF:

	2010-09-01  Ramana Radhakrishnan

	* config/arm/neon-schedgen.ml (core): New type.
	(allCores): List of supported cores.
	(availability_table): Add supported cores.
	(collate_bypasses): Accept core as a parameter.
	(worst_case_latencies_and_bypasses): Accept core as a parameter.
	(emit_insn_reservations): Accept core as a parameter.  Use tuneStr
	and coreStr to get tune attribute and prefix for functional units.
	(emit_bypasses): Accept core name and use it.
	(calculate_per_core_availability_table): New.
	(filter_core): New.
	(calculate_core_availability_table): New.
	(main): Use calculate_core_availability_table.
	* config/arm/cortex-a8-neon.md: Update copyright year.
	Regenerated from ml file and merged in.
	(neon_mrrc, neon_mrc): Rename to cortex_a8_neon_mrrc and
	cortex_a8_neon_mrc.

	2010-09-10  Ramana Radhakrishnan

	* config/arm/neon-schedgen.ml (allCores): Add support for Cortex-A9.
	* config/arm/cortex-a9-neon.md: New and partially generated.
	* config/arm/cortex-a9.md (cortex_a9_dp): Adjust for Neon.

	2010-09-15  Chung-Lin Tang

	Issue #9441

=== modified file 'gcc/config/arm/cortex-a8-neon.md'
--- old/gcc/config/arm/cortex-a8-neon.md 2009-02-20 15:20:38 +0000
+++ new/gcc/config/arm/cortex-a8-neon.md 2010-09-16 09:47:44 +0000
@@ -182,12 +182,12 @@ ;; NEON -> core transfers. -(define_insn_reservation "neon_mrc" 20 +(define_insn_reservation "cortex_a8_neon_mrc" 20 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mrc")) "cortex_a8_neon_ls") -(define_insn_reservation "neon_mrrc" 21 +(define_insn_reservation "cortex_a8_neon_mrrc" 21 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mrrc")) "cortex_a8_neon_ls_2")
@@ -196,48 +196,48 @@ ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N3. -(define_insn_reservation "neon_int_1" 3 +(define_insn_reservation "cortex_a8_neon_int_1" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_int_1")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N3. -(define_insn_reservation "neon_int_2" 3 +(define_insn_reservation "cortex_a8_neon_int_2" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_int_2")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. -(define_insn_reservation "neon_int_3" 3 +(define_insn_reservation "cortex_a8_neon_int_3" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_int_3")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N4. -(define_insn_reservation "neon_int_4" 4 +(define_insn_reservation "cortex_a8_neon_int_4" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_int_4")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)n operands at N2, and produce a result at N4. -(define_insn_reservation "neon_int_5" 4 +(define_insn_reservation "cortex_a8_neon_int_5" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_int_5")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. -(define_insn_reservation "neon_vqneg_vqabs" 4 +(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vqneg_vqabs")) "cortex_a8_neon_dp") ;; Instructions using this reservation produce a result at N3.
-(define_insn_reservation "neon_vmov" 3 +(define_insn_reservation "cortex_a8_neon_vmov" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vmov")) "cortex_a8_neon_dp") @@ -245,7 +245,7 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ;; produce a result at N6. -(define_insn_reservation "neon_vaba" 6 +(define_insn_reservation "cortex_a8_neon_vaba" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vaba")) "cortex_a8_neon_dp") @@ -253,35 +253,35 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ;; produce a result at N6 on cycle 2. -(define_insn_reservation "neon_vaba_qqq" 7 +(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vaba_qqq")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. -(define_insn_reservation "neon_vsma" 6 +(define_insn_reservation "cortex_a8_neon_vsma" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vsma")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6. -(define_insn_reservation "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 +(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N6 on cycle 2. -(define_insn_reservation "neon_mul_qqq_8_16_32_ddd_32" 7 +(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. -(define_insn_reservation "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 +(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) "cortex_a8_neon_dp_2") @@ -289,7 +289,7 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ;; produce a result at N6. -(define_insn_reservation "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 +(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) "cortex_a8_neon_dp") @@ -297,7 +297,7 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ;; produce a result at N6 on cycle 2. -(define_insn_reservation "neon_mla_qqq_8_16" 7 +(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mla_qqq_8_16")) "cortex_a8_neon_dp_2") @@ -305,7 +305,7 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ;; produce a result at N6 on cycle 2. 
-(define_insn_reservation "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 +(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) "cortex_a8_neon_dp_2") @@ -313,21 +313,21 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ;; produce a result at N6 on cycle 4. -(define_insn_reservation "neon_mla_qqq_32_qqd_32_scalar" 9 +(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) "cortex_a8_neon_dp_4") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6. -(define_insn_reservation "neon_mul_ddd_16_scalar_32_16_long_scalar" 6 +(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. -(define_insn_reservation "neon_mul_qqd_32_scalar" 9 +(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) "cortex_a8_neon_dp_4") @@ -335,84 +335,84 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ;; produce a result at N6. -(define_insn_reservation "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 +(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3. -(define_insn_reservation "neon_shift_1" 3 +(define_insn_reservation "cortex_a8_neon_shift_1" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_shift_1")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4. -(define_insn_reservation "neon_shift_2" 4 +(define_insn_reservation "cortex_a8_neon_shift_2" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_shift_2")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N3 on cycle 2. -(define_insn_reservation "neon_shift_3" 4 +(define_insn_reservation "cortex_a8_neon_shift_3" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_shift_3")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N1. -(define_insn_reservation "neon_vshl_ddd" 1 +(define_insn_reservation "cortex_a8_neon_vshl_ddd" 1 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vshl_ddd")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N4 on cycle 2. 
-(define_insn_reservation "neon_vqshl_vrshl_vqrshl_qqq" 5 +(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) "cortex_a8_neon_dp_2") ;; Instructions using this reservation read their (D|Q)m operands at N1, ;; their (D|Q)d operands at N3, and produce a result at N6. -(define_insn_reservation "neon_vsra_vrsra" 6 +(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vsra_vrsra")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5. -(define_insn_reservation "neon_fp_vadd_ddd_vabs_dd" 5 +(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) "cortex_a8_neon_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N5 on cycle 2. -(define_insn_reservation "neon_fp_vadd_qqq_vabs_qq" 6 +(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) "cortex_a8_neon_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N5. -(define_insn_reservation "neon_fp_vsum" 5 +(define_insn_reservation "cortex_a8_neon_fp_vsum" 5 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vsum")) "cortex_a8_neon_fadd") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5. -(define_insn_reservation "neon_fp_vmul_ddd" 5 +(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vmul_ddd")) "cortex_a8_neon_dp") ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. -(define_insn_reservation "neon_fp_vmul_qqd" 6 +(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vmul_qqd")) "cortex_a8_neon_dp_2") @@ -420,7 +420,7 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ;; produce a result at N9. -(define_insn_reservation "neon_fp_vmla_ddd" 9 +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vmla_ddd")) "cortex_a8_neon_fmul_then_fadd") @@ -428,7 +428,7 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and ;; produce a result at N9 on cycle 2. -(define_insn_reservation "neon_fp_vmla_qqq" 10 +(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vmla_qqq")) "cortex_a8_neon_fmul_then_fadd_2") @@ -436,7 +436,7 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ;; produce a result at N9. 
-(define_insn_reservation "neon_fp_vmla_ddd_scalar" 9 +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) "cortex_a8_neon_fmul_then_fadd") @@ -444,869 +444,869 @@ ;; Instructions using this reservation read their (D|Q)n operands at N2, ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and ;; produce a result at N9 on cycle 2. -(define_insn_reservation "neon_fp_vmla_qqq_scalar" 10 +(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9. -(define_insn_reservation "neon_fp_vrecps_vrsqrts_ddd" 9 +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) "cortex_a8_neon_fmul_then_fadd") ;; Instructions using this reservation read their source operands at N2, and ;; produce a result at N9 on cycle 2. -(define_insn_reservation "neon_fp_vrecps_vrsqrts_qqq" 10 +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) "cortex_a8_neon_fmul_then_fadd_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2. -(define_insn_reservation "neon_bp_simple" 2 +(define_insn_reservation "cortex_a8_neon_bp_simple" 2 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_bp_simple")) "cortex_a8_neon_perm") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 2. -(define_insn_reservation "neon_bp_2cycle" 3 +(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_bp_2cycle")) "cortex_a8_neon_perm_2") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. -(define_insn_reservation "neon_bp_3cycle" 4 +(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_bp_3cycle")) "cortex_a8_neon_perm_3") ;; Instructions using this reservation produce a result at N1. -(define_insn_reservation "neon_ldr" 1 +(define_insn_reservation "cortex_a8_neon_ldr" 1 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_ldr")) "cortex_a8_neon_ls") ;; Instructions using this reservation read their source operands at N1. -(define_insn_reservation "neon_str" 0 +(define_insn_reservation "cortex_a8_neon_str" 0 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_str")) "cortex_a8_neon_ls") ;; Instructions using this reservation produce a result at N1 on cycle 2. -(define_insn_reservation "neon_vld1_1_2_regs" 2 +(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld1_1_2_regs")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation produce a result at N1 on cycle 3. -(define_insn_reservation "neon_vld1_3_4_regs" 3 +(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld1_3_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. 
-(define_insn_reservation "neon_vld2_2_regs_vld1_vld2_all_lanes" 3 +(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation produce a result at N2 on cycle 3. -(define_insn_reservation "neon_vld2_4_regs" 4 +(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld2_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 4. -(define_insn_reservation "neon_vld3_vld4" 5 +(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld3_vld4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. -(define_insn_reservation "neon_vst1_1_2_regs_vst2_2_regs" 0 +(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. -(define_insn_reservation "neon_vst1_3_4_regs" 0 +(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vst1_3_4_regs")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation read their source operands at N1. -(define_insn_reservation "neon_vst2_4_regs_vst3_vst4" 0 +(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1. -(define_insn_reservation "neon_vst3_vst4" 0 +(define_insn_reservation "cortex_a8_neon_vst3_vst4" 0 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vst3_vst4")) "cortex_a8_neon_ls_4") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 3. -(define_insn_reservation "neon_vld1_vld2_lane" 4 +(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld1_vld2_lane")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation read their source operands at N1, and ;; produce a result at N2 on cycle 5. -(define_insn_reservation "neon_vld3_vld4_lane" 6 +(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld3_vld4_lane")) "cortex_a8_neon_ls_5") ;; Instructions using this reservation read their source operands at N1. -(define_insn_reservation "neon_vst1_vst2_lane" 0 +(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vst1_vst2_lane")) "cortex_a8_neon_ls_2") ;; Instructions using this reservation read their source operands at N1. -(define_insn_reservation "neon_vst3_vst4_lane" 0 +(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vst3_vst4_lane")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2 on cycle 2. 
-(define_insn_reservation "neon_vld3_vld4_all_lanes" 3 +(define_insn_reservation "cortex_a8_neon_vld3_vld4_all_lanes" 3 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) "cortex_a8_neon_ls_3") ;; Instructions using this reservation produce a result at N2. -(define_insn_reservation "neon_mcr" 2 +(define_insn_reservation "cortex_a8_neon_mcr" 2 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mcr")) "cortex_a8_neon_perm") ;; Instructions using this reservation produce a result at N2. -(define_insn_reservation "neon_mcr_2_mcrr" 2 +(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2 (and (eq_attr "tune" "cortexa8") (eq_attr "neon_type" "neon_mcr_2_mcrr")) "cortex_a8_neon_perm_2") ;; Exceptions to the default latencies. -(define_bypass 1 "neon_mcr_2_mcrr" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 1 "neon_mcr" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_vld3_vld4_all_lanes" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_vld3_vld4_lane" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_vld1_vld2_lane" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 4 "neon_vld3_vld4" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_vld2_4_regs" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_vld2_2_regs_vld1_vld2_all_lanes" - "neon_int_1,\ - neon_int_4,\ - 
neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_vld1_3_4_regs" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 1 "neon_vld1_1_2_regs" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 0 "neon_ldr" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_bp_3cycle" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_bp_2cycle" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 1 "neon_bp_simple" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 9 "neon_fp_vrecps_vrsqrts_qqq" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 8 "neon_fp_vrecps_vrsqrts_ddd" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 9 "neon_fp_vmla_qqq_scalar" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - 
neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 8 "neon_fp_vmla_ddd_scalar" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 9 "neon_fp_vmla_qqq" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 8 "neon_fp_vmla_ddd" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_fp_vmul_qqd" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 4 "neon_fp_vmul_ddd" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 4 "neon_fp_vsum" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_fp_vadd_qqq_vabs_qq" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 4 "neon_fp_vadd_ddd_vabs_dd" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_vsra_vrsra" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - 
neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 4 "neon_vqshl_vrshl_vqrshl_qqq" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 0 "neon_vshl_ddd" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_shift_3" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_shift_2" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_shift_1" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 8 "neon_mul_qqd_32_scalar" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_mul_ddd_16_scalar_32_16_long_scalar" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 8 "neon_mla_qqq_32_qqd_32_scalar" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - 
neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 6 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 6 "neon_mla_qqq_8_16" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 6 "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 6 "neon_mul_qqq_8_16_32_ddd_32" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_vsma" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 6 "neon_vaba_qqq" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 5 "neon_vaba" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - 
neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_vmov" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_vqneg_vqabs" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_int_5" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 3 "neon_int_4" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_int_3" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_int_2" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") - -(define_bypass 2 "neon_int_1" - "neon_int_1,\ - neon_int_4,\ - neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mul_qqq_8_16_32_ddd_32,\ - neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ - neon_mla_qqq_8_16,\ - neon_fp_vadd_ddd_vabs_dd,\ - neon_fp_vadd_qqq_vabs_qq,\ - neon_fp_vmla_ddd,\ - neon_fp_vmla_qqq,\ - neon_fp_vrecps_vrsqrts_ddd,\ - neon_fp_vrecps_vrsqrts_qqq") +(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_mcr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld3_vld4_all_lanes" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vld3_vld4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld2_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + 
cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a8_neon_ldr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_bp_3cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_bp_2cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_bp_simple" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vsum" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + 
cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vsra_vrsra" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a8_neon_vshl_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_3" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + 
cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_shift_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mul_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + 
cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vsma" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_vaba_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vaba" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vmov" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vqneg_vqabs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_5" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_3" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") === added file 'gcc/config/arm/cortex-a9-neon.md' --- old/gcc/config/arm/cortex-a9-neon.md 1970-01-01 00:00:00 +0000 +++ new/gcc/config/arm/cortex-a9-neon.md 2010-09-16 09:47:44 +0000 @@ -0,0 +1,1237 @@ +;; ARM Cortex-A9 pipeline description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; +;; Neon pipeline description contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +(define_automaton "cortex_a9_neon") + +;; Only one instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_perm" "cortex_a9_neon") + +;; Only one data-processing instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_dp" "cortex_a9_neon") + +;; We need a special mutual exclusion (to be used in addition to +;; cortex_a9_neon_issue_dp) for the case when an instruction such as +;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to +;; E2 of the floating-point add pipeline. On the cycle previous to that +;; forward we must prevent issue of any instruction to the floating-point +;; add pipeline, but still allow issue of a data-processing instruction +;; to any of the other pipelines. +(define_cpu_unit "cortex_a9_neon_issue_fadd" "cortex_a9_neon") +(define_cpu_unit "cortex_a9_neon_mcr" "cortex_a9_neon") + + +;; Patterns of reservation. +;; We model the NEON issue units as running in parallel with the core ones. +;; We assume that multi-cycle NEON instructions get decomposed into +;; micro-ops as they are issued into the NEON pipeline.
+ +(define_reservation "cortex_a9_neon_dp" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_4" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp + \ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_perm" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm, \ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_ls" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm+cortex_a9_ls") +(define_reservation "cortex_a9_neon_ls_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_4" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_5" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_fmul_then_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + nothing*3,\ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fmul_then_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp,\ + nothing*2,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_fadd") + + +;; NEON -> core transfers. +(define_insn_reservation "ca9_neon_mrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +(define_insn_reservation "ca9_neon_mrrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mrrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +;; The remainder of this file is auto-generated by neon-schedgen. + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_2" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a9_neon_int_3" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_3")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_4" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_4")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_5" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_5")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vqneg_vqabs")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "cortex_a9_neon_vmov" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vmov")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vaba" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vaba")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vaba_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vsma" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vsma")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. 
+(define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_qqq_8_16")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_shift_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_shift_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a9_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_shift_3")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N1. 
+(define_insn_reservation "cortex_a9_neon_vshl_ddd" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vshl_ddd")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vsra_vrsra")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a9_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vsum")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmul_ddd")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmul_qqd")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. 
+(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. +(define_insn_reservation "cortex_a9_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_bp_simple")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_bp_2cycle")) + "cortex_a9_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_bp_3cycle")) + "cortex_a9_neon_perm_3") + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "cortex_a9_neon_ldr" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_ldr")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_str" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_str")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld1_1_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld2_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. +(define_insn_reservation "cortex_a9_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld3_vld4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. 
+(define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst3_vst4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld1_vld2_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld3_vld4_lane")) + "cortex_a9_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst1_vst2_lane")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst3_vst4_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mcr")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mcr_2_mcrr")) + "cortex_a9_neon_perm_2") + +;; Exceptions to the default latencies. 
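[Editorial aside, not part of the patch: the generated comments and bypass values above and below follow a consistent arithmetic pattern. A reservation's declared latency appears to be its result stage plus one extra cycle per additional issue cycle (e.g. "result at N6 on cycle 2" gives latency 7), and each define_bypass that follows is one cycle shorter than the producer's default latency, which is consistent with all of the listed consumers reading their source operands at N2 rather than the worst-case N1. The small OCaml sketch below merely restates that observed arithmetic with illustrative helper names; it is not code from neon-schedgen.ml.]

(* Illustrative only -- helper names are made up for this note.
   default_latency restates the pattern seen in the generated comments:
   latency = result stage + (issue cycles - 1).
   bypass_latency subtracts the cycles saved when the consumer reads its
   sources at a later stage than the worst-case N1. *)
let default_latency ~result_stage ~issue_cycles =
  result_stage + (issue_cycles - 1)

let bypass_latency ~result_stage ~issue_cycles ~consumer_read_stage =
  default_latency ~result_stage ~issue_cycles - (consumer_read_stage - 1)

let () =
  (* cortex_a9_neon_fp_vadd_qqq_vabs_qq: result at N5 on cycle 2 -> latency 6. *)
  assert (default_latency ~result_stage:5 ~issue_cycles:2 = 6);
  (* cortex_a9_neon_mla_qqq_32_qqd_32_scalar: result at N6 on cycle 4 -> latency 9. *)
  assert (default_latency ~result_stage:6 ~issue_cycles:4 = 9);
  (* Its bypass to consumers that read at N2 (e.g. cortex_a9_neon_int_1) is 8. *)
  assert (bypass_latency ~result_stage:6 ~issue_cycles:4 ~consumer_read_stage:2 = 8)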
+ +(define_bypass 1 "cortex_a9_neon_mcr_2_mcrr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_mcr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld3_vld4_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vld3_vld4_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld1_vld2_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vld3_vld4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld2_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + 
cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld1_3_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_vld1_1_2_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a9_neon_ldr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_bp_3cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_bp_2cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_bp_simple" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + 
cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vmul_qqd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + 
cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vmul_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vsum" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vadd_qqq_vabs_qq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vadd_ddd_vabs_dd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vsra_vrsra" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a9_neon_vshl_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_shift_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mul_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_qqq_8_16" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 
5 "cortex_a9_neon_vsma" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_vaba_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vaba" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vmov" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vqneg_vqabs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_5" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 
"cortex_a9_neon_int_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + === modified file 'gcc/config/arm/cortex-a9.md' --- old/gcc/config/arm/cortex-a9.md 2010-08-24 13:15:54 +0000 +++ new/gcc/config/arm/cortex-a9.md 2010-09-16 09:47:44 +0000 @@ -80,8 +80,9 @@ (define_insn_reservation "cortex_a9_dp" 2 (and (eq_attr "tune" "cortexa9") (ior (eq_attr "type" "alu") - (and (eq_attr "type" "alu_shift_reg, alu_shift") - (eq_attr "insn" "mov")))) + (ior (and (eq_attr "type" "alu_shift_reg, alu_shift") + (eq_attr "insn" "mov")) + (eq_attr "neon_type" "none")))) "cortex_a9_p0_default|cortex_a9_p1_default") ;; An instruction using the shifter will go down E1. === modified file 'gcc/config/arm/neon-schedgen.ml' --- old/gcc/config/arm/neon-schedgen.ml 2010-04-02 18:54:46 +0000 +++ new/gcc/config/arm/neon-schedgen.ml 2010-09-16 09:47:44 +0000 @@ -1,7 +1,6 @@ (* Emission of the core of the Cortex-A8 NEON scheduling description. Copyright (C) 2007, 2010 Free Software Foundation, Inc. Contributed by CodeSourcery. - This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under @@ -21,7 +20,14 @@ (* This scheduling description generator works as follows. - Each group of instructions has source and destination requirements - specified. The source requirements may be specified using + specified and a list of cores supported. This is then filtered + and per core scheduler descriptions are generated out. + The reservations generated are prefixed by the name of the + core and the check is performed on the basis of what the tuning + string is. Running this will generate Neon scheduler descriptions + for all cores supported. + + The source requirements may be specified using Source (the stage at which all source operands not otherwise described are read), Source_m (the stage at which Rm operands are read), Source_n (likewise for Rn) and Source_d (likewise for Rd). 
@@ -83,6 +89,17 @@
 | Ls of int
 | Fmul_then_fadd
 | Fmul_then_fadd_2
+type core = CortexA8 | CortexA9
+let allCores = [CortexA8; CortexA9]
+let coreStr = function
+    CortexA8 -> "cortex_a8"
+  | CortexA9 -> "cortex_a9"
+
+let tuneStr = function
+    CortexA8 -> "cortexa8"
+  | CortexA9 -> "cortexa9"
+
+
 
 (* This table must be kept as short as possible by conflating
    entries with the same availability behavior.
@@ -90,129 +107,136 @@
    Second components: availability requirements, in the order in which
    they should appear in the comments in the .md file.
    Third components: reservation info
+   Fourth components: List of supported cores.
 *)
 let availability_table = [
   (* NEON integer ALU instructions. *)
   (* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr
      veor vbic vorn ddd qqq *)
-  "neon_int_1", [Source n2; Dest n3], ALU;
+  "neon_int_1", [Source n2; Dest n3], ALU, allCores;
   (* vadd vsub qqd vsub ddd qqq *)
-  "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU;
+  "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU, allCores;
   (* vsum vneg dd qq vadd vsub qdd *)
-  "neon_int_3", [Source n1; Dest n3], ALU;
+  "neon_int_3", [Source n1; Dest n3], ALU, allCores;
   (* vabs vceqz vcgez vcbtz vclez vcltz vadh vradh vsbh vrsbh dqq *)
   (* vhadd vrhadd vqadd vtst ddd qqq *)
-  "neon_int_4", [Source n2; Dest n4], ALU;
+  "neon_int_4", [Source n2; Dest n4], ALU, allCores;
   (* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *)
-  "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU;
+  "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU, allCores;
   (* vqneg vqabs dd qq *)
-  "neon_vqneg_vqabs", [Source n1; Dest n4], ALU;
+  "neon_vqneg_vqabs", [Source n1; Dest n4], ALU, allCores;
   (* vmov vmvn *)
-  "neon_vmov", [Dest n3], ALU;
+  "neon_vmov", [Dest n3], ALU, allCores;
   (* vaba *)
-  "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU;
+  "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU, allCores;
   "neon_vaba_qqq",
-    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], ALU_2cycle;
+    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
+    ALU_2cycle, allCores;
   (* vsma *)
-  "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU;
+  "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU, allCores;
 
   (* NEON integer multiply instructions. *)
   (* vmul, vqdmlh, vqrdmlh *)
   (* vmul, vqdmul, qdd 16/8 long 32/16 long *)
-  "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6], Mul;
-  "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)], Mul_2cycle;
+  "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6],
+    Mul, allCores;
+  "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)],
+    Mul_2cycle, allCores;
   (* vmul, vqdmul again *)
   "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar",
-    [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle;
+    [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle, allCores;
   (* vmla, vmls *)
   "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long",
-    [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul;
+    [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul, allCores;
   "neon_mla_qqq_8_16",
-    [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle;
+    [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)],
+    Mul_2cycle, allCores;
   "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long",
-    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle;
+    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
+    Mul_2cycle, allCores;
   "neon_mla_qqq_32_qqd_32_scalar",
-    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)], Mul_4cycle;
+    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)],
+    Mul_4cycle, allCores;
   (* vmul, vqdmulh, vqrdmulh *)
   (* vmul, vqdmul *)
   "neon_mul_ddd_16_scalar_32_16_long_scalar",
-    [Source_n n2; Source_m n1; Dest n6], Mul;
+    [Source_n n2; Source_m n1; Dest n6], Mul, allCores;
   "neon_mul_qqd_32_scalar",
-    [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle;
+    [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle, allCores;
   (* vmla, vmls *)
   (* vmla, vmla, vqdmla, vqdmls *)
   "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar",
-    [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul;
+    [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul, allCores;
 
   (* NEON integer shift instructions. *)
   (* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *)
-  "neon_shift_1", [Source n1; Dest n3], Shift;
-  (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow;
+  "neon_shift_1", [Source n1; Dest n3], Shift, allCores;
+  (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow, allCores;
      vqshl_vrshl_vqrshl_ddd *)
-  "neon_shift_2", [Source n1; Dest n4], Shift;
+  "neon_shift_2", [Source n1; Dest n4], Shift, allCores;
   (* vsli, vsri and vshl for qqq *)
-  "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle;
-  "neon_vshl_ddd", [Source n1; Dest n1], Shift;
+  "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle, allCores;
+  "neon_vshl_ddd", [Source n1; Dest n1], Shift, allCores;
   "neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)],
-    Shift_2cycle;
-  "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift;
+    Shift_2cycle, allCores;
+  "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift, allCores;
 
   (* NEON floating-point instructions. *)
   (* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *)
   (* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *)
-  "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd;
+  "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd, allCores;
   "neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)],
-    Fadd_2cycle;
+    Fadd_2cycle, allCores;
   (* vsum, fvmx, vfmn *)
-  "neon_fp_vsum", [Source n1; Dest n5], Fadd;
-  "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul;
+  "neon_fp_vsum", [Source n1; Dest n5], Fadd, allCores;
+  "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul, allCores;
   "neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)],
-    Fmul_2cycle;
+    Fmul_2cycle, allCores;
   (* vmla, vmls *)
   "neon_fp_vmla_ddd",
-    [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd;
+    [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
   "neon_fp_vmla_qqq",
     [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)],
-    Fmul_then_fadd_2;
+    Fmul_then_fadd_2, allCores;
   "neon_fp_vmla_ddd_scalar",
-    [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd;
+    [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
   "neon_fp_vmla_qqq_scalar",
     [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)],
-    Fmul_then_fadd_2;
-  "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd;
+    Fmul_then_fadd_2, allCores;
+  "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd, allCores;
   "neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)],
-    Fmul_then_fadd_2;
+    Fmul_then_fadd_2, allCores;
 
   (* NEON byte permute instructions. *)
   (* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev;
      vext for dd *)
-  "neon_bp_simple", [Source n1; Dest n2], Permute 1;
-  (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1};
+  "neon_bp_simple", [Source n1; Dest n2], Permute 1, allCores;
+  (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1}, allCores;
      similarly for vtbx *)
-  "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2;
+  "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2, allCores;
   (* all the rest *)
-  "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3;
+  "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3, allCores;
 
   (* NEON load/store instructions. *)
-  "neon_ldr", [Dest n1], Ls 1;
-  "neon_str", [Source n1], Ls 1;
-  "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2;
-  "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3;
-  "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2;
-  "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3;
-  "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4;
-  "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2;
-  "neon_vst1_3_4_regs", [Source n1], Ls 3;
-  "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4;
-  "neon_vst3_vst4", [Source n1], Ls 4;
-  "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3;
-  "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5;
-  "neon_vst1_vst2_lane", [Source n1], Ls 2;
-  "neon_vst3_vst4_lane", [Source n1], Ls 3;
-  "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3;
+  "neon_ldr", [Dest n1], Ls 1, allCores;
+  "neon_str", [Source n1], Ls 1, allCores;
+  "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2, allCores;
+  "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3, allCores;
+  "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2, allCores;
+  "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3, allCores;
+  "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4, allCores;
+  "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2, allCores;
+  "neon_vst1_3_4_regs", [Source n1], Ls 3, allCores;
+  "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4, allCores;
+  "neon_vst3_vst4", [Source n1], Ls 4, allCores;
+  "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3, allCores;
+  "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5, allCores;
+  "neon_vst1_vst2_lane", [Source n1], Ls 2, allCores;
+  "neon_vst3_vst4_lane", [Source n1], Ls 3, allCores;
+  "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3, allCores;
 
   (* NEON register transfer instructions. *)
-  "neon_mcr", [Dest n2], Permute 1;
-  "neon_mcr_2_mcrr", [Dest n2], Permute 2;
+  "neon_mcr", [Dest n2], Permute 1, allCores;
+  "neon_mcr_2_mcrr", [Dest n2], Permute 2, allCores;
   (* MRC instructions are in the .tpl file. *)
 ]
@@ -221,7 +245,7 @@
    required.  (It is also possible that an entry in the table has
    no source requirements.)  *)
 let calculate_sources =
-  List.map (fun (name, avail, res) ->
+  List.map (fun (name, avail, res, cores) ->
               let earliest_stage =
                 List.fold_left
                   (fun cur -> fun info ->
@@ -331,7 +355,7 @@
    of one bypass from this producer to any particular consumer listed
    in LATENCIES.)  Use a hash table to collate bypasses with the
    same latency and guard.  *)
-let collate_bypasses (producer_name, _, _, _) largest latencies =
+let collate_bypasses (producer_name, _, _, _) largest latencies core =
   let ht = Hashtbl.create 42 in
   let keys = ref [] in
   List.iter (
@@ -350,7 +374,7 @@
         (if (try ignore (Hashtbl.find ht (guard, latency)); false
              with Not_found -> true) then
            keys := (guard, latency) :: !keys);
-        Hashtbl.add ht (guard, latency) consumer
+        Hashtbl.add ht (guard, latency) ((coreStr core) ^ "_" ^ consumer)
       end
   ) latencies;
   (* The hash table now has bypasses collated so that ones with the
@@ -372,7 +396,7 @@
    the output in such a way that all bypasses with the same producer
    and latency are together, and so that bypasses with the worst-case
    latency are ignored.  *)
-let worst_case_latencies_and_bypasses =
+let worst_case_latencies_and_bypasses core =
   let rec f (worst_acc, bypasses_acc) prev xs =
     match xs with
       [] -> (worst_acc, bypasses_acc)
@@ -400,7 +424,7 @@
       (* Having got the largest latency, collect all bypasses for this
          producer and filter out those with that larger latency.
         Record the others for later emission.  *)
-      let bypasses = collate_bypasses producer largest latencies in
+      let bypasses = collate_bypasses producer largest latencies core in
       (* Go on to process remaining producers, having noted the result
          for this one.  *)
       f ((producer_name, producer_avail, largest,
@@ -444,14 +468,18 @@
   in
     f avail 0
 
+
 (* Emit a define_insn_reservation for each producer.  The latency
    written in will be its worst-case latency.  *)
-let emit_insn_reservations =
-  List.iter (
+let emit_insn_reservations core =
+  let corestring = coreStr core in
+  let tunestring = tuneStr core
+  in List.iter (
      fun (producer, avail, latency, reservation) ->
       write_comment producer avail;
-      Printf.printf "(define_insn_reservation \"%s\" %d\n" producer latency;
-      Printf.printf "  (and (eq_attr \"tune\" \"cortexa8\")\n";
+      Printf.printf "(define_insn_reservation \"%s_%s\" %d\n"
+                    corestring producer latency;
+      Printf.printf "  (and (eq_attr \"tune\" \"%s\")\n" tunestring;
      Printf.printf "       (eq_attr \"neon_type\" \"%s\"))\n" producer;
       let str =
         match reservation with
@@ -467,7 +495,7 @@
         | Fmul_then_fadd -> "fmul_then_fadd"
         | Fmul_then_fadd_2 -> "fmul_then_fadd_2"
       in
-        Printf.printf "               \"cortex_a8_neon_%s\")\n\n" str
+        Printf.printf "               \"%s_neon_%s\")\n\n" corestring str
   )
 
 (* Given a guard description, return the name of the C function to
@@ -480,10 +508,12 @@
   | Guard_none -> assert false
 
 (* Emit a define_bypass for each bypass.  *)
-let emit_bypasses =
+let emit_bypasses core =
   List.iter (
      fun (producer, consumers, latency, guard) ->
-       Printf.printf "(define_bypass %d \"%s\"\n" latency producer;
+       Printf.printf "(define_bypass %d \"%s_%s\"\n"
+                     latency (coreStr core) producer;
+
        if guard = Guard_none then
          Printf.printf "               \"%s\")\n\n" consumers
        else
@@ -493,11 +523,21 @@
        end
   )
 
+
+let calculate_per_core_availability_table core availability_table =
+  let table = calculate_sources availability_table in
+  let worst_cases, bypasses = worst_case_latencies_and_bypasses core table in
+  emit_insn_reservations core (List.rev worst_cases);
+  Printf.printf ";; Exceptions to the default latencies.\n\n";
+  emit_bypasses core bypasses
+
+let calculate_core_availability_table core availability_table =
+let filter_core = List.filter (fun (_, _, _, cores)
+                                -> List.exists ((=) core) cores)
+in calculate_per_core_availability_table core (filter_core availability_table)
+
+
 (* Program entry point.  *)
 let main =
-  let table = calculate_sources availability_table in
-  let worst_cases, bypasses = worst_case_latencies_and_bypasses table in
-  emit_insn_reservations (List.rev worst_cases);
-  Printf.printf ";; Exceptions to the default latencies.\n\n";
-  emit_bypasses bypasses
-
+  List.map (fun core -> calculate_core_availability_table
+                        core availability_table) allCores
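
To illustrate the per-core generation scheme the patch introduces (entries carry a list of supported cores, are filtered per core, and are emitted with core-prefixed names and a per-core tune guard), here is a small standalone OCaml sketch. It is not part of the patch: the table below is a simplified stand-in for availability_table, "neon_example_a8_only" is an invented entry, and "_neon_dp" is a placeholder functional unit; the real generator also tracks source/destination stages and reservation kinds.

(* Illustrative sketch only -- not part of the patch. *)

type core = CortexA8 | CortexA9

let all_cores = [CortexA8; CortexA9]

let core_str = function
    CortexA8 -> "cortex_a8"
  | CortexA9 -> "cortex_a9"

let tune_str = function
    CortexA8 -> "cortexa8"
  | CortexA9 -> "cortexa9"

(* Toy table: (group name, worst-case latency, supported cores). *)
let toy_table = [
  "neon_int_1", 3, all_cores;
  "neon_int_2", 3, all_cores;
  "neon_example_a8_only", 20, [CortexA8];   (* hypothetical A8-only group *)
]

(* Keep only the entries whose core list mentions CORE. *)
let filter_core core table =
  List.filter (fun (_, _, cores) -> List.exists ((=) core) cores) table

(* Emit one core-prefixed reservation per surviving entry, guarded on the
   core's tune string, in the spirit of emit_insn_reservations.  The
   "_neon_dp" reservation string is a placeholder. *)
let emit_for_core core table =
  List.iter
    (fun (name, latency, _) ->
       Printf.printf "(define_insn_reservation \"%s_%s\" %d\n"
         (core_str core) name latency;
       Printf.printf "  (and (eq_attr \"tune\" \"%s\")\n" (tune_str core);
       Printf.printf "       (eq_attr \"neon_type\" \"%s\"))\n" name;
       Printf.printf "  \"%s_neon_dp\")\n\n" (core_str core))
    (filter_core core table)

let () = List.iter (fun core -> emit_for_core core toy_table) all_cores

Run over both cores, the sketch prints a cortex_a9_neon_int_1 reservation guarded by (eq_attr "tune" "cortexa9") alongside the cortex_a8 ones, while the A8-only entry is emitted for cortex_a8 alone -- the same effect the patched neon-schedgen.ml achieves for the real table when it regenerates cortex-a8-neon.md and cortex-a9-neon.md.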