2010-09-09 Andrew Stubbs Backport from mainline: 2010-08-25 Tejas Belagod * config/arm/iterators.md (VU, SE, V_widen_l): New. (V_unpack, US): New. * config/arm/neon.md (vec_unpack_hi_): Expansion for vmovl. (vec_unpack_lo_): Likewise. (neon_vec_unpack_hi_): Instruction pattern for vmovl. (neon_vec_unpack_lo_): Likewise. (vec_widen_mult_lo_): Expansion for vmull. (vec_widen_mult_hi_): Likewise. (neon_vec_mult_lo_"): Instruction pattern for vmull. (neon_vec_mult_hi_"): Likewise. (neon_unpack_): Widening move intermediate step for vectorizing without -mvectorize-with-neon-quad. (neon_vec_mult_): Widening multiply intermediate step for vectorizing without -mvectorize-with-neon-quad. * config/arm/predicates.md (vect_par_constant_high): Check for high-half lanes of a vector. (vect_par_constant_low): Check for low-half lanes of a vector. 2010-08-25 Tejas Belagod * lib/target-supports.exp (check_effective_target_vect_unpack): Set vect_unpack supported flag to true for neon. 2010-09-07 Andrew Stubbs Backport from gcc-patches: === modified file 'gcc/config/arm/arm.md' --- old/gcc/config/arm/arm.md 2010-09-01 13:29:58 +0000 +++ new/gcc/config/arm/arm.md 2010-09-09 14:11:34 +0000 @@ -868,6 +868,9 @@ (define_code_attr cnb [(ltu "CC_C") (geu "CC")]) (define_code_attr optab [(ltu "ltu") (geu "geu")]) +;; Assembler mnemonics for signedness of widening operations. +(define_code_attr US [(sign_extend "s") (zero_extend "u")]) + (define_insn "*addsi3_carryin_" [(set (match_operand:SI 0 "s_register_operand" "=r") (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r") === modified file 'gcc/config/arm/neon.md' --- old/gcc/config/arm/neon.md 2010-08-23 14:29:45 +0000 +++ new/gcc/config/arm/neon.md 2010-09-09 14:11:34 +0000 @@ -235,6 +235,9 @@ ;; Modes with 32-bit elements only. (define_mode_iterator V32 [V2SI V2SF V4SI V4SF]) +;; Modes with 8-bit, 16-bit and 32-bit elements. +(define_mode_iterator VU [V16QI V8HI V4SI]) + ;; (Opposite) mode to convert to/from for above conversions. (define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI") (V4SI "V4SF") (V4SF "V4SI")]) @@ -388,6 +391,9 @@ ;; Same, without unsigned variants (for use with *SFmode pattern). (define_code_iterator vqhs_ops [plus smin smax]) +;; A list of widening operators +(define_code_iterator SE [sign_extend zero_extend]) + ;; Assembler mnemonics for above codes. (define_code_attr VQH_mnem [(plus "vadd") (smin "vmin") (smax "vmax") (umin "vmin") (umax "vmax")]) @@ -443,6 +449,12 @@ (V2SF "2") (V4SF "4") (DI "1") (V2DI "2")]) +;; Same as V_widen, but lower-case. +(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")]) + +;; Widen. Result is half the number of elements, but widened to double-width. +(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) + (define_insn "*neon_mov" [(set (match_operand:VD 0 "nonimmediate_operand" "=w,Uv,w, w, ?r,?w,?r,?r, ?Us") @@ -5540,3 +5552,205 @@ emit_insn (gen_orn3_neon (operands[0], operands[1], operands[2])); DONE; }) + +(define_insn "neon_vec_unpack_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))))] + "TARGET_NEON" + "vmovl. %q0, %e1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vec_unpack_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))))] + "TARGET_NEON" + "vmovl. %q0, %f1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand"))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2); i++) + RTVEC_ELT (v, i) = GEN_INT ((/2) + i); + + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_hi_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_lo_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_insn "neon_vec_mult_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON" + "vmull. %q0, %e1, %e3" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_lo_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + } +) + +(define_insn "neon_vec_mult_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON" + "vmull. %q0, %f1, %f3" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (/2 + i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_hi_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + + } +) + +;; Vectorize for non-neon-quad case +(define_insn "neon_unpack_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (match_operand:VDI 1 "register_operand" "")))] + "TARGET_NEON" + "vmovl. %q0, %1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; +} +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; +} +) + +(define_insn "neon_vec_mult_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: + (match_operand:VDI 1 "register_operand" "w")) + (SE: + (match_operand:VDI 2 "register_operand" "w"))))] + "TARGET_NEON" + "vmull. %q0, %1, %2" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; + + } +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; + + } +) === modified file 'gcc/config/arm/predicates.md' --- old/gcc/config/arm/predicates.md 2010-08-31 10:00:27 +0000 +++ new/gcc/config/arm/predicates.md 2010-09-09 14:11:34 +0000 @@ -573,3 +573,61 @@ (and (match_test "TARGET_32BIT") (match_operand 0 "arm_di_operand")))) +;; Predicates for parallel expanders based on mode. +(define_special_predicate "vect_par_constant_high" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != (base/2) + i) + return false; + } + return true; +}) + +(define_special_predicate "vect_par_constant_low" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != i) + return false; + } + return true; +}) === modified file 'gcc/testsuite/lib/target-supports.exp' --- old/gcc/testsuite/lib/target-supports.exp 2010-08-24 13:00:03 +0000 +++ new/gcc/testsuite/lib/target-supports.exp 2010-09-09 14:11:34 +0000 @@ -2519,7 +2519,8 @@ if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*]) || [istarget i?86-*-*] || [istarget x86_64-*-*] - || [istarget spu-*-*] } { + || [istarget spu-*-*] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { set et_vect_unpack_saved 1 } }