2010-08-18 Julian Brown Issue #9222 gcc/ * config/arm/neon.md (UNSPEC_VCLE, UNSPEC_VCLT): New constants for unspecs. (vcond, vcondu): New expanders. (neon_vceq, neon_vcge, neon_vcgt): Support comparisons with zero. (neon_vcle, neon_vclt): New patterns. * config/arm/constraints.md (Dz): New constraint. 2010-08-18 Jie Zhang Backport from mainline: === modified file 'gcc/config/arm/constraints.md' Index: gcc-4.5/gcc/config/arm/constraints.md =================================================================== --- gcc-4.5.orig/gcc/config/arm/constraints.md +++ gcc-4.5/gcc/config/arm/constraints.md @@ -29,7 +29,7 @@ ;; in Thumb-1 state: I, J, K, L, M, N, O ;; The following multi-letter normal constraints have been used: -;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di +;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz ;; in Thumb-1 state: Pa, Pb ;; in Thumb-2 state: Ps, Pt, Pv @@ -173,6 +173,12 @@ (and (match_code "const_double") (match_test "TARGET_32BIT && neg_const_double_rtx_ok_for_fpa (op)"))) +(define_constraint "Dz" + "@internal + In ARM/Thumb-2 state a vector of constant zeros." + (and (match_code "const_vector") + (match_test "TARGET_NEON && op == CONST0_RTX (mode)"))) + (define_constraint "Da" "@internal In ARM/Thumb-2 state a const_int, const_double or const_vector that can Index: gcc-4.5/gcc/config/arm/neon.md =================================================================== --- gcc-4.5.orig/gcc/config/arm/neon.md +++ gcc-4.5/gcc/config/arm/neon.md @@ -141,7 +141,9 @@ (UNSPEC_VUZP2 202) (UNSPEC_VZIP1 203) (UNSPEC_VZIP2 204) - (UNSPEC_MISALIGNED_ACCESS 205)]) + (UNSPEC_MISALIGNED_ACCESS 205) + (UNSPEC_VCLE 206) + (UNSPEC_VCLT 207)]) ;; Double-width vector modes. (define_mode_iterator VD [V8QI V4HI V2SI V2SF]) @@ -1804,6 +1806,169 @@ [(set_attr "neon_type" "neon_int_5")] ) +;; Conditional instructions. These are comparisons with conditional moves for +;; vectors. They perform the assignment: +;; +;; Vop0 = (Vop4 Vop5) ? Vop1 : Vop2; +;; +;; where op3 is <, <=, ==, !=, >= or >. Operations are performed +;; element-wise. + +(define_expand "vcond" + [(set (match_operand:VDQW 0 "s_register_operand" "") + (if_then_else:VDQW + (match_operator 3 "arm_comparison_operator" + [(match_operand:VDQW 4 "s_register_operand" "") + (match_operand:VDQW 5 "nonmemory_operand" "")]) + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + rtx mask; + int inverse = 0, immediate_zero = 0; + /* See the description of "magic" bits in the 'T' case of + arm_print_operand. */ + HOST_WIDE_INT magic_word = (mode == V2SFmode || mode == V4SFmode) + ? 3 : 1; + rtx magic_rtx = GEN_INT (magic_word); + + mask = gen_reg_rtx (mode); + + if (operands[5] == CONST0_RTX (mode)) + immediate_zero = 1; + else if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case GE: + emit_insn (gen_neon_vcge (mask, operands[4], operands[5], + magic_rtx)); + break; + + case GT: + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], + magic_rtx)); + break; + + case EQ: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + magic_rtx)); + break; + + case LE: + if (immediate_zero) + emit_insn (gen_neon_vcle (mask, operands[4], operands[5], + magic_rtx)); + else + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], + magic_rtx)); + break; + + case LT: + if (immediate_zero) + emit_insn (gen_neon_vclt (mask, operands[4], operands[5], + magic_rtx)); + else + emit_insn (gen_neon_vcgt (mask, operands[5], operands[4], + magic_rtx)); + break; + + case NE: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + magic_rtx)); + inverse = 1; + break; + + default: + gcc_unreachable (); + } + + if (inverse) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], + operands[2])); + + DONE; +}) + +(define_expand "vcondu" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (if_then_else:VDQIW + (match_operator 3 "arm_comparison_operator" + [(match_operand:VDQIW 4 "s_register_operand" "") + (match_operand:VDQIW 5 "s_register_operand" "")]) + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "s_register_operand" "")))] + "TARGET_NEON" +{ + rtx mask; + int inverse = 0, immediate_zero = 0; + + mask = gen_reg_rtx (mode); + + if (operands[5] == CONST0_RTX (mode)) + immediate_zero = 1; + else if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case GEU: + emit_insn (gen_neon_vcge (mask, operands[4], operands[5], + const0_rtx)); + break; + + case GTU: + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], + const0_rtx)); + break; + + case EQ: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + break; + + case LEU: + if (immediate_zero) + emit_insn (gen_neon_vcle (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], + const0_rtx)); + break; + + case LTU: + if (immediate_zero) + emit_insn (gen_neon_vclt (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcgt (mask, operands[5], operands[4], + const0_rtx)); + break; + + case NE: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + inverse = 1; + break; + + default: + gcc_unreachable (); + } + + if (inverse) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], + operands[2])); + + DONE; +}) + ;; Patterns for builtins. ; good for plain vadd, vaddq. @@ -2215,13 +2380,16 @@ ) (define_insn "neon_vceq" - [(set (match_operand: 0 "s_register_operand" "=w") - (unspec: [(match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:VDQW 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VCEQ))] + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "nonmemory_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCEQ))] "TARGET_NEON" - "vceq.\t%0, %1, %2" + "@ + vceq.\t%0, %1, %2 + vceq.\t%0, %1, #0" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "") (const_int 0)) (if_then_else (ne (symbol_ref "") (const_int 0)) @@ -2231,13 +2399,16 @@ ) (define_insn "neon_vcge" - [(set (match_operand: 0 "s_register_operand" "=w") - (unspec: [(match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:VDQW 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VCGE))] + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "nonmemory_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGE))] "TARGET_NEON" - "vcge.%T3%#\t%0, %1, %2" + "@ + vcge.%T3%#\t%0, %1, %2 + vcge.%T3%#\t%0, %1, #0" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "") (const_int 0)) (if_then_else (ne (symbol_ref "") (const_int 0)) @@ -2247,13 +2418,16 @@ ) (define_insn "neon_vcgt" - [(set (match_operand: 0 "s_register_operand" "=w") - (unspec: [(match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:VDQW 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VCGT))] + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "nonmemory_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGT))] "TARGET_NEON" - "vcgt.%T3%#\t%0, %1, %2" + "@ + vcgt.%T3%#\t%0, %1, %2 + vcgt.%T3%#\t%0, %1, #0" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "") (const_int 0)) (if_then_else (ne (symbol_ref "") (const_int 0)) @@ -2262,6 +2436,43 @@ (const_string "neon_int_5")))] ) +;; VCLE and VCLT only support comparisons with immediate zero (register +;; variants are VCGE and VCGT with operands reversed). + +(define_insn "neon_vcle" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "nonmemory_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLE))] + "TARGET_NEON" + "vcle.%T3%#\t%0, %1, #0" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_insn "neon_vclt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "nonmemory_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLT))] + "TARGET_NEON" + "vclt.%T3%#\t%0, %1, #0" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + (define_insn "neon_vcage" [(set (match_operand: 0 "s_register_operand" "=w") (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w")