From 2c546a53be9c5890be33aa799ea4f67d193b4ac3 Mon Sep 17 00:00:00 2001 From: Kai Tietz Date: Wed, 21 Apr 2021 07:54:59 +0200 Subject: [PATCH 02/12] add -m(no-)align-vector-insn option for i386 --- gcc/config/i386/i386-options.cc | 9 +++++-- gcc/config/i386/i386.opt | 8 ++++++ gcc/config/i386/predicates.md | 2 +- gcc/config/i386/sse.md | 43 +++++++++++++++++++++++++++++---- gcc/config/mingw/mingw32.h | 2 +- 5 files changed, 55 insertions(+), 9 deletions(-) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 964449fa8cd..2acace11f0a 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -422,6 +422,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, + { "-mno-align-vector-insn", MASK_NO_ALIGN_VECTOR_INSN }, { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, { "-mno-push-args", MASK_NO_PUSH_ARGS }, { "-mno-red-zone", MASK_NO_RED_ZONE }, @@ -1174,8 +1175,12 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], MASK_INLINE_STRINGOPS_DYNAMICALLY), IX86_ATTR_NO ("align-stringops", - OPT_mno_align_stringops, - MASK_NO_ALIGN_STRINGOPS), + OPT_mno_align_stringops, + MASK_NO_ALIGN_STRINGOPS), + + IX86_ATTR_NO ("align-vector-insn", + OPT_mno_align_vector_insn, + MASK_NO_ALIGN_VECTOR_INSN), IX86_ATTR_YES ("recip", OPT_mrecip, diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 27d34bd64ea..639aa84b1e5 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -256,6 +256,10 @@ malign-stringops Target RejectNegative InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) Save Align destination of the string operations. 
+malign-vector-insn +Target RejectNegative InverseMask(NO_ALIGN_VECTOR_INSN, ALIGN_VECTOR_INSN) Save +Use aligned vector instructions. + malign-data= Target RejectNegative Joined Var(ix86_align_data_type) Enum(ix86_align_data) Init(ix86_align_data_type_compat) Use the given data alignment. @@ -439,6 +443,10 @@ mdaz-ftz Target Set the FTZ and DAZ Flags. +mno-align-vector-insn +Target Mask(NO_ALIGN_VECTOR_INSN) Save +Use unaligned instead of aligned vector instruction mnemonics. + mpreferred-stack-boundary= Target RejectNegative Joined UInteger Var(ix86_preferred_stack_boundary_arg) Attempt to keep stack aligned to this power of 2. diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3d3848c0a22..7b08cc48675 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1715,7 +1715,7 @@ ;; less than its natural alignment. (define_predicate "misaligned_operand" (and (match_code "mem") - (match_test "MEM_ALIGN (op) < GET_MODE_BITSIZE (mode)"))) + (match_test "TARGET_NO_ALIGN_VECTOR_INSN || MEM_ALIGN (op) < GET_MODE_BITSIZE (mode)"))) ;; Return true if OP is a parallel for an mov{d,q,dqa,ps,pd} vec_select, ;; where one of the two operands of the vec_concat is const0_operand. 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b280676eee6..98adb8ad90f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1952,7 +1952,8 @@ (vec_concat:V2DF (vec_select:DF (match_dup 2) (parallel [(const_int 0)])) (match_operand:DF 3 "memory_operand")))] - "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + "TARGET_SSE2 + && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL || TARGET_NO_ALIGN_VECTOR_INSN) && ix86_operands_ok_for_move_multiple (operands, true, DFmode)" [(set (match_dup 2) (match_dup 5))] "operands[5] = adjust_address (operands[1], V2DFmode, 0);") @@ -1963,7 +1964,8 @@ (set (match_operand:V2DF 2 "sse_reg_operand") (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand") (match_operand:DF 3 "memory_operand")))] - "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + "TARGET_SSE2 + && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL || TARGET_NO_ALIGN_VECTOR_INSN) && REGNO (operands[4]) == REGNO (operands[2]) && ix86_operands_ok_for_move_multiple (operands, true, DFmode)" [(set (match_dup 2) (match_dup 5))] @@ -1977,7 +1979,8 @@ (set (match_operand:DF 2 "memory_operand") (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand") (parallel [(const_int 1)])))] - "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL + "TARGET_SSE2 + && (TARGET_SSE_UNALIGNED_STORE_OPTIMAL || TARGET_NO_ALIGN_VECTOR_INSN) && ix86_operands_ok_for_move_multiple (operands, false, DFmode)" [(set (match_dup 4) (match_dup 1))] "operands[4] = adjust_address (operands[0], V2DFmode, 0);") @@ -11622,7 +11625,8 @@ (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m") (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "TARGET_SSE && TARGET_ALIGN_VECTOR_INSN + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ %vmovlps\t{%1, %0|%q0, %1} %vmovaps\t{%1, %0|%0, %1} @@ -11631,6 +11635,21 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "V2SF,V4SF,V2SF")]) +(define_insn "sse_storelps_unalign" + [(set 
(match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v") + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE && TARGET_NO_ALIGN_VECTOR_INSN + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + %vmovlps\t{%1, %0|%q0, %1} + %vmovups\t{%1, %0|%0, %1} + %vmovlps\t{%1, %d0|%d0, %q1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + (define_expand "sse_loadlps_exp" [(set (match_operand:V4SF 0 "nonimmediate_operand") (vec_concat:V4SF @@ -14662,7 +14681,7 @@ (vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") (parallel [(const_int 0)])))] - "!TARGET_SSE2 && TARGET_SSE + "!TARGET_SSE2 && TARGET_SSE && TARGET_ALIGN_VECTOR_INSN && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ movlps\t{%1, %0|%0, %1} @@ -14671,6 +14690,20 @@ [(set_attr "type" "ssemov") (set_attr "mode" "V2SF,V4SF,V2SF")]) +(define_insn "*vec_extractv2df_0_sse_unalign" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") + (parallel [(const_int 0)])))] + "!TARGET_SSE2 && TARGET_SSE && TARGET_NO_ALIGN_VECTOR_INSN + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movlps\t{%1, %0|%0, %1} + movups\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %q1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2SF,V4SF,V2SF")]) + ;; Convert lowpart SUBREG into sse2_storelpd or *vec_extractv2df_0_sse. (define_split [(set (match_operand:DF 0 "register_operand") diff --git a/gcc/config/mingw/mingw32.h b/gcc/config/mingw/mingw32.h index 10bcd293527..a08de5a8a93 100644 --- a/gcc/config/mingw/mingw32.h +++ b/gcc/config/mingw/mingw32.h @@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. 
If not see #define TARGET_SUBTARGET_DEFAULT \ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS \ | MASK_STACK_PROBE | MASK_ALIGN_DOUBLE \ - | MASK_MS_BITFIELD_LAYOUT) + | MASK_MS_BITFIELD_LAYOUT | MASK_NO_ALIGN_VECTOR_INSN) #ifdef TARGET_USING_MCFGTHREAD #define DEFINE_THREAD_MODEL builtin_define ("__USING_MCFGTHREAD__"); -- 2.45.1