summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andrew Stubbs <ams@codesourcery.com> 2022-06-10 15:15:49 +0100
committer: Andrew Stubbs <ams@codesourcery.com> 2022-06-27 17:28:03 +0100
commit: 9aefadc9e6ac17437ead1bf3bcf452b52c447b78 (patch)
tree: 6fbec78836b825894e310c605ad14fb6f9b8fd79
parent: 88c73dbc2de129c5d4f1af95c6e5433447d847a2 (diff)
amdgcn: Support XNACK mode
The XNACK feature allows memory load instructions to restart safely following
a page-miss interrupt. This is useful for shared-memory devices, like APUs,
and to implement OpenMP Unified Shared Memory.

To support the feature we must be able to set the appropriate meta-data and
set the load instructions to early-clobber. When the port supports scheduling
of s_waitcnt instructions there will be further requirements.

gcc/ChangeLog:

    * config/gcn/gcn-hsa.h (XNACKOPT): New macro.
    (ASM_SPEC): Use XNACKOPT.
    * config/gcn/gcn-opts.h (enum sram_ecc_type): Rename to ...
    (enum hsaco_attr_type): ... this, and generalize the names.
    (TARGET_XNACK): New macro.
    * config/gcn/gcn-valu.md (gather<mode>_insn_1offset<exec>): Add xnack
    compatible alternatives.
    (gather<mode>_insn_2offsets<exec>): Likewise.
    * config/gcn/gcn.c (gcn_option_override): Permit -mxnack for devices
    other than Fiji.
    (gcn_expand_epilogue): Remove early-clobber problems.
    (output_file_start): Emit xnack attributes.
    (gcn_hsa_declare_function_name): Obey -mxnack setting.
    * config/gcn/gcn.md (xnack): New attribute.
    (enabled): Rework to include "xnack" attribute.
    (*movbi): Add xnack compatible alternatives.
    (*mov<mode>_insn): Likewise.
    (*mov<mode>_insn): Likewise.
    (*mov<mode>_insn): Likewise.
    (*movti_insn): Likewise.
    * config/gcn/gcn.opt (-mxnack): Add the "on/off/any" syntax.
    (sram_ecc_type): Rename to ...
    (hsaco_attr_type): ... this.
    * config/gcn/mkoffload.c (SET_XNACK_ANY): New macro.
    (TEST_XNACK): Delete.
    (TEST_XNACK_ANY): New macro.
    (TEST_XNACK_ON): New macro.
    (main): Support the new -mxnack=on/off/any syntax.
-rw-r--r-- gcc/config/gcn/gcn-hsa.h 3
-rw-r--r-- gcc/config/gcn/gcn-opts.h 10
-rw-r--r-- gcc/config/gcn/gcn-valu.md 29
-rw-r--r-- gcc/config/gcn/gcn.c 34
-rw-r--r-- gcc/config/gcn/gcn.md 113
-rw-r--r-- gcc/config/gcn/gcn.opt 18
-rw-r--r-- gcc/config/gcn/mkoffload.c 19
7 files changed, 140 insertions, 86 deletions
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 12c3b16c62e..d5eb3034dc6 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -81,12 +81,13 @@ extern unsigned int gcn_local_sym_hash (const char *name);
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
configuration. The name of the attribute also changed. */
#define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc"
+#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"
/* Use LLVM assembler and linker options. */
#define ASM_SPEC "-triple=amdgcn--amdhsa " \
"%:last_arg(%{march=*:-mcpu=%*}) " \
"%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
- "%{" NO_XNACK "mxnack:-mattr=+xnack;:-mattr=-xnack} " \
+ "%{" NO_XNACK XNACKOPT "}" \
"%{" NO_SRAM_ECC SRAMOPT "} " \
"-filetype=obj"
#define LINK_SPEC "--pie --export-dynamic"
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index 48065f96336..370e4bac662 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -48,11 +48,13 @@ extern enum gcn_isa {
#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
-enum sram_ecc_type
+#define TARGET_XNACK (flag_xnack != HSACO_ATTR_OFF)
+
+enum hsaco_attr_type
{
- SRAM_ECC_OFF,
- SRAM_ECC_ON,
- SRAM_ECC_ANY
+ HSACO_ATTR_OFF,
+ HSACO_ATTR_ON,
+ HSACO_ATTR_ANY
};
#endif
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 9b7197aa19a..4f51592b642 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -741,13 +741,13 @@
{})
(define_insn "gather<mode>_insn_1offset<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
+ [(set (match_operand:V_ALL 0 "register_operand" "=v,&v")
(unspec:V_ALL
- [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
+ [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v, v")
(vec_duplicate:<VnDI>
- (match_operand 2 "immediate_operand" " n")))
- (match_operand 3 "immediate_operand" " n")
- (match_operand 4 "immediate_operand" " n")
+ (match_operand 2 "immediate_operand" " n, n")))
+ (match_operand 3 "immediate_operand" " n, n")
+ (match_operand 4 "immediate_operand" " n, n")
(mem:BLK (scratch))]
UNSPEC_GATHER))]
"(AS_FLAT_P (INTVAL (operands[3]))
@@ -777,7 +777,8 @@
return buf;
}
[(set_attr "type" "flat")
- (set_attr "length" "12")])
+ (set_attr "length" "12")
+ (set_attr "xnack" "off,on")])
(define_insn "gather<mode>_insn_1offset_ds<exec>"
[(set (match_operand:V_ALL 0 "register_operand" "=v")
@@ -802,17 +803,18 @@
(set_attr "length" "12")])
(define_insn "gather<mode>_insn_2offsets<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
+ [(set (match_operand:V_ALL 0 "register_operand" "=v,&v")
(unspec:V_ALL
[(plus:<VnDI>
(plus:<VnDI>
(vec_duplicate:<VnDI>
- (match_operand:DI 1 "register_operand" "Sv"))
+ (match_operand:DI 1 "register_operand" "Sv,Sv"))
(sign_extend:<VnDI>
- (match_operand:<VnSI> 2 "register_operand" " v")))
- (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
- (match_operand 4 "immediate_operand" " n")
- (match_operand 5 "immediate_operand" " n")
+ (match_operand:<VnSI> 2 "register_operand" " v, v")))
+ (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
+ " n, n")))
+ (match_operand 4 "immediate_operand" " n, n")
+ (match_operand 5 "immediate_operand" " n, n")
(mem:BLK (scratch))]
UNSPEC_GATHER))]
"(AS_GLOBAL_P (INTVAL (operands[4]))
@@ -831,7 +833,8 @@
return buf;
}
[(set_attr "type" "flat")
- (set_attr "length" "12")])
+ (set_attr "length" "12")
+ (set_attr "xnack" "off,on")])
(define_expand "scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 6382ef91163..5317e18a78e 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -170,9 +170,14 @@ gcn_option_override (void)
acc_lds_size = 32768;
}
- /* The xnack option is a placeholder, for now. */
- if (flag_xnack)
- sorry ("XNACK support");
+ /* gfx803 "Fiji" does not support XNACK. */
+ if (gcn_arch == PROCESSOR_FIJI)
+ {
+ if (flag_xnack == HSACO_ATTR_ON)
+ error ("-mxnack=on is incompatible with -march=fiji");
+ /* Allow HSACO_ATTR_ANY silently because that's the default. */
+ flag_xnack = HSACO_ATTR_OFF;
+ }
}
/* }}} */
@@ -3187,17 +3192,19 @@ gcn_expand_epilogue (void)
/* Assume that an exit value compatible with gcn-run is expected.
That is, the third input parameter is an int*.
- We can't allocate any new registers, but the kernarg_reg is
- dead after this, so we'll use that. */
+ We can't allocate any new registers, but the dispatch_ptr and
+ kernarg_reg are dead after this, so we'll use those. */
+ rtx dispatch_ptr_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
+ [DISPATCH_PTR_ARG]);
rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
[KERNARG_SEGMENT_PTR_ARG]);
rtx retptr_mem = gen_rtx_MEM (DImode,
gen_rtx_PLUS (DImode, kernarg_reg,
GEN_INT (16)));
set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
- emit_move_insn (kernarg_reg, retptr_mem);
+ emit_move_insn (dispatch_ptr_reg, retptr_mem);
- rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
+ rtx retval_mem = gen_rtx_MEM (SImode, dispatch_ptr_reg);
set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
emit_move_insn (retval_mem,
gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
@@ -5222,11 +5229,12 @@ static void
output_file_start (void)
{
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
- configuration. In GCC binaries, this is true for SRAM ECC, but not
- XNACK. */
- const char *xnack = (flag_xnack ? ":xnack+" : ":xnack-");
- const char *sram_ecc = (flag_sram_ecc == SRAM_ECC_ON ? ":sramecc+"
- : flag_sram_ecc == SRAM_ECC_OFF ? ":sramecc-"
+ configuration. */
+ const char *xnack = (flag_xnack == HSACO_ATTR_ON ? ":xnack+"
+ : flag_xnack == HSACO_ATTR_OFF ? ":xnack-"
+ : "");
+ const char *sram_ecc = (flag_sram_ecc == HSACO_ATTR_ON ? ":sramecc+"
+ : flag_sram_ecc == HSACO_ATTR_OFF ? ":sramecc-"
: "");
const char *cpu;
@@ -5270,7 +5278,7 @@ void
gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
{
int sgpr, vgpr;
- bool xnack_enabled = false;
+ bool xnack_enabled = TARGET_XNACK;
fputs ("\n\n", file);
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index d41ce6c80a2..cba169093ce 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -276,12 +276,19 @@
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
+(define_attr "xnack" "na,off,on" (const_string "na"))
+
(define_attr "enabled" ""
- (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
- (and (eq_attr "gcn_version" "gcn5")
- (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
- (const_int 1)]
- (const_int 0)))
+ (cond [(and (eq_attr "gcn_version" "gcn5")
+ (eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
+ (const_int 0)
+ (and (eq_attr "xnack" "off")
+ (ne (symbol_ref "TARGET_XNACK") (const_int 0)))
+ (const_int 0)
+ (and (eq_attr "xnack" "on")
+ (eq (symbol_ref "TARGET_XNACK") (const_int 0)))
+ (const_int 0)]
+ (const_int 1)))
; We need to be able to identify v_readlane and v_writelane with
; SGPR lane selection in order to handle "Manually Inserted Wait States".
@@ -470,9 +477,9 @@
(define_insn "*movbi"
[(set (match_operand:BI 0 "nonimmediate_operand"
- "=Sg, v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
+ "=Sg, v,Sg,cs,cV,cV,Sm,&Sm,RS, v,&v,RF, v,&v,RM")
(match_operand:BI 1 "gcn_load_operand"
- "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
+ "SSA,vSvA, v,SS, v,SS,RS, RS,Sm,RF,RF, v,RM,RM, v"))]
""
{
/* SCC as an operand is currently not accepted by the LLVM assembler, so
@@ -514,66 +521,77 @@
return "s_mov_b32\tvcc_lo, %1\;"
"s_mov_b32\tvcc_hi, 0";
case 6:
- return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
case 7:
- return "s_store_dword\t%1, %A0";
+ return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
case 8:
- return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
+ return "s_store_dword\t%1, %A0";
case 9:
- return "flat_store_dword\t%A0, %1%O0%g0";
case 10:
- return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
+ return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
case 11:
+ return "flat_store_dword\t%A0, %1%O0%g0";
+ case 12:
+ case 13:
+ return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
+ case 14:
return "global_store_dword\t%A0, %1%O0%g0";
default:
gcc_unreachable ();
}
}
- [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
- flat,flat")
- (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
- (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
+ [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
+ flat,flat,flat,flat")
+ (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
+ (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
; 32bit move pattern
(define_insn "*mov<mode>_insn"
[(set (match_operand:SISF 0 "nonimmediate_operand"
- "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM")
+ "=SD,SD,SD,SD,&SD,RB,Sm,&Sm,RS,v,Sg, v, v,&v,RF,v,RLRG, v,SD, v,&v,RM")
(match_operand:SISF 1 "gcn_load_operand"
- "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))]
+ "SSA, J, B,RB, RB,Sm,RS, RS,Sm,v, v,Sv,RF,RF, v,B, v,RLRG, Y,RM,RM, v"))]
""
"@
s_mov_b32\t%0, %1
s_movk_i32\t%0, %1
s_mov_b32\t%0, %1
s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
+ s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
s_buffer_store%s1\t%1, s[0:3], %0
s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
s_store_dword\t%1, %A0
v_mov_b32\t%0, %1
v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
+ flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dword\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
s_mov_b32\t%0, %1
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dword\t%A0, %1%O0%g0"
- [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
- flat,vop1,ds,ds,sop1,flat,flat")
- (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
- (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
+ [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,smem,smem,vop1,vop3a,
+ vop3a,flat,flat,flat,vop1,ds,ds,sop1,flat,flat,flat")
+ (set_attr "exec" "*,*,*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length"
+ "4,4,8,12,12,12,12,12,12,4,8,8,12,12,12,8,12,12,8,12,12,12")
+ (set_attr "xnack"
+ "*,*,*,off,on,*,off,on,*,*,*,*,off,on,*,*,*,*,*,off,on,*")])
; 8/16bit move pattern
; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
(define_insn "*mov<mode>_insn"
[(set (match_operand:QIHI 0 "nonimmediate_operand"
- "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
+ "=SD,SD,SD,v,Sg, v, v,&v,RF,v,RLRG, v, v,&v,RM")
(match_operand:QIHI 1 "gcn_load_operand"
- "SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))]
+ "SSA, J, B,v, v,Sv,RF,RF, v,B, v,RLRG,RM,RM, v"))]
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
"@
s_mov_b32\t%0, %1
@@ -583,24 +601,27 @@
v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
+ flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store%s0\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store%s0\t%A0, %1%O0%g0"
- [(set_attr "type"
- "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
- (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
- (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
+ [(set_attr "type" "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,flat,vop1,ds,ds,
+ flat,flat,flat")
+ (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*,*,*")
+ (set_attr "length" "4,4,8,4,4,4,12,12,12,8,12,12,12,12,12")
+ (set_attr "xnack" "*,*,*,*,*,*,off,on,*,*,*,*,off,on,*")])
; 64bit move pattern
(define_insn_and_split "*mov<mode>_insn"
[(set (match_operand:DIDF 0 "nonimmediate_operand"
- "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
+ "=SD,SD,SD,RS,Sm,&Sm,v, v,Sg, v, v,&v,RF,RLRG, v, v,&v,RM")
(match_operand:DIDF 1 "general_operand"
- "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))]
+ "SSA, C,DB,Sm,RS, RS,v,DB, v,Sv,RF,RF, v, v,RLRG,RM,RM, v"))]
"GET_CODE(operands[1]) != SYMBOL_REF"
"@
s_mov_b64\t%0, %1
@@ -608,15 +629,18 @@
#
s_store_dwordx2\t%1, %A0
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
#
#
#
#
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
+ flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dwordx2\t%A0, %1%O0%g0
ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dwordx2\t%A0, %1%O0%g0"
"reload_completed
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
@@ -647,29 +671,33 @@
operands[3] = inhi;
}
}
- [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
- flat,ds,ds,flat,flat")
- (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
+ [(set_attr "type" "sop1,sop1,mult,smem,smem,smem,vmult,vmult,vmult,vmult,
+ flat,flat,flat,ds,ds,flat,flat,flat")
+ (set_attr "length" "4,8,*,12,12,12,*,*,*,*,12,12,12,12,12,12,12,12")
+ (set_attr "xnack" "*,*,*,*,off,on,*,*,*,*,off,on,*,*,*,off,on,*")])
; 128-bit move.
(define_insn_and_split "*movti_insn"
[(set (match_operand:TI 0 "nonimmediate_operand"
- "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
- (match_operand:TI 1 "general_operand"
- "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
+ "=SD,RS,Sm,&Sm,RF, v,&v,v, v,SD,RM, v,&v,RL, v")
+ (match_operand:TI 1 "general_operand"
+ "SSB,Sm,RS, RS, v,RF,RF,v,Sv, v, v,RM,RM, v,RL"))]
""
"@
#
s_store_dwordx4\t%1, %A0
s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
flat_store_dwordx4\t%A0, %1%O0%g0
flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
+ flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
#
#
#
global_store_dwordx4\t%A0, %1%O0%g0
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
"reload_completed
@@ -691,10 +719,11 @@
operands[0] = gcn_operand_part (TImode, operands[0], 0);
operands[1] = gcn_operand_part (TImode, operands[1], 0);
}
- [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
- ds,ds")
- (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
- (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
+ [(set_attr "type" "mult,smem,smem,smem,flat,flat,flat,vmult,vmult,vmult,flat,
+ flat,flat,ds,ds")
+ (set_attr "delayeduse" "*,*,yes,yes,*,*,*,*,*,*,*,yes,*,*,*")
+ (set_attr "length" "*,12,12,12,12,12,12,*,*,*,12,12,12,12,12")
+ (set_attr "xnack" "*,*,off,on,*,off,on,*,*,*,*,off,on,*,*")])
;; }}}
;; {{{ Prologue/Epilogue
@@ -862,6 +891,8 @@
(clobber (reg:BI SCC_REG))]
"GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
{
+ /* This s_load may not be XNACK-safe on devices where the GOT may fault.
+ DGPUs are most likely fine. */
if (SYMBOL_REF_P (operands[1])
&& SYMBOL_REF_WEAK (operands[1]))
return "s_getpc_b64\t%0\;"
@@ -886,6 +917,8 @@
{
/* !!! These sequences clobber CC_SAVE_REG. */
+ /* This s_load may not be XNACK-safe on devices where the GOT may fault.
+ DGPUs are most likely fine. */
if (SYMBOL_REF_P (operands[1])
&& SYMBOL_REF_WEAK (operands[1]))
return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 8c2009e3d6f..869f59716e7 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -86,23 +86,23 @@ Wopenacc-dims
Target Var(warn_openacc_dims) Warning
Warn about invalid OpenACC dimensions.
-mxnack
-Target Var(flag_xnack) Init(0)
-Compile for devices requiring XNACK enabled. Default off.
-
Enum
-Name(sram_ecc_type) Type(enum sram_ecc_type)
+Name(hsaco_attr_type) Type(enum hsaco_attr_type)
SRAM-ECC modes:
EnumValue
-Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF)
+Enum(hsaco_attr_type) String(off) Value(HSACO_ATTR_OFF)
EnumValue
-Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON)
+Enum(hsaco_attr_type) String(on) Value(HSACO_ATTR_ON)
EnumValue
-Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY)
+Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY)
+
+mxnack=
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY)
+Compile for devices requiring XNACK enabled. Default \"any\".
msram-ecc=
-Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY)
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 2876ab6a9ef..febc8461197 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -72,10 +72,14 @@
#define SET_XNACK_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
| EF_AMDGPU_FEATURE_XNACK_ON_V4)
+#define SET_XNACK_ANY(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
+ | EF_AMDGPU_FEATURE_XNACK_ANY_V4)
#define SET_XNACK_OFF(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
| EF_AMDGPU_FEATURE_XNACK_OFF_V4)
-#define TEST_XNACK(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
- == EF_AMDGPU_FEATURE_XNACK_ON_V4)
+#define TEST_XNACK_ANY(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
+ == EF_AMDGPU_FEATURE_XNACK_ANY_V4)
+#define TEST_XNACK_ON(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
+ == EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define SET_SRAM_ECC_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
| EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
@@ -881,9 +885,11 @@ main (int argc, char **argv)
fPIC = true;
else if (strcmp (argv[i], "-fpic") == 0)
fpic = true;
- else if (strcmp (argv[i], "-mxnack") == 0)
+ else if (strcmp (argv[i], "-mxnack=on") == 0)
SET_XNACK_ON (elf_flags);
- else if (strcmp (argv[i], "-mno-xnack") == 0)
+ else if (strcmp (argv[i], "-mxnack=any") == 0)
+ SET_XNACK_ANY (elf_flags);
+ else if (strcmp (argv[i], "-mxnack=off") == 0)
SET_XNACK_OFF (elf_flags);
else if (strcmp (argv[i], "-msram-ecc=on") == 0)
SET_SRAM_ECC_ON (elf_flags);
@@ -1042,8 +1048,9 @@ main (int argc, char **argv)
obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
obstack_ptr_grow (&ld_argv_obstack,
- (TEST_XNACK (elf_flags)
- ? "-mxnack" : "-mno-xnack"));
+ (TEST_XNACK_ON (elf_flags) ? "-mxnack=on"
+ : TEST_XNACK_ANY (elf_flags) ? "-mxnack=any"
+ : "-mxnack=off"));
obstack_ptr_grow (&ld_argv_obstack,
(TEST_SRAM_ECC_ON (elf_flags) ? "-msram-ecc=on"
: TEST_SRAM_ECC_ANY (elf_flags) ? "-msram-ecc=any"