summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@codesourcery.com>2022-02-24 17:16:13 +0000
committerAndrew Stubbs <ams@codesourcery.com>2022-05-24 16:59:27 +0100
commita1539294321e03cc762f7d18d94ba972729a0339 (patch)
tree5c0647f207ac3d660123a5dce3e624ce41df252a
parent7d5faa57568b68d83543480e1cf39383986c86b5 (diff)
amdgcn: Add gfx90a support
This adds architecture options and multilibs for the AMD GFX90a GPUs. It also tidies up some of the ISA selection code, and corrects a few small mistakes in the gfx908 naming. gcc/ChangeLog: * config.gcc (amdgcn): Accept --with-arch=gfx908 and gfx90a. * config/gcn/gcn-opts.h (enum gcn_isa): New. (TARGET_GCN3): Use enum gcn_isa. (TARGET_GCN3_PLUS): Likewise. (TARGET_GCN5): Likewise. (TARGET_GCN5_PLUS): Likewise. (TARGET_CDNA1): New. (TARGET_CDNA1_PLUS): New. (TARGET_CDNA2): New. (TARGET_CDNA2_PLUS): New. (TARGET_M0_LDS_LIMIT): New. (TARGET_PACKED_WORK_ITEMS): New. * config/gcn/gcn.cc (gcn_isa): Change to enum gcn_isa. (gcn_option_override): Recognise CDNA ISA variants. (gcn_omp_device_kind_arch_isa): Support gfx90a. (gcn_expand_prologue): Make m0 init optional. Add support for packed work items. (output_file_start): Support gfx90a. (gcn_hsa_declare_function_name): Support gfx90a metadata. * config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS): Add __CDNA1__ and __CDNA2__. * config/gcn/gcn.md (<su>mulsi3_highpart): Use TARGET_GCN5_PLUS. (<su>mulsi3_highpart_imm): Likewise. (<su>mulsidi3): Likewise. (<su>mulsidi3_imm): Likewise. * config/gcn/gcn.opt (gpu_type): Add gfx90a. * config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX90a): New. (main): Support gfx90a. * config/gcn/t-gcn-hsa: Add gfx90a multilib. * config/gcn/t-omp-device: Add gfx90a isa. libgomp/ChangeLog: * plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add EF_AMDGPU_MACH_AMDGCN_GFX90a. (gcn_gfx90a_s): New. (isa_hsa_name): Support gfx90a. (isa_code): Likewise. Backport from cde52d3a2d02d037da53e6974d5e39021030b346.
-rw-r--r--gcc/ChangeLog.omp33
-rw-r--r--gcc/config.gcc2
-rw-r--r--gcc/config/gcn/gcn-opts.h28
-rw-r--r--gcc/config/gcn/gcn.c60
-rw-r--r--gcc/config/gcn/gcn.h4
-rw-r--r--gcc/config/gcn/gcn.opt3
-rw-r--r--gcc/config/gcn/mkoffload.c4
-rw-r--r--gcc/config/gcn/t-gcn-hsa4
-rw-r--r--gcc/config/gcn/t-omp-device2
-rw-r--r--libgomp/ChangeLog.omp8
-rw-r--r--libgomp/plugin/plugin-gcn.c9
11 files changed, 137 insertions, 20 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 21555f7bab8..eb766170ace 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,5 +1,38 @@
2022-05-24 Andrew Stubbs <ams@codesourcery.com>
+ * config.gcc (amdgcn): Accept --with-arch=gfx908 and gfx90a.
+ * config/gcn/gcn-opts.h (enum gcn_isa): New.
+ (TARGET_GCN3): Use enum gcn_isa.
+ (TARGET_GCN3_PLUS): Likewise.
+ (TARGET_GCN5): Likewise.
+ (TARGET_GCN5_PLUS): Likewise.
+ (TARGET_CDNA1): New.
+ (TARGET_CDNA1_PLUS): New.
+ (TARGET_CDNA2): New.
+ (TARGET_CDNA2_PLUS): New.
+ (TARGET_M0_LDS_LIMIT): New.
+ (TARGET_PACKED_WORK_ITEMS): New.
+ * config/gcn/gcn.c (gcn_isa): Change to enum gcn_isa.
+ (gcn_option_override): Recognise CDNA ISA variants.
+ (gcn_omp_device_kind_arch_isa): Support gfx90a.
+ (gcn_expand_prologue): Make m0 init optional.
+ Add support for packed work items.
+ (output_file_start): Support gfx90a.
+ (gcn_hsa_declare_function_name): Support gfx90a metadata.
+ * config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS): Add __CDNA1__ and
+ __CDNA2__.
+ * config/gcn/gcn.md (<su>mulsi3_highpart): Use TARGET_GCN5_PLUS.
+ (<su>mulsi3_highpart_imm): Likewise.
+ (<su>mulsidi3): Likewise.
+ (<su>mulsidi3_imm): Likewise.
+ * config/gcn/gcn.opt (gpu_type): Add gfx90a.
+ * config/gcn/mkoffload.c (EF_AMDGPU_MACH_AMDGCN_GFX90a): New.
+ (main): Support gfx90a.
+ * config/gcn/t-gcn-hsa: Add gfx90a multilib.
+ * config/gcn/t-omp-device: Add gfx90a isa.
+
+2022-05-24 Andrew Stubbs <ams@codesourcery.com>
+
* config.in: Regenerate.
* config/gcn/gcn-hsa.h (X_FIJI): Delete.
(X_900): Delete.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index a020e0808c9..17a4352b764 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4522,7 +4522,7 @@ case "${target}" in
for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
- "" | fiji | gfx900 | gfx906 )
+ "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a)
# OK
;;
*)
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index b25516060e1..48065f96336 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -23,16 +23,30 @@ enum processor_type
PROCESSOR_FIJI, // gfx803
PROCESSOR_VEGA10, // gfx900
PROCESSOR_VEGA20, // gfx906
- PROCESSOR_GFX908 // as yet unnamed
+ PROCESSOR_GFX908,
+ PROCESSOR_GFX90a
};
/* Set in gcn_option_override. */
-extern int gcn_isa;
-
-#define TARGET_GCN3 (gcn_isa == 3)
-#define TARGET_GCN3_PLUS (gcn_isa >= 3)
-#define TARGET_GCN5 (gcn_isa == 5)
-#define TARGET_GCN5_PLUS (gcn_isa >= 5)
+extern enum gcn_isa {
+ ISA_UNKNOWN,
+ ISA_GCN3,
+ ISA_GCN5,
+ ISA_CDNA1,
+ ISA_CDNA2
+} gcn_isa;
+
+#define TARGET_GCN3 (gcn_isa == ISA_GCN3)
+#define TARGET_GCN3_PLUS (gcn_isa >= ISA_GCN3)
+#define TARGET_GCN5 (gcn_isa == ISA_GCN5)
+#define TARGET_GCN5_PLUS (gcn_isa >= ISA_GCN5)
+#define TARGET_CDNA1 (gcn_isa == ISA_CDNA1)
+#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
+#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
+#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
+
+#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
+#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
enum sram_ecc_type
{
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 7f00cd7a14c..6382ef91163 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -66,7 +66,7 @@ static bool ext_gcn_constants_init = 0;
/* Holds the ISA variant, derived from the command line parameters. */
-int gcn_isa = 3; /* Default to GCN3. */
+enum gcn_isa gcn_isa = ISA_GCN3; /* Default to GCN3. */
/* Reserve this much space for LDS (for propagating variables from
worker-single mode to worker-partitioned mode), per workgroup. Global
@@ -129,7 +129,13 @@ gcn_option_override (void)
if (!flag_pic)
flag_pic = flag_pie;
- gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5;
+ gcn_isa = (gcn_arch == PROCESSOR_FIJI ? ISA_GCN3
+ : gcn_arch == PROCESSOR_VEGA10 ? ISA_GCN5
+ : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
+ : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
+ : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
+ : ISA_UNKNOWN);
+ gcc_assert (gcn_isa != ISA_UNKNOWN);
/* The default stack size needs to be small for offload kernels because
there may be many, many threads. Also, a smaller stack gives a
@@ -2641,6 +2647,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
return gcn_arch == PROCESSOR_VEGA20;
if (strcmp (name, "gfx908") == 0)
return gcn_arch == PROCESSOR_GFX908;
+ if (strcmp (name, "gfx90a") == 0)
+ return gcn_arch == PROCESSOR_GFX90a;
return 0;
default:
gcc_unreachable ();
@@ -3080,13 +3088,35 @@ gcn_expand_prologue ()
/* Ensure that the scheduler doesn't do anything unexpected. */
emit_insn (gen_blockage ());
- /* m0 is initialized for the usual LDS DS and FLAT memory case.
- The low-part is the address of the topmost addressable byte, which is
- size-1. The high-part is an offset and should be zero. */
- emit_move_insn (gen_rtx_REG (SImode, M0_REG),
- gen_int_mode (LDS_SIZE, SImode));
+ if (TARGET_M0_LDS_LIMIT)
+ {
+ /* m0 is initialized for the usual LDS DS and FLAT memory case.
+ The low-part is the address of the topmost addressable byte, which is
+ size-1. The high-part is an offset and should be zero. */
+ emit_move_insn (gen_rtx_REG (SImode, M0_REG),
+ gen_int_mode (LDS_SIZE, SImode));
+
+ emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
+ }
- emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
+ if (TARGET_PACKED_WORK_ITEMS
+ && cfun && cfun->machine && !cfun->machine->normal_function)
+ {
+ /* v0 contains the X, Y and Z dimensions all in one.
+ Expand them out for ABI compatibility. */
+ /* TODO: implement and use zero_extract. */
+ rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
+ emit_insn (gen_andv64si3 (v1, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
+ gen_rtx_CONST_INT (VOIDmode, 0x3FF << 10)));
+ emit_insn (gen_lshrv64si3 (v1, v1, gen_rtx_CONST_INT (VOIDmode, 10)));
+ emit_insn (gen_prologue_use (v1));
+
+ rtx v2 = gen_rtx_REG (V64SImode, VGPR_REGNO (2));
+ emit_insn (gen_andv64si3 (v2, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
+ gen_rtx_CONST_INT (VOIDmode, 0x3FF << 20)));
+ emit_insn (gen_lshrv64si3 (v2, v2, gen_rtx_CONST_INT (VOIDmode, 20)));
+ emit_insn (gen_prologue_use (v2));
+ }
if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
{
@@ -5218,6 +5248,9 @@ output_file_start (void)
case PROCESSOR_GFX908:
cpu = "gfx908";
break;
+ case PROCESSOR_GFX90a:
+ cpu = "gfx90a";
+ break;
default: gcc_unreachable ();
}
@@ -5271,6 +5304,10 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
sgpr = MAX_NORMAL_SGPR_COUNT;
}
+ /* The gfx90a accum_offset field can't represent 0 registers. */
+ if (gcn_arch == PROCESSOR_GFX90a && vgpr < 4)
+ vgpr = 4;
+
fputs ("\t.rodata\n"
"\t.p2align\t6\n"
"\t.amdhsa_kernel\t", file);
@@ -5339,6 +5376,11 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
one 64th the wave-front stack size. */
stack_size_opt / 64,
LDS_SIZE);
+ if (gcn_arch == PROCESSOR_GFX90a)
+ fprintf (file,
+ "\t .amdhsa_accum_offset\t%i\n"
+ "\t .amdhsa_tg_split\t0\n",
+ (vgpr+3)&~3); // I think this means the AGPRs come after the VGPRs
fputs ("\t.end_amdhsa_kernel\n", file);
#if 1
@@ -5367,6 +5409,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
LDS_SIZE,
stack_size_opt / 64,
sgpr, vgpr);
+ if (gcn_arch == PROCESSOR_GFX90a)
+ fprintf (file, " .agpr_count: 0\n"); // AGPRs are not used, yet
fputs (" .end_amdgpu_metadata\n", file);
#endif
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index a6efea7eaf1..6a08a629b22 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -24,6 +24,10 @@
builtin_define ("__GCN3__"); \
else if (TARGET_GCN5) \
builtin_define ("__GCN5__"); \
+ else if (TARGET_CDNA1) \
+ builtin_define ("__CDNA1__"); \
+ else if (TARGET_CDNA2) \
+ builtin_define ("__CDNA2__"); \
} \
while(0)
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index e7c77b3cb96..8c2009e3d6f 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -37,6 +37,9 @@ Enum(gpu_type) String(gfx906) Value(PROCESSOR_VEGA20)
EnumValue
Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908)
+EnumValue
+Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
+
march=
Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI)
Specify the name of the target GPU.
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 58c9b46dc35..2876ab6a9ef 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -55,6 +55,8 @@
#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
#undef EF_AMDGPU_MACH_AMDGCN_GFX908
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
+#undef EF_AMDGPU_MACH_AMDGCN_GFX90a
+#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
#define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
@@ -904,6 +906,8 @@ main (int argc, char **argv)
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
else if (strcmp (argv[i], "-march=gfx908") == 0)
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
+ else if (strcmp (argv[i], "-march=gfx90a") == 0)
+ elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
}
if (!(fopenacc ^ fopenmp))
diff --git a/gcc/config/gcn/t-gcn-hsa b/gcc/config/gcn/t-gcn-hsa
index ee4d9b30ff2..4a67b110611 100644
--- a/gcc/config/gcn/t-gcn-hsa
+++ b/gcc/config/gcn/t-gcn-hsa
@@ -42,8 +42,8 @@ ALL_HOST_OBJS += gcn-run.o
gcn-run$(exeext): gcn-run.o
+$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ $< -ldl
-MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908
-MULTILIB_DIRNAMES = gfx900 gfx906 gfx908
+MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908/march=gfx90a
+MULTILIB_DIRNAMES = gfx900 gfx906 gfx908 gfx90a
gcn-tree.o: $(srcdir)/config/gcn/gcn-tree.c
$(COMPILE) $<
diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
index 8461c432ca9..edc43a10e0e 100644
--- a/gcc/config/gcn/t-omp-device
+++ b/gcc/config/gcn/t-omp-device
@@ -1,4 +1,4 @@
omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.c
echo kind: gpu > $@
echo arch: gcn >> $@
- echo isa: fiji gfx900 gfx906 gfx908 >> $@
+ echo isa: fiji gfx900 gfx906 gfx908 gfx90a >> $@
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index ce3d9e5c536..a66c4ebac41 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,11 @@
+2022-05-24 Andrew Stubbs <ams@codesourcery.com>
+
+ * plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add
+ EF_AMDGPU_MACH_AMDGCN_GFX90a.
+ (gcn_gfx90a_s): New.
+ (isa_hsa_name): Support gfx90a.
+ (isa_code): Likewise.
+
2022-04-13 Andrew Stubbs <ams@codesourcery.com>
* testsuite/libgomp.c/alloc-pinned-1.c: Autodetect page size.
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index f1cba0b811a..969683ea1d2 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -403,7 +403,8 @@ typedef enum {
EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
- EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030
+ EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
+ EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f
} EF_AMDGPU_MACH;
const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
@@ -1629,6 +1630,7 @@ const static char *gcn_gfx803_s = "gfx803";
const static char *gcn_gfx900_s = "gfx900";
const static char *gcn_gfx906_s = "gfx906";
const static char *gcn_gfx908_s = "gfx908";
+const static char *gcn_gfx90a_s = "gfx90a";
const static int gcn_isa_name_len = 6;
/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
@@ -1646,6 +1648,8 @@ isa_hsa_name (int isa) {
return gcn_gfx906_s;
case EF_AMDGPU_MACH_AMDGCN_GFX908:
return gcn_gfx908_s;
+ case EF_AMDGPU_MACH_AMDGCN_GFX90a:
+ return gcn_gfx90a_s;
}
return NULL;
}
@@ -1682,6 +1686,9 @@ isa_code(const char *isa) {
if (!strncmp (isa, gcn_gfx908_s, gcn_isa_name_len))
return EF_AMDGPU_MACH_AMDGCN_GFX908;
+ if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len))
+ return EF_AMDGPU_MACH_AMDGCN_GFX90a;
+
return -1;
}