diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2022-02-24 17:16:13 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2022-05-24 16:59:27 +0100 |
commit | a1539294321e03cc762f7d18d94ba972729a0339 (patch) | |
tree | 5c0647f207ac3d660123a5dce3e624ce41df252a | |
parent | 7d5faa57568b68d83543480e1cf39383986c86b5 (diff) |
amdgcn: Add gfx90a support
This adds architecture options and multilibs for the AMD GFX90a GPUs.
It also tidies up some of the ISA selection code, and corrects a few small
mistakes in the gfx908 naming.
gcc/ChangeLog:
* config.gcc (amdgcn): Accept --with-arch=gfx908 and gfx90a.
* config/gcn/gcn-opts.h (enum gcn_isa): New.
(TARGET_GCN3): Use enum gcn_isa.
(TARGET_GCN3_PLUS): Likewise.
(TARGET_GCN5): Likewise.
(TARGET_GCN5_PLUS): Likewise.
(TARGET_CDNA1): New.
(TARGET_CDNA1_PLUS): New.
(TARGET_CDNA2): New.
(TARGET_CDNA2_PLUS): New.
(TARGET_M0_LDS_LIMIT): New.
(TARGET_PACKED_WORK_ITEMS): New.
* config/gcn/gcn.cc (gcn_isa): Change to enum gcn_isa.
(gcn_option_override): Recognise CDNA ISA variants.
(gcn_omp_device_kind_arch_isa): Support gfx90a.
(gcn_expand_prologue): Make m0 init optional.
Add support for packed work items.
(output_file_start): Support gfx90a.
(gcn_hsa_declare_function_name): Support gfx90a metadata.
* config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS): Add __CDNA1__ and
__CDNA2__.
* config/gcn/gcn.md (<su>mulsi3_highpart): Use TARGET_GCN5_PLUS.
(<su>mulsi3_highpart_imm): Likewise.
(<su>mulsidi3): Likewise.
(<su>mulsidi3_imm): Likewise.
* config/gcn/gcn.opt (gpu_type): Add gfx90a.
* config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX90a): New.
(main): Support gfx90a.
* config/gcn/t-gcn-hsa: Add gfx90a multilib.
* config/gcn/t-omp-device: Add gfx90a isa.
libgomp/ChangeLog:
* plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add
EF_AMDGPU_MACH_AMDGCN_GFX90a.
(gcn_gfx90a_s): New.
(isa_hsa_name): Support gfx90a.
(isa_code): Likewise.
Backport from cde52d3a2d02d037da53e6974d5e39021030b346.
-rw-r--r-- | gcc/ChangeLog.omp | 33 | ||||
-rw-r--r-- | gcc/config.gcc | 2 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-opts.h | 28 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.c | 60 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.h | 4 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.opt | 3 | ||||
-rw-r--r-- | gcc/config/gcn/mkoffload.c | 4 | ||||
-rw-r--r-- | gcc/config/gcn/t-gcn-hsa | 4 | ||||
-rw-r--r-- | gcc/config/gcn/t-omp-device | 2 | ||||
-rw-r--r-- | libgomp/ChangeLog.omp | 8 | ||||
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 9 |
11 files changed, 137 insertions, 20 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index 21555f7bab8..eb766170ace 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,5 +1,38 @@ 2022-05-24 Andrew Stubbs <ams@codesourcery.com> + * config.gcc (amdgcn): Accept --with-arch=gfx908 and gfx90a. + * config/gcn/gcn-opts.h (enum gcn_isa): New. + (TARGET_GCN3): Use enum gcn_isa. + (TARGET_GCN3_PLUS): Likewise. + (TARGET_GCN5): Likewise. + (TARGET_GCN5_PLUS): Likewise. + (TARGET_CDNA1): New. + (TARGET_CDNA1_PLUS): New. + (TARGET_CDNA2): New. + (TARGET_CDNA2_PLUS): New. + (TARGET_M0_LDS_LIMIT): New. + (TARGET_PACKED_WORK_ITEMS): New. + * config/gcn/gcn.cc (gcn_isa): Change to enum gcn_isa. + (gcn_option_override): Recognise CDNA ISA variants. + (gcn_omp_device_kind_arch_isa): Support gfx90a. + (gcn_expand_prologue): Make m0 init optional. + Add support for packed work items. + (output_file_start): Support gfx90a. + (gcn_hsa_declare_function_name): Support gfx90a metadata. + * config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS):Add __CDNA1__ and + __CDNA2__. + * config/gcn/gcn.md (<su>mulsi3_highpart): Use TARGET_GCN5_PLUS. + (<su>mulsi3_highpart_imm): Likewise. + (<su>mulsidi3): Likewise. + (<su>mulsidi3_imm): Likewise. + * config/gcn/gcn.opt (gpu_type): Add gfx90a. + * config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX90a): New. + (main): Support gfx90a. + * config/gcn/t-gcn-hsa: Add gfx90a multilib. + * config/gcn/t-omp-device: Add gfx90a isa. + +2022-05-24 Andrew Stubbs <ams@codesourcery.com> + * config.in: Regenerate. * config/gcn/gcn-hsa.h (X_FIJI): Delete. (X_900): Delete. 
diff --git a/gcc/config.gcc b/gcc/config.gcc index a020e0808c9..17a4352b764 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4522,7 +4522,7 @@ case "${target}" in for which in arch tune; do eval "val=\$with_$which" case ${val} in - "" | fiji | gfx900 | gfx906 ) + "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a) # OK ;; *) diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index b25516060e1..48065f96336 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -23,16 +23,30 @@ enum processor_type PROCESSOR_FIJI, // gfx803 PROCESSOR_VEGA10, // gfx900 PROCESSOR_VEGA20, // gfx906 - PROCESSOR_GFX908 // as yet unnamed + PROCESSOR_GFX908, + PROCESSOR_GFX90a }; /* Set in gcn_option_override. */ -extern int gcn_isa; - -#define TARGET_GCN3 (gcn_isa == 3) -#define TARGET_GCN3_PLUS (gcn_isa >= 3) -#define TARGET_GCN5 (gcn_isa == 5) -#define TARGET_GCN5_PLUS (gcn_isa >= 5) +extern enum gcn_isa { + ISA_UNKNOWN, + ISA_GCN3, + ISA_GCN5, + ISA_CDNA1, + ISA_CDNA2 +} gcn_isa; + +#define TARGET_GCN3 (gcn_isa == ISA_GCN3) +#define TARGET_GCN3_PLUS (gcn_isa >= ISA_GCN3) +#define TARGET_GCN5 (gcn_isa == ISA_GCN5) +#define TARGET_GCN5_PLUS (gcn_isa >= ISA_GCN5) +#define TARGET_CDNA1 (gcn_isa == ISA_CDNA1) +#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1) +#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2) +#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2) + +#define TARGET_M0_LDS_LIMIT (TARGET_GCN3) +#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS) enum sram_ecc_type { diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index 7f00cd7a14c..6382ef91163 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -66,7 +66,7 @@ static bool ext_gcn_constants_init = 0; /* Holds the ISA variant, derived from the command line parameters. */ -int gcn_isa = 3; /* Default to GCN3. */ +enum gcn_isa gcn_isa = ISA_GCN3; /* Default to GCN3. 
*/ /* Reserve this much space for LDS (for propagating variables from worker-single mode to worker-partitioned mode), per workgroup. Global @@ -129,7 +129,13 @@ gcn_option_override (void) if (!flag_pic) flag_pic = flag_pie; - gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5; + gcn_isa = (gcn_arch == PROCESSOR_FIJI ? ISA_GCN3 + : gcn_arch == PROCESSOR_VEGA10 ? ISA_GCN5 + : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5 + : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1 + : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2 + : ISA_UNKNOWN); + gcc_assert (gcn_isa != ISA_UNKNOWN); /* The default stack size needs to be small for offload kernels because there may be many, many threads. Also, a smaller stack gives a @@ -2641,6 +2647,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait, return gcn_arch == PROCESSOR_VEGA20; if (strcmp (name, "gfx908") == 0) return gcn_arch == PROCESSOR_GFX908; + if (strcmp (name, "gfx90a") == 0) + return gcn_arch == PROCESSOR_GFX90a; return 0; default: gcc_unreachable (); @@ -3080,13 +3088,35 @@ gcn_expand_prologue () /* Ensure that the scheduler doesn't do anything unexpected. */ emit_insn (gen_blockage ()); - /* m0 is initialized for the usual LDS DS and FLAT memory case. - The low-part is the address of the topmost addressable byte, which is - size-1. The high-part is an offset and should be zero. */ - emit_move_insn (gen_rtx_REG (SImode, M0_REG), - gen_int_mode (LDS_SIZE, SImode)); + if (TARGET_M0_LDS_LIMIT) + { + /* m0 is initialized for the usual LDS DS and FLAT memory case. + The low-part is the address of the topmost addressable byte, which is + size-1. The high-part is an offset and should be zero. 
*/ + emit_move_insn (gen_rtx_REG (SImode, M0_REG), + gen_int_mode (LDS_SIZE, SImode)); + + emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG))); + } - emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG))); + if (TARGET_PACKED_WORK_ITEMS + && cfun && cfun->machine && !cfun->machine->normal_function) + { + /* v0 conatins the X, Y and Z dimensions all in one. + Expand them out for ABI compatibility. */ + /* TODO: implement and use zero_extract. */ + rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); + emit_insn (gen_andv64si3 (v1, gen_rtx_REG (V64SImode, VGPR_REGNO (0)), + gen_rtx_CONST_INT (VOIDmode, 0x3FF << 10))); + emit_insn (gen_lshrv64si3 (v1, v1, gen_rtx_CONST_INT (VOIDmode, 10))); + emit_insn (gen_prologue_use (v1)); + + rtx v2 = gen_rtx_REG (V64SImode, VGPR_REGNO (2)); + emit_insn (gen_andv64si3 (v2, gen_rtx_REG (V64SImode, VGPR_REGNO (0)), + gen_rtx_CONST_INT (VOIDmode, 0x3FF << 20))); + emit_insn (gen_lshrv64si3 (v2, v2, gen_rtx_CONST_INT (VOIDmode, 20))); + emit_insn (gen_prologue_use (v2)); + } if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp) { @@ -5218,6 +5248,9 @@ output_file_start (void) case PROCESSOR_GFX908: cpu = "gfx908"; break; + case PROCESSOR_GFX90a: + cpu = "gfx90a"; + break; default: gcc_unreachable (); } @@ -5271,6 +5304,10 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree) sgpr = MAX_NORMAL_SGPR_COUNT; } + /* The gfx90a accum_offset field can't represent 0 registers. */ + if (gcn_arch == PROCESSOR_GFX90a && vgpr < 4) + vgpr = 4; + fputs ("\t.rodata\n" "\t.p2align\t6\n" "\t.amdhsa_kernel\t", file); @@ -5339,6 +5376,11 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree) one 64th the wave-front stack size. 
*/ stack_size_opt / 64, LDS_SIZE); + if (gcn_arch == PROCESSOR_GFX90a) + fprintf (file, + "\t .amdhsa_accum_offset\t%i\n" + "\t .amdhsa_tg_split\t0\n", + (vgpr+3)&~3); // I think this means the AGPRs come after the VGPRs fputs ("\t.end_amdhsa_kernel\n", file); #if 1 @@ -5367,6 +5409,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree) LDS_SIZE, stack_size_opt / 64, sgpr, vgpr); + if (gcn_arch == PROCESSOR_GFX90a) + fprintf (file, " .agpr_count: 0\n"); // AGPRs are not used, yet fputs (" .end_amdgpu_metadata\n", file); #endif diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h index a6efea7eaf1..6a08a629b22 100644 --- a/gcc/config/gcn/gcn.h +++ b/gcc/config/gcn/gcn.h @@ -24,6 +24,10 @@ builtin_define ("__GCN3__"); \ else if (TARGET_GCN5) \ builtin_define ("__GCN5__"); \ + else if (TARGET_CDNA1) \ + builtin_define ("__CDNA1__"); \ + else if (TARGET_CDNA2) \ + builtin_define ("__CDNA2__"); \ } \ while(0) diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index e7c77b3cb96..8c2009e3d6f 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -37,6 +37,9 @@ Enum(gpu_type) String(gfx906) Value(PROCESSOR_VEGA20) EnumValue Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908) +EnumValue +Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a) + march= Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI) Specify the name of the target GPU. diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c index 58c9b46dc35..2876ab6a9ef 100644 --- a/gcc/config/gcn/mkoffload.c +++ b/gcc/config/gcn/mkoffload.c @@ -55,6 +55,8 @@ #define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f #undef EF_AMDGPU_MACH_AMDGCN_GFX908 #define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30 +#undef EF_AMDGPU_MACH_AMDGCN_GFX90a +#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f #define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. 
*/ #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000 @@ -904,6 +906,8 @@ main (int argc, char **argv) elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906; else if (strcmp (argv[i], "-march=gfx908") == 0) elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908; + else if (strcmp (argv[i], "-march=gfx90a") == 0) + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a; } if (!(fopenacc ^ fopenmp)) diff --git a/gcc/config/gcn/t-gcn-hsa b/gcc/config/gcn/t-gcn-hsa index ee4d9b30ff2..4a67b110611 100644 --- a/gcc/config/gcn/t-gcn-hsa +++ b/gcc/config/gcn/t-gcn-hsa @@ -42,8 +42,8 @@ ALL_HOST_OBJS += gcn-run.o gcn-run$(exeext): gcn-run.o +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ $< -ldl -MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908 -MULTILIB_DIRNAMES = gfx900 gfx906 gfx908 +MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908/march=gfx90a +MULTILIB_DIRNAMES = gfx900 gfx906 gfx908 gfx90a gcn-tree.o: $(srcdir)/config/gcn/gcn-tree.c $(COMPILE) $< diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device index 8461c432ca9..edc43a10e0e 100644 --- a/gcc/config/gcn/t-omp-device +++ b/gcc/config/gcn/t-omp-device @@ -1,4 +1,4 @@ omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.c echo kind: gpu > $@ echo arch: gcn >> $@ - echo isa: fiji gfx900 gfx906 gfx908 >> $@ + echo isa: fiji gfx900 gfx906 gfx908 gfx90a >> $@ diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index ce3d9e5c536..a66c4ebac41 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,11 @@ +2022-05-24 Andrew Stubbs <ams@codesourcery.com> + + * plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add + EF_AMDGPU_MACH_AMDGCN_GFX90a. + (gcn_gfx90a_s): New. + (isa_hsa_name): Support gfx90a. + (isa_code): Likewise. + 2022-04-13 Andrew Stubbs <ams@codesourcery.com> * testsuite/libgomp.c/alloc-pinned-1.c: Autodetect page size. 
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index f1cba0b811a..969683ea1d2 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -403,7 +403,8 @@ typedef enum { EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a, EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c, EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, - EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030 + EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030, + EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f } EF_AMDGPU_MACH; const static int EF_AMDGPU_MACH_MASK = 0x000000ff; @@ -1629,6 +1630,7 @@ const static char *gcn_gfx803_s = "gfx803"; const static char *gcn_gfx900_s = "gfx900"; const static char *gcn_gfx906_s = "gfx906"; const static char *gcn_gfx908_s = "gfx908"; +const static char *gcn_gfx90a_s = "gfx90a"; const static int gcn_isa_name_len = 6; /* Returns the name that the HSA runtime uses for the ISA or NULL if we do not @@ -1646,6 +1648,8 @@ isa_hsa_name (int isa) { return gcn_gfx906_s; case EF_AMDGPU_MACH_AMDGCN_GFX908: return gcn_gfx908_s; + case EF_AMDGPU_MACH_AMDGCN_GFX90a: + return gcn_gfx90a_s; } return NULL; } @@ -1682,6 +1686,9 @@ isa_code(const char *isa) { if (!strncmp (isa, gcn_gfx908_s, gcn_isa_name_len)) return EF_AMDGPU_MACH_AMDGCN_GFX908; + if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len)) + return EF_AMDGPU_MACH_AMDGCN_GFX90a; + return -1; } |