amdgcn: Add gfx90a support

This adds architecture options and multilibs for the AMD GFX90a GPUs. It also tidies up some of the ISA selection code, and corrects a few small mistake in the gfx908 naming. gcc/ChangeLog: * config.gcc (amdgcn): Accept --with-arch=gfx908 and gfx90a. * config/gcn/gcn-opts.h (enum gcn_isa): New. (TARGET_GCN3): Use enum gcn_isa. (TARGET_GCN3_PLUS): Likewise. (TARGET_GCN5): Likewise. (TARGET_GCN5_PLUS): Likewise. (TARGET_CDNA1): New. (TARGET_CDNA1_PLUS): New. (TARGET_CDNA2): New. (TARGET_CDNA2_PLUS): New. (TARGET_M0_LDS_LIMIT): New. (TARGET_PACKED_WORK_ITEMS): New. * config/gcn/gcn.cc (gcn_isa): Change to enum gcn_isa. (gcn_option_override): Recognise CDNA ISA variants. (gcn_omp_device_kind_arch_isa): Support gfx90a. (gcn_expand_prologue): Make m0 init optional. Add support for packed work items. (output_file_start): Support gfx90a. (gcn_hsa_declare_function_name): Support gfx90a metadata. * config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS):Add __CDNA1__ and __CDNA2__. * config/gcn/gcn.md (<su>mulsi3_highpart): Use TARGET_GCN5_PLUS. (<su>mulsi3_highpart_imm): Likewise. (<su>mulsidi3): Likewise. (<su>mulsidi3_imm): Likewise. * config/gcn/gcn.opt (gpu_type): Add gfx90a. * config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX90a): New. (main): Support gfx90a. * config/gcn/t-gcn-hsa: Add gfx90a multilib. * config/gcn/t-omp-device: Add gfx90a isa. libgomp/ChangeLog: * plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add EF_AMDGPU_MACH_AMDGCN_GFX90a. (gcn_gfx90a_s): New. (isa_hsa_name): Support gfx90a. (isa_code): Likewise. Backport from cde52d3a2d02d037da53e6974d5e39021030b346.
author: Andrew Stubbs <ams@codesourcery.com> 2022-02-24 17:16:13 +0000
committer: Andrew Stubbs <ams@codesourcery.com> 2022-05-24 16:59:27 +0100
commit: a1539294321e03cc762f7d18d94ba972729a0339 (patch)
tree: 5c0647f207ac3d660123a5dce3e624ce41df252a
parent: 7d5faa57568b68d83543480e1cf39383986c86b5 (diff)
11 files changed, 137 insertions, 20 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 21555f7bab8..eb766170ace 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,5 +1,38 @@
 2022-05-24  Andrew Stubbs  <ams@codesourcery.com>
 
+	* config.gcc (amdgcn): Accept --with-arch=gfx908 and gfx90a.
+	* config/gcn/gcn-opts.h (enum gcn_isa): New.
+	(TARGET_GCN3): Use enum gcn_isa.
+	(TARGET_GCN3_PLUS): Likewise.
+	(TARGET_GCN5): Likewise.
+	(TARGET_GCN5_PLUS): Likewise.
+	(TARGET_CDNA1): New.
+	(TARGET_CDNA1_PLUS): New.
+	(TARGET_CDNA2): New.
+	(TARGET_CDNA2_PLUS): New.
+	(TARGET_M0_LDS_LIMIT): New.
+	(TARGET_PACKED_WORK_ITEMS): New.
+	* config/gcn/gcn.cc (gcn_isa): Change to enum gcn_isa.
+	(gcn_option_override): Recognise CDNA ISA variants.
+	(gcn_omp_device_kind_arch_isa): Support gfx90a.
+	(gcn_expand_prologue): Make m0 init optional.
+	Add support for packed work items.
+	(output_file_start): Support gfx90a.
+	(gcn_hsa_declare_function_name): Support gfx90a metadata.
+	* config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS):Add __CDNA1__ and
+	__CDNA2__.
+	* config/gcn/gcn.md (<su>mulsi3_highpart): Use TARGET_GCN5_PLUS.
+	(<su>mulsi3_highpart_imm): Likewise.
+	(<su>mulsidi3): Likewise.
+	(<su>mulsidi3_imm): Likewise.
+	* config/gcn/gcn.opt (gpu_type): Add gfx90a.
+	* config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX90a): New.
+	(main): Support gfx90a.
+	* config/gcn/t-gcn-hsa: Add gfx90a multilib.
+	* config/gcn/t-omp-device: Add gfx90a isa.
+
+2022-05-24  Andrew Stubbs  <ams@codesourcery.com>
+
 	* config.in: Regenerate.
 	* config/gcn/gcn-hsa.h (X_FIJI): Delete.
 	(X_900): Delete.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index a020e0808c9..17a4352b764 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4522,7 +4522,7 @@ case "${target}" in
 		for which in arch tune; do
 			eval "val=\$with_$which"
 			case ${val} in
-			"" | fiji | gfx900 | gfx906 )
+			"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a)
 				# OK
 				;;
 			*)
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index b25516060e1..48065f96336 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -23,16 +23,30 @@ enum processor_type
   PROCESSOR_FIJI,    // gfx803
   PROCESSOR_VEGA10,  // gfx900
   PROCESSOR_VEGA20,  // gfx906
-  PROCESSOR_GFX908   // as yet unnamed
+  PROCESSOR_GFX908,
+  PROCESSOR_GFX90a
 };
 
 /* Set in gcn_option_override.  */
-extern int gcn_isa;
-
-#define TARGET_GCN3 (gcn_isa == 3)
-#define TARGET_GCN3_PLUS (gcn_isa >= 3)
-#define TARGET_GCN5 (gcn_isa == 5)
-#define TARGET_GCN5_PLUS (gcn_isa >= 5)
+extern enum gcn_isa {
+  ISA_UNKNOWN,
+  ISA_GCN3,
+  ISA_GCN5,
+  ISA_CDNA1,
+  ISA_CDNA2
+} gcn_isa;
+
+#define TARGET_GCN3 (gcn_isa == ISA_GCN3)
+#define TARGET_GCN3_PLUS (gcn_isa >= ISA_GCN3)
+#define TARGET_GCN5 (gcn_isa == ISA_GCN5)
+#define TARGET_GCN5_PLUS (gcn_isa >= ISA_GCN5)
+#define TARGET_CDNA1 (gcn_isa == ISA_CDNA1)
+#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
+#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
+#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
+
+#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
+#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
 
 enum sram_ecc_type
 {
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 7f00cd7a14c..6382ef91163 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -66,7 +66,7 @@ static bool ext_gcn_constants_init = 0;
 
 /* Holds the ISA variant, derived from the command line parameters.  */
 
-int gcn_isa = 3;		/* Default to GCN3.  */
+enum gcn_isa gcn_isa = ISA_GCN3;	/* Default to GCN3.  */
 
 /* Reserve this much space for LDS (for propagating variables from
    worker-single mode to worker-partitioned mode), per workgroup.  Global
@@ -129,7 +129,13 @@ gcn_option_override (void)
   if (!flag_pic)
     flag_pic = flag_pie;
 
-  gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5;
+  gcn_isa = (gcn_arch == PROCESSOR_FIJI ? ISA_GCN3
+      : gcn_arch == PROCESSOR_VEGA10 ? ISA_GCN5
+      : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
+      : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
+      : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
+      : ISA_UNKNOWN);
+  gcc_assert (gcn_isa != ISA_UNKNOWN);
 
   /* The default stack size needs to be small for offload kernels because
      there may be many, many threads.  Also, a smaller stack gives a
@@ -2641,6 +2647,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
 	return gcn_arch == PROCESSOR_VEGA20;
       if (strcmp (name, "gfx908") == 0)
 	return gcn_arch == PROCESSOR_GFX908;
+      if (strcmp (name, "gfx90a") == 0)
+	return gcn_arch == PROCESSOR_GFX90a;
       return 0;
     default:
       gcc_unreachable ();
@@ -3080,13 +3088,35 @@ gcn_expand_prologue ()
   /* Ensure that the scheduler doesn't do anything unexpected.  */
   emit_insn (gen_blockage ());
 
-  /* m0 is initialized for the usual LDS DS and FLAT memory case.
-     The low-part is the address of the topmost addressable byte, which is
-     size-1.  The high-part is an offset and should be zero.  */
-  emit_move_insn (gen_rtx_REG (SImode, M0_REG),
-		  gen_int_mode (LDS_SIZE, SImode));
+  if (TARGET_M0_LDS_LIMIT)
+  {
+    /* m0 is initialized for the usual LDS DS and FLAT memory case.
+       The low-part is the address of the topmost addressable byte, which is
+       size-1.  The high-part is an offset and should be zero.  */
+    emit_move_insn (gen_rtx_REG (SImode, M0_REG),
+	gen_int_mode (LDS_SIZE, SImode));
+
+    emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
+  }
 
-  emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
+  if (TARGET_PACKED_WORK_ITEMS
+      && cfun && cfun->machine && !cfun->machine->normal_function)
+  {
+    /* v0 conatins the X, Y and Z dimensions all in one.
+       Expand them out for ABI compatibility.  */
+    /* TODO: implement and use zero_extract.  */
+    rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
+    emit_insn (gen_andv64si3 (v1, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
+	       gen_rtx_CONST_INT (VOIDmode, 0x3FF << 10)));
+    emit_insn (gen_lshrv64si3 (v1, v1, gen_rtx_CONST_INT (VOIDmode, 10)));
+    emit_insn (gen_prologue_use (v1));
+
+    rtx v2 = gen_rtx_REG (V64SImode, VGPR_REGNO (2));
+    emit_insn (gen_andv64si3 (v2, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
+	       gen_rtx_CONST_INT (VOIDmode, 0x3FF << 20)));
+    emit_insn (gen_lshrv64si3 (v2, v2, gen_rtx_CONST_INT (VOIDmode, 20)));
+    emit_insn (gen_prologue_use (v2));
+  }
 
   if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
     {
@@ -5218,6 +5248,9 @@ output_file_start (void)
     case PROCESSOR_GFX908:
       cpu = "gfx908";
       break;
+    case PROCESSOR_GFX90a:
+      cpu = "gfx90a";
+      break;
     default: gcc_unreachable ();
     }
 
@@ -5271,6 +5304,10 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
 	sgpr = MAX_NORMAL_SGPR_COUNT;
     }
 
+  /* The gfx90a accum_offset field can't represent 0 registers.  */
+  if (gcn_arch == PROCESSOR_GFX90a && vgpr < 4)
+    vgpr = 4;
+
   fputs ("\t.rodata\n"
 	 "\t.p2align\t6\n"
 	 "\t.amdhsa_kernel\t", file);
@@ -5339,6 +5376,11 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
 	      one 64th the wave-front stack size.  */
 	   stack_size_opt / 64,
 	   LDS_SIZE);
+  if (gcn_arch == PROCESSOR_GFX90a)
+    fprintf (file,
+	     "\t  .amdhsa_accum_offset\t%i\n"
+	     "\t  .amdhsa_tg_split\t0\n",
+	     (vgpr+3)&~3); // I think this means the AGPRs come after the VGPRs
   fputs ("\t.end_amdhsa_kernel\n", file);
 
 #if 1
@@ -5367,6 +5409,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
 	   LDS_SIZE,
 	   stack_size_opt / 64,
 	   sgpr, vgpr);
+  if (gcn_arch == PROCESSOR_GFX90a)
+    fprintf (file, "            .agpr_count: 0\n"); // AGPRs are not used, yet
   fputs ("        .end_amdgpu_metadata\n", file);
 #endif
 
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index a6efea7eaf1..6a08a629b22 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -24,6 +24,10 @@
 	builtin_define ("__GCN3__");	\
       else if (TARGET_GCN5)		\
 	builtin_define ("__GCN5__");	\
+      else if (TARGET_CDNA1)		\
+	builtin_define ("__CDNA1__");	\
+      else if (TARGET_CDNA2)		\
+	builtin_define ("__CDNA2__");	\
     }					\
   while(0)
 
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index e7c77b3cb96..8c2009e3d6f 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -37,6 +37,9 @@ Enum(gpu_type) String(gfx906) Value(PROCESSOR_VEGA20)
 EnumValue
 Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908)
 
+EnumValue
+Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
+
 march=
 Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI)
 Specify the name of the target GPU.
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 58c9b46dc35..2876ab6a9ef 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -55,6 +55,8 @@
 #define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX908
 #define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
+#undef  EF_AMDGPU_MACH_AMDGCN_GFX90a
+#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
 
 #define EF_AMDGPU_FEATURE_XNACK_V4	0x300  /* Mask.  */
 #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4	0x000
@@ -904,6 +906,8 @@ main (int argc, char **argv)
 	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
       else if (strcmp (argv[i], "-march=gfx908") == 0)
 	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
+      else if (strcmp (argv[i], "-march=gfx90a") == 0)
+	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
     }
 
   if (!(fopenacc ^ fopenmp))
diff --git a/gcc/config/gcn/t-gcn-hsa b/gcc/config/gcn/t-gcn-hsa
index ee4d9b30ff2..4a67b110611 100644
--- a/gcc/config/gcn/t-gcn-hsa
+++ b/gcc/config/gcn/t-gcn-hsa
@@ -42,8 +42,8 @@ ALL_HOST_OBJS += gcn-run.o
 gcn-run$(exeext): gcn-run.o
 	+$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ $< -ldl
 
-MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908
-MULTILIB_DIRNAMES = gfx900 gfx906 gfx908
+MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908/march=gfx90a
+MULTILIB_DIRNAMES = gfx900 gfx906 gfx908 gfx90a
 
 gcn-tree.o: $(srcdir)/config/gcn/gcn-tree.c
 	$(COMPILE) $<
diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
index 8461c432ca9..edc43a10e0e 100644
--- a/gcc/config/gcn/t-omp-device
+++ b/gcc/config/gcn/t-omp-device
@@ -1,4 +1,4 @@
 omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.c
 	echo kind: gpu > $@
 	echo arch: gcn >> $@
-	echo isa: fiji gfx900 gfx906 gfx908 >> $@
+	echo isa: fiji gfx900 gfx906 gfx908 gfx90a >> $@
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index ce3d9e5c536..a66c4ebac41 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,11 @@
+2022-05-24  Andrew Stubbs  <ams@codesourcery.com>
+
+	* plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add
+	EF_AMDGPU_MACH_AMDGCN_GFX90a.
+	(gcn_gfx90a_s): New.
+	(isa_hsa_name): Support gfx90a.
+	(isa_code): Likewise.
+
 2022-04-13  Andrew Stubbs  <ams@codesourcery.com>
 
 	* testsuite/libgomp.c/alloc-pinned-1.c: Autodetect page size.
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index f1cba0b811a..969683ea1d2 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -403,7 +403,8 @@ typedef enum {
   EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
   EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
   EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
-  EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030
+  EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
+  EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f
 } EF_AMDGPU_MACH;
 
 const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
@@ -1629,6 +1630,7 @@ const static char *gcn_gfx803_s = "gfx803";
 const static char *gcn_gfx900_s = "gfx900";
 const static char *gcn_gfx906_s = "gfx906";
 const static char *gcn_gfx908_s = "gfx908";
+const static char *gcn_gfx90a_s = "gfx90a";
 const static int gcn_isa_name_len = 6;
 
 /* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
@@ -1646,6 +1648,8 @@ isa_hsa_name (int isa) {
       return gcn_gfx906_s;
     case EF_AMDGPU_MACH_AMDGCN_GFX908:
       return gcn_gfx908_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX90a:
+      return gcn_gfx90a_s;
     }
   return NULL;
 }
@@ -1682,6 +1686,9 @@ isa_code(const char *isa) {
   if (!strncmp (isa, gcn_gfx908_s, gcn_isa_name_len))
     return EF_AMDGPU_MACH_AMDGCN_GFX908;
 
+  if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX90a;
+
   return -1;
 }
author	Andrew Stubbs <ams@codesourcery.com>	2022-02-24 17:16:13 +0000
committer	Andrew Stubbs <ams@codesourcery.com>	2022-05-24 16:59:27 +0100
commit	a1539294321e03cc762f7d18d94ba972729a0339 (patch)
tree	5c0647f207ac3d660123a5dce3e624ce41df252a
parent	7d5faa57568b68d83543480e1cf39383986c86b5 (diff)