summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2024-02-25 10:21:04 -0800
committer: H.J. Lu <hjl.tools@gmail.com> 2024-02-27 02:33:23 -0800
commit: 26b1012c26c4b4de0b4561e74b856a7f7d259a48 (patch)
tree: 5cab1635fffe156ea2ada4274399ab6f542e216b
parent: 41af48a1750635a72c48a5809e713d9dd14d9655 (diff)
x86: Properly implement AMX-TILE load/store intrinsics
ldtilecfg and sttilecfg take a 512-bit (64-byte) memory block. With _tile_loadconfig implemented as

  extern __inline void
  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  _tile_loadconfig (const void *__config)
  {
    __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
  }

GCC sees:

  (parallel [
    (asm_operands/v ("ldtilecfg %X0") ("") 0
      [(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
         (const_int -64 [0xffffffffffffffc0])) [1 MEM[(const void * *)&tile_data]+0 S8 A128])]
      [(asm_input:DI ("m"))]
    (clobber (reg:CC 17 flags))])

and the memory operand size is 8 bytes (DImode, S8). As a result, the remaining 56 bytes are ignored by GCC. Implement ldtilecfg and sttilecfg intrinsics with a pointer to XImode to honor the 64-byte memory block.

gcc/ChangeLog:

  PR target/114098
  * config/i386/amxtileintrin.h (_tile_loadconfig): Use __builtin_ia32_ldtilecfg.
  (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
  * config/i386/i386-builtin.def (BDESC): Add __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
  * config/i386/i386-expand.c (ix86_expand_builtin): Handle IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
  * config/i386/i386.md (ldtilecfg): New pattern.
  (sttilecfg): Likewise.

gcc/testsuite/ChangeLog:

  PR target/114098
  * gcc.target/i386/amxtile-4.c: New test.

(cherry picked from commit 4972f97a265c574d51e20373ddefd66576051e5c)
-rw-r--r-- gcc/config/i386/amxtileintrin.h 4
-rw-r--r-- gcc/config/i386/i386-builtin.def 4
-rw-r--r-- gcc/config/i386/i386-expand.c 19
-rw-r--r-- gcc/config/i386/i386.md 24
-rw-r--r-- gcc/testsuite/gcc.target/i386/amxtile-4.c 52
5 files changed, 101 insertions, 2 deletions
diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
index 3a0a6b44c17..0b0f6d979aa 100644
--- a/gcc/config/i386/amxtileintrin.h
+++ b/gcc/config/i386/amxtileintrin.h
@@ -39,14 +39,14 @@ extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_loadconfig (const void *__config)
{
- __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
+ __builtin_ia32_ldtilecfg (__config);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_storeconfig (void *__config)
{
- __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
+ __builtin_ia32_sttilecfg (__config);
}
extern __inline void
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 5c7f436ac17..db3e5a6ea7f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
+/* LDTILECFG and STTILECFG. */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_nothing, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) VOID_FTYPE_PCVOID)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_nothing, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, UNKNOWN, (int) VOID_FTYPE_PVOID)
+
/* SSE */
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF)
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index dc8228e8a87..5afd61bc50a 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -12026,6 +12026,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
emit_insn (pat);
return 0;
+ case IX86_BUILTIN_LDTILECFG:
+ case IX86_BUILTIN_STTILECFG:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ if (!address_operand (op0, VOIDmode))
+ {
+ op0 = convert_memory_address (Pmode, op0);
+ op0 = copy_addr_to_reg (op0);
+ }
+ op0 = gen_rtx_MEM (XImode, op0);
+ if (fcode == IX86_BUILTIN_LDTILECFG)
+ icode = CODE_FOR_ldtilecfg;
+ else
+ icode = CODE_FOR_sttilecfg;
+ pat = GEN_FCN (icode) (op0);
+ emit_insn (pat);
+ return 0;
+
case IX86_BUILTIN_LLWPCB:
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0c6d923ea1e..e954958e98b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -319,6 +319,10 @@
;; For HRESET support
UNSPECV_HRESET
+
+ ;; For AMX-TILE
+ UNSPECV_LDTILECFG
+ UNSPECV_STTILECFG
])
;; Constants to represent rounding modes in the ROUND instruction
@@ -21908,6 +21912,26 @@
[(set_attr "type" "other")
(set_attr "length" "4")])
+(define_insn "ldtilecfg"
+ [(unspec_volatile [(match_operand:XI 0 "memory_operand" "m")]
+ UNSPECV_LDTILECFG)]
+ "TARGET_AMX_TILE"
+ "ldtilecfg\t%0"
+ [(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "memory" "load")
+ (set_attr "mode" "XI")])
+
+(define_insn "sttilecfg"
+ [(set (match_operand:XI 0 "memory_operand" "=m")
+ (unspec_volatile:XI [(const_int 0)] UNSPECV_STTILECFG))]
+ "TARGET_AMX_TILE"
+ "sttilecfg\t%0"
+ [(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "XI")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
diff --git a/gcc/testsuite/gcc.target/i386/amxtile-4.c b/gcc/testsuite/gcc.target/i386/amxtile-4.c
new file mode 100644
index 00000000000..6b49cdeeb50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxtile-4.c
@@ -0,0 +1,52 @@
+/* PR target/114098 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile" } */
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+#define MAX_ROWS 16
+#define MAX_COLS 64
+#define MAX 1024
+#define STRIDE 64
+
+typedef struct __tile_config
+{
+ uint8_t palette_id;
+ uint8_t start_row;
+ uint8_t reserved_0[14];
+ uint16_t colsb[16];
+ uint8_t rows[16];
+} __tilecfg __attribute__ ((aligned (64)));
+
+/* Initialize tile config */
+static void
+init_tile_config (__tilecfg *tileinfo)
+{
+ int i;
+ tileinfo->palette_id = 1;
+ tileinfo->start_row = 0;
+
+ for (i = 0; i < 1; ++i)
+ {
+ tileinfo->colsb[i] = MAX_ROWS;
+ tileinfo->rows[i] = MAX_ROWS;
+ }
+
+ for (i = 1; i < 4; ++i)
+ {
+ tileinfo->colsb[i] = MAX_COLS;
+ tileinfo->rows[i] = MAX_ROWS;
+ }
+
+ _tile_loadconfig (tileinfo);
+}
+
+void
+enable_amx (void)
+{
+ __tilecfg tile_data = {0};
+ init_tile_config (&tile_data);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[^\n\]*%xmm" 1 } } */