diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2024-02-25 10:21:04 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2024-02-27 02:33:23 -0800 |
commit | 26b1012c26c4b4de0b4561e74b856a7f7d259a48 (patch) | |
tree | 5cab1635fffe156ea2ada4274399ab6f542e216b | |
parent | 41af48a1750635a72c48a5809e713d9dd14d9655 (diff) |
x86: Properly implement AMX-TILE load/store intrinsics
ldtilecfg and sttilecfg take a 512-byte memory block. With
_tile_loadconfig implemented as
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_loadconfig (const void *__config)
{
__asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
}
GCC sees:
(parallel [
(asm_operands/v ("ldtilecfg %X0") ("") 0
[(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
(const_int -64 [0xffffffffffffffc0])) [1 MEM[(const void * *)&tile_data]+0 S8 A128])]
[(asm_input:DI ("m"))]
(clobber (reg:CC 17 flags))])
and the memory operand size is 1 byte. As the result, the rest of 511
bytes is ignored by GCC. Implement ldtilecfg and sttilecfg intrinsics
with a pointer to XImode to honor the 512-byte memory block.
gcc/ChangeLog:
PR target/114098
* config/i386/amxtileintrin.h (_tile_loadconfig): Use
__builtin_ia32_ldtilecfg.
(_tile_storeconfig): Use __builtin_ia32_sttilecfg.
* config/i386/i386-builtin.def (BDESC): Add
__builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
* config/i386/i386-expand.c (ix86_expand_builtin): Handle
IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
* config/i386/i386.md (ldtilecfg): New pattern.
(sttilecfg): Likewise.
gcc/testsuite/ChangeLog:
PR target/114098
* gcc.target/i386/amxtile-4.c: New test.
(cherry picked from commit 4972f97a265c574d51e20373ddefd66576051e5c)
-rw-r--r-- | gcc/config/i386/amxtileintrin.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386-expand.c | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amxtile-4.c | 52 |
5 files changed, 101 insertions, 2 deletions
diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h index 3a0a6b44c17..0b0f6d979aa 100644 --- a/gcc/config/i386/amxtileintrin.h +++ b/gcc/config/i386/amxtileintrin.h @@ -39,14 +39,14 @@ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tile_loadconfig (const void *__config) { - __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config))); + __builtin_ia32_ldtilecfg (__config); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tile_storeconfig (void *__config) { - __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config))); + __builtin_ia32_sttilecfg (__config); } extern __inline void diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 5c7f436ac17..db3e5a6ea7f 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64) BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64) +/* LDFILECFG and STFILECFG. */ +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_nothing, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) VOID_FTYPE_PCVOID) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_nothing, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, UNKNOWN, (int) VOID_FTYPE_PVOID) + /* SSE */ BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF) BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index dc8228e8a87..5afd61bc50a 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -12026,6 +12026,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_insn (pat); return 0; + case IX86_BUILTIN_LDTILECFG: + case IX86_BUILTIN_STTILECFG: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + + if (!address_operand (op0, VOIDmode)) + { + op0 = convert_memory_address (Pmode, op0); + op0 = copy_addr_to_reg (op0); + } + op0 = gen_rtx_MEM (XImode, op0); + if (fcode == IX86_BUILTIN_LDTILECFG) + icode = CODE_FOR_ldtilecfg; + else + icode = CODE_FOR_sttilecfg; + pat = GEN_FCN (icode) (op0); + emit_insn (pat); + return 0; + case IX86_BUILTIN_LLWPCB: arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_normal (arg0); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0c6d923ea1e..e954958e98b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -319,6 +319,10 @@ ;; For HRESET support UNSPECV_HRESET + + ;; For AMX-TILE + UNSPECV_LDTILECFG + UNSPECV_STTILECFG ]) ;; Constants to represent rounding modes in the ROUND instruction @@ -21908,6 +21912,26 @@ [(set_attr "type" "other") (set_attr "length" "4")]) +(define_insn "ldtilecfg" + [(unspec_volatile [(match_operand:XI 0 "memory_operand" "m")] + UNSPECV_LDTILECFG)] + "TARGET_AMX_TILE" + "ldtilecfg\t%0" + [(set_attr "type" "other") + (set_attr "prefix" "maybe_evex") + (set_attr "memory" "load") + (set_attr "mode" "XI")]) + +(define_insn "sttilecfg" + [(set (match_operand:XI 0 "memory_operand" "=m") + (unspec_volatile:XI [(const_int 0)] UNSPECV_STTILECFG))] + "TARGET_AMX_TILE" + "sttilecfg\t%0" + [(set_attr "type" "other") + (set_attr "prefix" "maybe_evex") + (set_attr "memory" "store") + (set_attr "mode" "XI")]) + (include "mmx.md") (include "sse.md") (include "sync.md") diff --git a/gcc/testsuite/gcc.target/i386/amxtile-4.c b/gcc/testsuite/gcc.target/i386/amxtile-4.c new file mode 100644 index 00000000000..6b49cdeeb50 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxtile-4.c @@ -0,0 +1,52 @@ +/* PR target/114098 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-tile" } */ + +#include <stdint.h> +#include <x86intrin.h> + +#define MAX_ROWS 16 +#define MAX_COLS 64 +#define MAX 1024 +#define STRIDE 64 + +typedef struct __tile_config +{ + uint8_t palette_id; + uint8_t start_row; + uint8_t reserved_0[14]; + uint16_t colsb[16]; + uint8_t rows[16]; +} __tilecfg __attribute__ ((aligned (64))); + +/* Initialize tile config */ +static void +init_tile_config (__tilecfg *tileinfo) +{ + int i; + tileinfo->palette_id = 1; + tileinfo->start_row = 0; + + for (i = 0; i < 1; ++i) + { + tileinfo->colsb[i] = MAX_ROWS; + tileinfo->rows[i] = MAX_ROWS; + } + + for (i = 1; i < 4; ++i) + { + tileinfo->colsb[i] = MAX_COLS; + tileinfo->rows[i] = MAX_ROWS; + } + + _tile_loadconfig (tileinfo); +} + +void +enable_amx (void) +{ + __tilecfg tile_data = {0}; + init_tile_config (&tile_data); +} + +/* { dg-final { scan-assembler-times "pxor\[^\n\]*%xmm" 1 } } */ |