diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h new file mode 100644 index 0000000000000..f0bbd136955bd --- /dev/null +++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ +#define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ + +#include +#include + +_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(Scope scope, + MemorySemantics semantics); + +#endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h b/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h new file mode 100644 index 0000000000000..7294026386b7a --- /dev/null +++ b/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__ +#define __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__ + +// Scope values are the ones defined in the SPIR-V spec. +typedef enum Scope { + CrossDevice = 0, + Device = 1, + Workgroup = 2, + Subgroup = 3, + Invocation = 4, +} Scope; + +// MemorySemantics values are bit masks defined in the SPIR-V spec. 
+typedef enum MemorySemantics { + None = 0x0, + Acquire = 0x2, + Release = 0x4, + AcquireRelease = 0x8, + SequentiallyConsistent = 0x10, + UniformMemory = 0x40, + SubgroupMemory = 0x80, + WorkgroupMemory = 0x100, + CrossWorkgroupMemory = 0x200, + AtomicCounterMemory = 0x400, + ImageMemory = 0x800, +} MemorySemantics; + +#endif // __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__ diff --git a/libclc/clc/include/clc/synchronization/clc_barrier.h b/libclc/clc/include/clc/synchronization/clc_barrier.h new file mode 100644 index 0000000000000..d363652c6e14d --- /dev/null +++ b/libclc/clc/include/clc/synchronization/clc_barrier.h @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_SYNCHRONIZATION_CLC_BARRIER_H__ +#define __CLC_SYNCHRONIZATION_CLC_BARRIER_H__ + +#include +#include + +_CLC_OVERLOAD _CLC_DECL void __clc_barrier(Scope scope, + MemorySemantics semantics); + +#endif // __CLC_SYNCHRONIZATION_CLC_BARRIER_H__ diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES index 7bec1740f7636..f2f58e3124aa8 100644 --- a/libclc/clc/lib/amdgcn/SOURCES +++ b/libclc/clc/lib/amdgcn/SOURCES @@ -1,6 +1,8 @@ math/clc_fmax.cl math/clc_fmin.cl math/clc_ldexp_override.cl +mem_fence/clc_mem_fence.cl +synchronization/clc_barrier.cl workitem/clc_get_global_offset.cl workitem/clc_get_global_size.cl workitem/clc_get_group_id.cl diff --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl new file mode 100644 index 0000000000000..12ec6d8d18091 --- /dev/null +++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl @@ -0,0 +1,37 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +void __clc_amdgcn_s_waitcnt(unsigned flags); + +// s_waitcnt takes a 16-bit argument that packs the maximum number of allowed +// pending operations per counter: +// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages +// [7] -- undefined +// [6:4] -- exports, GDS, and mem write +// [3:0] -- vector memory operations + +// Clang 5 and newer provide __builtin_amdgcn_s_waitcnt +#if __clang_major__ >= 5 +#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x) +#else +#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x) +_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); +#endif + +_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope, + MemorySemantics semantics) { + if (semantics & CrossWorkgroupMemory) { + // Scalar loads are counted with LGKM, and we cannot tell whether the + // compiler turned any loads into scalar ones, so wait on all counters. + __waitcnt(0); + } else if (semantics & WorkgroupMemory) + __waitcnt(0xff); // bits [12:8] (LGKM) stay 0: only LGKM must drain +} +#undef __waitcnt diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl new file mode 100644 index 0000000000000..0299a426e4d21 --- /dev/null +++ b/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope, + MemorySemantics semantics) { + __clc_mem_fence(scope, semantics); // fence first, then the barrier + __builtin_amdgcn_s_barrier(); +} diff --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES b/libclc/clc/lib/ptx-nvidiacl/SOURCES index 05368c5e4d4e3..a0fb861549ebc 100644 --- a/libclc/clc/lib/ptx-nvidiacl/SOURCES +++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES @@ -1,3 +1,5 @@ +mem_fence/clc_mem_fence.cl +synchronization/clc_barrier.cl workitem/clc_get_global_id.cl workitem/clc_get_group_id.cl workitem/clc_get_local_id.cl diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl new file mode 100644 index 0000000000000..4c0d342b7244f --- /dev/null +++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope, + MemorySemantics semantics) { + if (semantics & (CrossWorkgroupMemory | WorkgroupMemory)) + __nvvm_membar_cta(); // same fence for both; scope is not consulted +} diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl new file mode 100644 index 0000000000000..920b17cb02f92 --- /dev/null +++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope, + MemorySemantics semantics) { + __syncthreads(); // scope and semantics are not consulted +} diff --git a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h index 6636515fca47d..18f9a4afb2d5f 100644 --- a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h +++ b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h @@ -11,7 +11,34 @@ typedef uint cl_mem_fence_flags; +// Copied from +// https://github.com/llvm/llvm-project/blob/08e40c12fa0c/clang/lib/Headers/opencl-c-base.h#L390 +typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, +#if defined(__opencl_c_atomic_scope_all_devices) + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) + memory_scope_all_devices = memory_scope_all_svm_devices, +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= + // 202100) +#endif // defined(__opencl_c_atomic_scope_all_devices) +/** + * Subgroups have different requirements on forward progress, so just test + * all the relevant macros. 
+ * CL 3.0 sub-groups: "they are not guaranteed to make independent forward + * progress". KHR subgroups: "Subgroups within a workgroup are independent, + * make forward progress with respect to each other". + */ +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ + defined(__opencl_c_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +#endif +} memory_scope; + #define CLK_LOCAL_MEM_FENCE 1 #define CLK_GLOBAL_MEM_FENCE 2 +#define CLK_IMAGE_MEM_FENCE 4 #endif // __CLC_OPENCL_SYNCHRONIZATION_CL_MEM_FENCE_FLAGS_H__ diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h new file mode 100644 index 0000000000000..bbcfa20a556d5 --- /dev/null +++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h @@ -0,0 +1,50 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__ +#define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__ + +#include +#include +#include + +_CLC_INLINE Scope getCLCScope(memory_scope memory_scope) { + switch (memory_scope) { // here this names the parameter, not the typedef + case memory_scope_work_item: + return Invocation; +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ + defined(__opencl_c_subgroups) + case memory_scope_sub_group: + return Subgroup; +#endif + case memory_scope_work_group: + return Workgroup; + case memory_scope_device: + return Device; + default: + break; // any other scope uses the fallback return below + } +#ifdef __opencl_c_atomic_scope_all_devices + return CrossDevice; // fallback for any unmatched scope +#else + return Device; // fallback for any unmatched scope +#endif +} + +_CLC_INLINE MemorySemantics getCLCMemorySemantics(cl_mem_fence_flags flag) { + MemorySemantics semantics = SequentiallyConsistent; // set unconditionally + if (flag & CLK_GLOBAL_MEM_FENCE) + semantics |= CrossWorkgroupMemory; + if (flag & CLK_LOCAL_MEM_FENCE) + semantics |= WorkgroupMemory; + if (flag & CLK_IMAGE_MEM_FENCE) + semantics |= ImageMemory; + return semantics; +} + +#endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__ diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl index 88b953005aae6..10d879d835c06 100644 --- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl +++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl @@ -6,34 +6,15 @@ // //===----------------------------------------------------------------------===// +#include #include - -void __clc_amdgcn_s_waitcnt(unsigned flags); - -// s_waitcnt takes 16bit argument with a combined number of maximum allowed -// pending operations: -// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages -// [7] -- undefined -// [6:4] -- exports, GDS, and mem write -// [3:0] -- vector memory operations - -// Newer clang supports __builtin_amdgcn_s_waitcnt -#if __clang_major__ >= 5 -#define __waitcnt(x) 
__builtin_amdgcn_s_waitcnt(x) -#else -#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x) -_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); -#endif +#include _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { - if (flags & CLK_GLOBAL_MEM_FENCE) { - // scalar loads are counted with LGKM but we don't know whether - // the compiler turned any loads to scalar - __waitcnt(0); - } else if (flags & CLK_LOCAL_MEM_FENCE) - __waitcnt(0xff); // LGKM is [12:8] + Scope scope = getCLCScope(memory_scope_device); // fixed scope, not from flags + MemorySemantics semantics = getCLCMemorySemantics(flags); + __clc_mem_fence(scope, semantics); } -#undef __waitcnt // We don't have separate mechanism for read and write fences _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) { diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl index 5203db72f484c..b8372d4800bf1 100644 --- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl +++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// -#include #include +#include +#include _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { - mem_fence(flags); - __builtin_amdgcn_s_barrier(); + Scope scope = getCLCScope(memory_scope_device); // fixed scope, not from flags + MemorySemantics semantics = getCLCMemorySemantics(flags); + __clc_barrier(scope, semantics); } diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl index d24569ecda1bc..2d591c90d63c2 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl @@ -6,11 +6,14 @@ // //===----------------------------------------------------------------------===// +#include #include +#include _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { - if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE)) 
- __nvvm_membar_cta(); + Scope scope = getCLCScope(memory_scope_device); // fixed scope, not from flags + MemorySemantics semantics = getCLCMemorySemantics(flags); + __clc_mem_fence(scope, semantics); } // We do not have separate mechanism for read and write fences. diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl index 7c57478795dda..b8372d4800bf1 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl @@ -7,7 +7,11 @@ //===----------------------------------------------------------------------===// #include +#include +#include _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { - __syncthreads(); + Scope scope = getCLCScope(memory_scope_device); // fixed scope, not from flags + MemorySemantics semantics = getCLCMemorySemantics(flags); + __clc_barrier(scope, semantics); }