Skip to content

[libclc] Move mem_fence and barrier to clc library #151446

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions libclc/clc/include/clc/mem_fence/clc_mem_fence.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
#define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__

#include <clc/internal/clc.h>
#include <clc/mem_fence/clc_mem_scope_semantics.h>

// Declares the CLC-level memory fence. Each target supplies its own
// definition.
//   scope     - Scope value describing how widely the fence applies.
//   semantics - MemorySemantics bit mask selecting the ordering and the kinds
//               of memory covered by the fence.
_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(Scope scope,
MemorySemantics semantics);

#endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
36 changes: 36 additions & 0 deletions libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
#define __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__

// Scope values are defined in SPIR-V spec.
typedef enum Scope {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should stick to OpenCL spec and terminology; I thought clang already had predefined enums for both of these

CrossDevice = 0, // getCLCScope fallback when all-devices scope is available
Device = 1,      // OpenCL counterpart: memory_scope_device
Workgroup = 2,   // OpenCL counterpart: memory_scope_work_group
Subgroup = 3,    // OpenCL counterpart: memory_scope_sub_group
Invocation = 4,  // OpenCL counterpart: memory_scope_work_item
} Scope;

// MemorySemantics values are defined in SPIR-V spec.
// Bit mask: an ordering value may be OR-ed with any of the *Memory bits
// (see getCLCMemorySemantics for how the OpenCL fence flags are combined).
typedef enum MemorySemantics {
  // Ordering constraints.
  None = 0x000,
  Acquire = 0x002,
  Release = 0x004,
  AcquireRelease = 0x008,
  SequentiallyConsistent = 0x010,

  // Kinds of memory the ordering applies to.
  UniformMemory = 0x040,
  SubgroupMemory = 0x080,
  WorkgroupMemory = 0x100,
  CrossWorkgroupMemory = 0x200,
  AtomicCounterMemory = 0x400,
  ImageMemory = 0x800,
} MemorySemantics;

#endif // __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
18 changes: 18 additions & 0 deletions libclc/clc/include/clc/synchronization/clc_barrier.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
#define __CLC_SYNCHRONIZATION_CLC_BARRIER_H__

#include <clc/internal/clc.h>
#include <clc/mem_fence/clc_mem_scope_semantics.h>

// Declares the CLC-level barrier. Each target supplies its own definition.
//   scope     - Scope value passed along with the barrier request.
//   semantics - MemorySemantics bit mask describing the memory ordering
//               requested together with the barrier.
_CLC_OVERLOAD _CLC_DECL void __clc_barrier(Scope scope,
MemorySemantics semantics);

#endif // __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
2 changes: 2 additions & 0 deletions libclc/clc/lib/amdgcn/SOURCES
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
math/clc_fmax.cl
math/clc_fmin.cl
math/clc_ldexp_override.cl
mem_fence/clc_mem_fence.cl
synchronization/clc_barrier.cl
workitem/clc_get_global_offset.cl
workitem/clc_get_global_size.cl
workitem/clc_get_group_id.cl
Expand Down
37 changes: 37 additions & 0 deletions libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/mem_fence/clc_mem_fence.h>

// Fallback entry point; only used by the #else branch below, where it is
// bound to the llvm.amdgcn.s.waitcnt intrinsic through an asm label.
void __clc_amdgcn_s_waitcnt(unsigned flags);

// s_waitcnt takes a 16-bit argument that packs, per counter, the maximum
// number of operations still allowed to be pending:
// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
// [7] -- undefined
// [6:4] -- exports, GDS, and mem write
// [3:0] -- vector memory operations
// A field of 0 therefore allows no pending operations for that counter.

// Newer clang supports __builtin_amdgcn_s_waitcnt
// __waitcnt(x) is the file-local spelling used by the fence below; it is
// #undef'd again at the end of the file.
#if __clang_major__ >= 5
#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
#else
#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
#endif

// amdgcn memory fence, implemented with s_waitcnt.
//   scope     - not consulted by this implementation.
//   semantics - CrossWorkgroupMemory takes precedence over WorkgroupMemory;
//               any other combination emits nothing.
_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope,
                                            MemorySemantics semantics) {
  if (semantics & CrossWorkgroupMemory) {
    // Scalar loads are counted with LGKM, and we cannot tell whether the
    // compiler turned any loads into scalar ones, so wait on every counter.
    __waitcnt(0);
    return;
  }

  if (semantics & WorkgroupMemory) {
    // LGKM lives in bits [12:8]; 0xff leaves the lower counters unconstrained.
    __waitcnt(0xff);
  }
}
#undef __waitcnt
16 changes: 16 additions & 0 deletions libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/mem_fence/clc_mem_fence.h>
#include <clc/synchronization/clc_barrier.h>

// amdgcn barrier: first applies the memory fence described by
// scope/semantics, then executes the s_barrier builtin. The fence must stay
// ahead of the barrier call.
_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope,
MemorySemantics semantics) {
__clc_mem_fence(scope, semantics);
__builtin_amdgcn_s_barrier();
}
2 changes: 2 additions & 0 deletions libclc/clc/lib/ptx-nvidiacl/SOURCES
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mem_fence/clc_mem_fence.cl
synchronization/clc_barrier.cl
workitem/clc_get_global_id.cl
workitem/clc_get_group_id.cl
workitem/clc_get_local_id.cl
Expand Down
15 changes: 15 additions & 0 deletions libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/mem_fence/clc_mem_fence.h>

// ptx-nvidiacl memory fence.
//   scope     - not consulted by this implementation.
//   semantics - a membar.cta is emitted when either CrossWorkgroupMemory or
//               WorkgroupMemory is set; otherwise nothing is emitted.
_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope,
                                            MemorySemantics semantics) {
  const int fenced_memory = CrossWorkgroupMemory | WorkgroupMemory;

  if ((semantics & fenced_memory) == 0)
    return;

  __nvvm_membar_cta();
}
14 changes: 14 additions & 0 deletions libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/synchronization/clc_barrier.h>

// ptx-nvidiacl barrier: calls __syncthreads(). Neither scope nor semantics is
// read by this implementation.
_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope,
MemorySemantics semantics) {
__syncthreads();
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,34 @@

typedef uint cl_mem_fence_flags;

// Copied from
// https://github.com/llvm/llvm-project/blob/08e40c12fa0c/clang/lib/Headers/opencl-c-base.h#L390
// OpenCL memory scopes, with values taken from the
// __OPENCL_MEMORY_SCOPE_* macros. The all-devices and sub-group enumerators
// exist only when the corresponding feature/extension macros are defined, so
// code that names them must use the same preprocessor guards.
typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
#if defined(__opencl_c_atomic_scope_all_devices)
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
// Alias with the same value as memory_scope_all_svm_devices.
memory_scope_all_devices = memory_scope_all_svm_devices,
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >=
// 202100)
#endif // defined(__opencl_c_atomic_scope_all_devices)
/**
* Subgroups have different requirements on forward progress, so just test
* all the relevant macros.
* CL 3.0 sub-groups "they are not guaranteed to make independent forward
* progress" KHR subgroups "Subgroups within a workgroup are independent, make
* forward progress with respect to each other"
*/
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \
defined(__opencl_c_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;

#define CLK_LOCAL_MEM_FENCE 1
#define CLK_GLOBAL_MEM_FENCE 2
#define CLK_IMAGE_MEM_FENCE 4

#endif // __CLC_OPENCL_SYNCHRONIZATION_CL_MEM_FENCE_FLAGS_H__
50 changes: 50 additions & 0 deletions libclc/opencl/include/clc/opencl/synchronization/utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
#define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__

#include <clc/internal/clc.h>
#include <clc/mem_fence/clc_mem_scope_semantics.h>
#include <clc/opencl/synchronization/cl_mem_fence_flags.h>

// Translates an OpenCL memory_scope value into the matching CLC Scope.
//   work_item -> Invocation, sub_group -> Subgroup (only when a subgroup
//   feature macro is defined), work_group -> Workgroup, device -> Device.
// Any other value yields CrossDevice when
// __opencl_c_atomic_scope_all_devices is defined, and Device otherwise.
_CLC_INLINE Scope getCLCScope(memory_scope memory_scope) {
  if (memory_scope == memory_scope_work_item)
    return Invocation;

#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ||                \
    defined(__opencl_c_subgroups)
  if (memory_scope == memory_scope_sub_group)
    return Subgroup;
#endif

  if (memory_scope == memory_scope_work_group)
    return Workgroup;

  if (memory_scope == memory_scope_device)
    return Device;

  // Fallback for every value not handled above.
#ifdef __opencl_c_atomic_scope_all_devices
  return CrossDevice;
#else
  return Device;
#endif
}

// Builds the CLC MemorySemantics mask for a set of OpenCL fence flags.
// The result always contains SequentiallyConsistent, plus:
//   CLK_GLOBAL_MEM_FENCE -> CrossWorkgroupMemory
//   CLK_LOCAL_MEM_FENCE  -> WorkgroupMemory
//   CLK_IMAGE_MEM_FENCE  -> ImageMemory
_CLC_INLINE MemorySemantics getCLCMemorySemantics(cl_mem_fence_flags flag) {
  int mask = SequentiallyConsistent;

  mask |= (flag & CLK_GLOBAL_MEM_FENCE) ? CrossWorkgroupMemory : 0;
  mask |= (flag & CLK_LOCAL_MEM_FENCE) ? WorkgroupMemory : 0;
  mask |= (flag & CLK_IMAGE_MEM_FENCE) ? ImageMemory : 0;

  return (MemorySemantics)mask;
}

#endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
29 changes: 5 additions & 24 deletions libclc/opencl/lib/amdgcn/mem_fence/fence.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,15 @@
//
//===----------------------------------------------------------------------===//

#include <clc/mem_fence/clc_mem_fence.h>
#include <clc/opencl/explicit_fence/explicit_memory_fence.h>

void __clc_amdgcn_s_waitcnt(unsigned flags);

// s_waitcnt takes 16bit argument with a combined number of maximum allowed
// pending operations:
// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
// [7] -- undefined
// [6:4] -- exports, GDS, and mem write
// [3:0] -- vector memory operations

// Newer clang supports __builtin_amdgcn_s_waitcnt
#if __clang_major__ >= 5
#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
#else
#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
#endif
#include <clc/opencl/synchronization/utils.h>

// OpenCL mem_fence() for amdgcn: converts the OpenCL fence flags into CLC
// scope/semantics values and forwards to __clc_mem_fence, which owns the
// s_waitcnt logic. No fence is issued here directly, so the wait is emitted
// only once.
//   flags - bitwise OR of CLK_LOCAL_MEM_FENCE, CLK_GLOBAL_MEM_FENCE and
//           CLK_IMAGE_MEM_FENCE.
// NOTE(review): the OpenCL C specification describes mem_fence in terms of
// work-group scope, while memory_scope_device is passed here — confirm this
// is intended.
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
  Scope scope = getCLCScope(memory_scope_device);
  MemorySemantics semantics = getCLCMemorySemantics(flags);
  __clc_mem_fence(scope, semantics);
}
#undef __waitcnt

// We don't have a separate mechanism for read and write fences
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
Expand Down
8 changes: 5 additions & 3 deletions libclc/opencl/lib/amdgcn/synchronization/barrier.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
//
//===----------------------------------------------------------------------===//

#include <clc/opencl/explicit_fence/explicit_memory_fence.h>
#include <clc/opencl/synchronization/barrier.h>
#include <clc/opencl/synchronization/utils.h>
#include <clc/synchronization/clc_barrier.h>

// OpenCL barrier() for amdgcn: converts the OpenCL fence flags into CLC
// scope/semantics values and forwards to __clc_barrier, which performs the
// fence followed by s_barrier. Nothing is issued here directly, so the
// work-group synchronizes only once.
//   flags - bitwise OR of CLK_LOCAL_MEM_FENCE, CLK_GLOBAL_MEM_FENCE and
//           CLK_IMAGE_MEM_FENCE.
// NOTE(review): the OpenCL C specification describes barrier in terms of
// work-group scope, while memory_scope_device is passed here — confirm this
// is intended.
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
  Scope scope = getCLCScope(memory_scope_device);
  MemorySemantics semantics = getCLCMemorySemantics(flags);
  __clc_barrier(scope, semantics);
}
7 changes: 5 additions & 2 deletions libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
//
//===----------------------------------------------------------------------===//

#include <clc/mem_fence/clc_mem_fence.h>
#include <clc/opencl/explicit_fence/explicit_memory_fence.h>
#include <clc/opencl/synchronization/utils.h>

// OpenCL mem_fence() for ptx-nvidiacl: converts the OpenCL fence flags into
// CLC scope/semantics values and forwards to __clc_mem_fence, which emits the
// membar. No membar is issued here directly, so it is emitted only once.
//   flags - bitwise OR of CLK_LOCAL_MEM_FENCE, CLK_GLOBAL_MEM_FENCE and
//           CLK_IMAGE_MEM_FENCE.
// NOTE(review): the OpenCL C specification describes mem_fence in terms of
// work-group scope, while memory_scope_device is passed here — confirm this
// is intended.
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
  Scope scope = getCLCScope(memory_scope_device);
  MemorySemantics semantics = getCLCMemorySemantics(flags);
  __clc_mem_fence(scope, semantics);
}

// We do not have a separate mechanism for read and write fences.
Expand Down
6 changes: 5 additions & 1 deletion libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
//===----------------------------------------------------------------------===//

#include <clc/opencl/synchronization/barrier.h>
#include <clc/opencl/synchronization/utils.h>
#include <clc/synchronization/clc_barrier.h>

// OpenCL barrier() for ptx-nvidiacl: converts the OpenCL fence flags into CLC
// scope/semantics values and forwards to __clc_barrier, which calls
// __syncthreads(). Nothing is issued here directly, so the work-group
// synchronizes only once.
//   flags - bitwise OR of CLK_LOCAL_MEM_FENCE, CLK_GLOBAL_MEM_FENCE and
//           CLK_IMAGE_MEM_FENCE.
// NOTE(review): the OpenCL C specification describes barrier in terms of
// work-group scope, while memory_scope_device is passed here — confirm this
// is intended.
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
  Scope scope = getCLCScope(memory_scope_device);
  MemorySemantics semantics = getCLCMemorySemantics(flags);
  __clc_barrier(scope, semantics);
}
Loading