Skip to content

Commit f85ae05

Browse files
committed
[OpenMP] Provide math functions in OpenMP device code via OpenMP variants
For OpenMP target regions to piggy back on the CUDA/AMDGPU/... implementation of math functions, we include the appropriate definitions inside of an `omp begin/end declare variant match(device={arch(nvptx)})` scope. This way, the vendor specific math functions will become specialized versions of the system math functions. When a system math function is called and specialized version is available the selection logic introduced in D75779 instead call the specialized version. In contrast to the code path we used so far, the system header is actually included. This means functions without specialized versions are available and so are macro definitions. This should address PR42061, PR42798, and PR42799. Reviewed By: ye-luo Differential Revision: https://reviews.llvm.org/D75788
1 parent eb5a16e commit f85ae05

29 files changed

+909
-168
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1216,7 +1216,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
12161216
}
12171217

12181218
CmdArgs.push_back("-include");
1219-
CmdArgs.push_back("__clang_openmp_math_declares.h");
1219+
CmdArgs.push_back("__clang_openmp_device_functions.h");
12201220
}
12211221

12221222
// Add -i* options, and automatically translate to

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,7 @@ set(ppc_wrapper_files
145145
set(openmp_wrapper_files
146146
openmp_wrappers/math.h
147147
openmp_wrappers/cmath
148-
openmp_wrappers/__clang_openmp_math.h
149-
openmp_wrappers/__clang_openmp_math_declares.h
148+
openmp_wrappers/__clang_openmp_device_functions.h
150149
openmp_wrappers/new
151150
)
152151

clang/lib/Headers/__clang_cuda_cmath.h

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
#error "This file is for CUDA compilation only."
1313
#endif
1414

15+
#ifndef _OPENMP
1516
#include <limits>
17+
#endif
1618

1719
// CUDA lets us use various std math functions on the device side. This file
1820
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
@@ -31,31 +33,15 @@
3133
// std covers all of the known knowns.
3234

3335
#ifdef _OPENMP
34-
#define __DEVICE__ static __attribute__((always_inline))
36+
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
3537
#else
3638
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
3739
#endif
3840

39-
// For C++ 17 we need to include noexcept attribute to be compatible
40-
// with the header-defined version. This may be removed once
41-
// variant is supported.
42-
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
43-
#define __NOEXCEPT noexcept
44-
#else
45-
#define __NOEXCEPT
46-
#endif
47-
48-
#if !(defined(_OPENMP) && defined(__cplusplus))
4941
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
5042
__DEVICE__ long abs(long __n) { return ::labs(__n); }
5143
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
5244
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
53-
#endif
54-
// TODO: remove once variat is supported.
55-
#if defined(_OPENMP) && defined(__cplusplus)
56-
__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
57-
__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
58-
#endif
5945
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
6046
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
6147
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
@@ -64,11 +50,9 @@ __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
6450
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
6551
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
6652
__DEVICE__ float exp(float __x) { return ::expf(__x); }
67-
__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
53+
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
6854
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
6955
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
70-
// TODO: remove when variant is supported
71-
#ifndef _OPENMP
7256
__DEVICE__ int fpclassify(float __x) {
7357
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
7458
FP_ZERO, __x);
@@ -77,14 +61,15 @@ __DEVICE__ int fpclassify(double __x) {
7761
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
7862
FP_ZERO, __x);
7963
}
80-
#endif
8164
__DEVICE__ float frexp(float __arg, int *__exp) {
8265
return ::frexpf(__arg, __exp);
8366
}
8467

8568
// For inscrutable reasons, the CUDA headers define these functions for us on
86-
// Windows.
87-
#ifndef _MSC_VER
69+
// Windows. For OpenMP we omit these as some old system headers have
70+
// non-conforming `isinf(float)` and `isnan(float)` implementations that return
71+
// an `int`. The system versions of these functions should be fine anyway.
72+
#if !defined(_MSC_VER) && !defined(_OPENMP)
8873
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
8974
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
9075
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
@@ -161,6 +146,8 @@ __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
161146
// libdevice doesn't provide an implementation, and we don't want to be in the
162147
// business of implementing tricky libm functions in this header.
163148

149+
#ifndef _OPENMP
150+
164151
// Now we've defined everything we promised we'd define in
165152
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
166153
// fix up our math functions.
@@ -457,10 +444,7 @@ using ::remainderf;
457444
using ::remquof;
458445
using ::rintf;
459446
using ::roundf;
460-
// TODO: remove once variant is supported
461-
#ifndef _OPENMP
462447
using ::scalblnf;
463-
#endif
464448
using ::scalbnf;
465449
using ::sinf;
466450
using ::sinhf;
@@ -479,7 +463,8 @@ _GLIBCXX_END_NAMESPACE_VERSION
479463
} // namespace std
480464
#endif
481465

482-
#undef __NOEXCEPT
466+
#endif // _OPENMP
467+
483468
#undef __DEVICE__
484469

485470
#endif

clang/lib/Headers/__clang_cuda_device_functions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
// functions and __forceinline__ helps inlining these wrappers at -O1.
2222
#pragma push_macro("__DEVICE__")
2323
#ifdef _OPENMP
24-
#define __DEVICE__ static __attribute__((always_inline))
24+
#define __DEVICE__ static __attribute__((always_inline, nothrow))
2525
#else
2626
#define __DEVICE__ static __device__ __forceinline__
2727
#endif

clang/lib/Headers/__clang_cuda_math.h

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,25 @@
2323
// functions and __forceinline__ helps inlining these wrappers at -O1.
2424
#pragma push_macro("__DEVICE__")
2525
#ifdef _OPENMP
26-
#define __DEVICE__ static __inline__ __attribute__((always_inline))
26+
#if defined(__cplusplus)
27+
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
28+
#else
29+
#define __DEVICE__ static __attribute__((always_inline, nothrow))
30+
#endif
2731
#else
2832
#define __DEVICE__ static __device__ __forceinline__
2933
#endif
3034

35+
// Specialized version of __DEVICE__ for functions with void return type. Needed
36+
// because the OpenMP overlay requires constexpr functions here but prior to
37+
// c++14 void return functions could not be constexpr.
38+
#pragma push_macro("__DEVICE_VOID__")
39+
#ifdef _OPENMP && defined(__cplusplus) && __cplusplus < 201402L
40+
#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
41+
#else
42+
#define __DEVICE_VOID__ __DEVICE__
43+
#endif
44+
3145
// libdevice provides fast low precision and slow full-recision implementations
3246
// for some functions. Which one gets selected depends on
3347
// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
@@ -39,17 +53,8 @@
3953
#define __FAST_OR_SLOW(fast, slow) slow
4054
#endif
4155

42-
// For C++ 17 we need to include noexcept attribute to be compatible
43-
// with the header-defined version. This may be removed once
44-
// variant is supported.
45-
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
46-
#define __NOEXCEPT noexcept
47-
#else
48-
#define __NOEXCEPT
49-
#endif
50-
51-
__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); }
52-
__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); }
56+
__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
57+
__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
5358
__DEVICE__ double acos(double __a) { return __nv_acos(__a); }
5459
__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
5560
__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
@@ -104,7 +109,7 @@ __DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }
104109
__DEVICE__ float expf(float __a) { return __nv_expf(__a); }
105110
__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }
106111
__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }
107-
__DEVICE__ float fabsf(float __a) __NOEXCEPT { return __nv_fabsf(__a); }
112+
__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }
108113
__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }
109114
__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }
110115
__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }
@@ -142,15 +147,15 @@ __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
142147
__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
143148
__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
144149
#if defined(__LP64__) || defined(_WIN64)
145-
__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); };
150+
__DEVICE__ long labs(long __a) { return __nv_llabs(__a); };
146151
#else
147-
__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); };
152+
__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
148153
#endif
149154
__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
150155
__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
151156
__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }
152157
__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }
153-
__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); }
158+
__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }
154159
__DEVICE__ long long llmax(long long __a, long long __b) {
155160
return __nv_llmax(__a, __b);
156161
}
@@ -270,8 +275,6 @@ __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
270275
__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
271276
__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
272277
__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
273-
// TODO: remove once variant is supported
274-
#ifndef _OPENMP
275278
__DEVICE__ double scalbln(double __a, long __b) {
276279
if (__b > INT_MAX)
277280
return __a > 0 ? HUGE_VAL : -HUGE_VAL;
@@ -286,18 +289,17 @@ __DEVICE__ float scalblnf(float __a, long __b) {
286289
return __a > 0 ? 0.f : -0.f;
287290
return scalbnf(__a, (int)__b);
288291
}
289-
#endif
290292
__DEVICE__ double sin(double __a) { return __nv_sin(__a); }
291-
__DEVICE__ void sincos(double __a, double *__s, double *__c) {
293+
__DEVICE_VOID__ void sincos(double __a, double *__s, double *__c) {
292294
return __nv_sincos(__a, __s, __c);
293295
}
294-
__DEVICE__ void sincosf(float __a, float *__s, float *__c) {
296+
__DEVICE_VOID__ void sincosf(float __a, float *__s, float *__c) {
295297
return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c);
296298
}
297-
__DEVICE__ void sincospi(double __a, double *__s, double *__c) {
299+
__DEVICE_VOID__ void sincospi(double __a, double *__s, double *__c) {
298300
return __nv_sincospi(__a, __s, __c);
299301
}
300-
__DEVICE__ void sincospif(float __a, float *__s, float *__c) {
302+
__DEVICE_VOID__ void sincospif(float __a, float *__s, float *__c) {
301303
return __nv_sincospif(__a, __s, __c);
302304
}
303305
__DEVICE__ float sinf(float __a) {
@@ -339,7 +341,7 @@ __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
339341
__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
340342

341343
#pragma pop_macro("__DEVICE__")
344+
#pragma pop_macro("__DEVICE_VOID__")
342345
#pragma pop_macro("__FAST_OR_SLOW")
343-
#undef __NOEXCEPT
344346

345347
#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__

clang/lib/Headers/__clang_cuda_math_forward_declares.h

Lines changed: 5 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -20,37 +20,14 @@
2020
// would preclude the use of our own __device__ overloads for these functions.
2121

2222
#pragma push_macro("__DEVICE__")
23-
#ifdef _OPENMP
24-
#define __DEVICE__ static __inline__ __attribute__((always_inline))
25-
#else
2623
#define __DEVICE__ \
2724
static __inline__ __attribute__((always_inline)) __attribute__((device))
28-
#endif
29-
30-
// For C++ 17 we need to include noexcept attribute to be compatible
31-
// with the header-defined version. This may be removed once
32-
// variant is supported.
33-
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
34-
#define __NOEXCEPT noexcept
35-
#else
36-
#define __NOEXCEPT
37-
#endif
3825

39-
#if !(defined(_OPENMP) && defined(__cplusplus))
4026
__DEVICE__ long abs(long);
4127
__DEVICE__ long long abs(long long);
4228
__DEVICE__ double abs(double);
4329
__DEVICE__ float abs(float);
44-
#endif
45-
// While providing the CUDA declarations and definitions for math functions,
46-
// we may manually define additional functions.
47-
// TODO: Once variant is supported the additional functions will have
48-
// to be removed.
49-
#if defined(_OPENMP) && defined(__cplusplus)
50-
__DEVICE__ const double abs(const double);
51-
__DEVICE__ const float abs(const float);
52-
#endif
53-
__DEVICE__ int abs(int) __NOEXCEPT;
30+
__DEVICE__ int abs(int);
5431
__DEVICE__ double acos(double);
5532
__DEVICE__ float acos(float);
5633
__DEVICE__ double acosh(double);
@@ -85,8 +62,8 @@ __DEVICE__ double exp(double);
8562
__DEVICE__ float exp(float);
8663
__DEVICE__ double expm1(double);
8764
__DEVICE__ float expm1(float);
88-
__DEVICE__ double fabs(double) __NOEXCEPT;
89-
__DEVICE__ float fabs(float) __NOEXCEPT;
65+
__DEVICE__ double fabs(double);
66+
__DEVICE__ float fabs(float);
9067
__DEVICE__ double fdim(double, double);
9168
__DEVICE__ float fdim(float, float);
9269
__DEVICE__ double floor(double);
@@ -136,12 +113,12 @@ __DEVICE__ bool isnormal(double);
136113
__DEVICE__ bool isnormal(float);
137114
__DEVICE__ bool isunordered(double, double);
138115
__DEVICE__ bool isunordered(float, float);
139-
__DEVICE__ long labs(long) __NOEXCEPT;
116+
__DEVICE__ long labs(long);
140117
__DEVICE__ double ldexp(double, int);
141118
__DEVICE__ float ldexp(float, int);
142119
__DEVICE__ double lgamma(double);
143120
__DEVICE__ float lgamma(float);
144-
__DEVICE__ long long llabs(long long) __NOEXCEPT;
121+
__DEVICE__ long long llabs(long long);
145122
__DEVICE__ long long llrint(double);
146123
__DEVICE__ long long llrint(float);
147124
__DEVICE__ double log10(double);
@@ -152,9 +129,6 @@ __DEVICE__ double log2(double);
152129
__DEVICE__ float log2(float);
153130
__DEVICE__ double logb(double);
154131
__DEVICE__ float logb(float);
155-
#if defined(_OPENMP) && defined(__cplusplus)
156-
__DEVICE__ long double log(long double);
157-
#endif
158132
__DEVICE__ double log(double);
159133
__DEVICE__ float log(float);
160134
__DEVICE__ long lrint(double);
@@ -302,7 +276,6 @@ _GLIBCXX_END_NAMESPACE_VERSION
302276
} // namespace std
303277
#endif
304278

305-
#undef __NOEXCEPT
306279
#pragma pop_macro("__DEVICE__")
307280

308281
#endif
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------------===
1+
/*===- __clang_openmp_device_functions.h - OpenMP device function declares -===
22
*
33
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
* See https://llvm.org/LICENSE.txt for license information.
@@ -7,28 +7,32 @@
77
*===-----------------------------------------------------------------------===
88
*/
99

10-
#ifndef __CLANG_OPENMP_MATH_DECLARES_H__
11-
#define __CLANG_OPENMP_MATH_DECLARES_H__
10+
#ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
11+
#define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
1212

1313
#ifndef _OPENMP
1414
#error "This file is for OpenMP compilation only."
1515
#endif
1616

17-
#if defined(__NVPTX__) && defined(_OPENMP)
17+
#pragma omp begin declare variant match( \
18+
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
1819

19-
#define __CUDA__
20-
21-
#if defined(__cplusplus)
22-
#include <__clang_cuda_math_forward_declares.h>
20+
#ifdef __cplusplus
21+
extern "C" {
2322
#endif
2423

24+
#define __CUDA__
2525
/// Include declarations for libdevice functions.
2626
#include <__clang_cuda_libdevice_declares.h>
27+
2728
/// Provide definitions for these functions.
2829
#include <__clang_cuda_device_functions.h>
29-
#include <__clang_cuda_math.h>
30-
3130
#undef __CUDA__
3231

32+
#ifdef __cplusplus
33+
} // extern "C"
3334
#endif
35+
36+
#pragma omp end declare variant
37+
3438
#endif

0 commit comments

Comments
 (0)