зеркало из https://github.com/microsoft/clang.git
[CUDA] Allow function overloads in CUDA based on host/device attributes.
The patch makes it possible to parse CUDA files that contain host/device functions with identical signatures, but different attributes without having to physically split source into host-only and device-only parts. This change is needed in order to parse CUDA header files that have a lot of name clashes with standard include files. Gory details are in design doc here: https://goo.gl/EXnymm Feel free to leave comments there or in this review thread. This feature is controlled with CC1 option -fcuda-target-overloads and is disabled by default. Differential Revision: http://reviews.llvm.org/D12453 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@248295 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Родитель
2ead4d5eab
Коммит
72de1e381c
|
@ -166,6 +166,7 @@ LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
|
|||
LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device")
|
||||
LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions")
|
||||
LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)")
|
||||
LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes")
|
||||
LANGOPT(CUDAUsesLibDevice , 1, 0, "Selectively link and internalize bitcode.")
|
||||
|
||||
LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
|
||||
|
|
|
@ -659,6 +659,8 @@ def fcuda_disable_target_call_checks : Flag<["-"],
|
|||
HelpText<"Disable all cross-target (host, device, etc.) call checks in CUDA">;
|
||||
def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
|
||||
HelpText<"Incorporate CUDA device-side binary into host object file.">;
|
||||
def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">,
|
||||
HelpText<"Enable function overloads based on CUDA target attributes.">;
|
||||
def fcuda_uses_libdevice : Flag<["-"], "fcuda-uses-libdevice">,
|
||||
HelpText<"Selectively link and internalize bitcode.">;
|
||||
|
||||
|
|
|
@ -8613,8 +8613,37 @@ public:
|
|||
|
||||
CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
|
||||
|
||||
enum CUDAFunctionPreference {
|
||||
CFP_Never, // Invalid caller/callee combination.
|
||||
CFP_LastResort, // Lowest priority. Only in effect if
|
||||
// LangOpts.CUDADisableTargetCallChecks is true.
|
||||
CFP_Fallback, // Low priority caller/callee combination
|
||||
CFP_Best, // Preferred caller/callee combination
|
||||
};
|
||||
|
||||
/// Identifies relative preference of a given Caller/Callee
|
||||
/// combination, based on their host/device attributes.
|
||||
/// \param Caller function which needs address of \p Callee.
|
||||
/// nullptr in case of global context.
|
||||
/// \param Callee target function
|
||||
///
|
||||
/// \returns preference value for particular Caller/Callee combination.
|
||||
CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
|
||||
const FunctionDecl *Callee);
|
||||
|
||||
bool CheckCUDATarget(const FunctionDecl *Caller, const FunctionDecl *Callee);
|
||||
|
||||
/// Finds a function in \p Matches with highest calling priority
|
||||
/// from \p Caller context and erases all functions with lower
|
||||
/// calling priority.
|
||||
void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
|
||||
SmallVectorImpl<FunctionDecl *> &Matches);
|
||||
void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
|
||||
SmallVectorImpl<DeclAccessPair> &Matches);
|
||||
void EraseUnwantedCUDAMatches(
|
||||
const FunctionDecl *Caller,
|
||||
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
|
||||
|
||||
/// Given a implicit special member, infer its CUDA target from the
|
||||
/// calls it needs to make to underlying base/field special members.
|
||||
/// \param ClassDecl the class for which the member is being created.
|
||||
|
|
|
@ -1416,6 +1416,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
|||
if (Args.hasArg(OPT_fcuda_disable_target_call_checks))
|
||||
Opts.CUDADisableTargetCallChecks = 1;
|
||||
|
||||
if (Args.hasArg(OPT_fcuda_target_overloads))
|
||||
Opts.CUDATargetOverloads = 1;
|
||||
|
||||
if (Opts.ObjC1) {
|
||||
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
|
||||
StringRef value = arg->getValue();
|
||||
|
|
|
@ -60,8 +60,101 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
|
|||
return CFT_Host;
|
||||
}
|
||||
|
||||
// * CUDA Call preference table
|
||||
//
|
||||
// F - from,
|
||||
// T - to
|
||||
// Ph - preference in host mode
|
||||
// Pd - preference in device mode
|
||||
// H - handled in (x)
|
||||
// Preferences: b-best, f-fallback, l-last resort, n-never.
|
||||
//
|
||||
// | F | T | Ph | Pd | H |
|
||||
// |----+----+----+----+-----+
|
||||
// | d | d | b | b | (b) |
|
||||
// | d | g | n | n | (a) |
|
||||
// | d | h | l | l | (e) |
|
||||
// | d | hd | f | f | (c) |
|
||||
// | g | d | b | b | (b) |
|
||||
// | g | g | n | n | (a) |
|
||||
// | g | h | l | l | (e) |
|
||||
// | g | hd | f | f | (c) |
|
||||
// | h | d | l | l | (e) |
|
||||
// | h | g | b | b | (b) |
|
||||
// | h | h | b | b | (b) |
|
||||
// | h | hd | f | f | (c) |
|
||||
// | hd | d | l | f | (d) |
|
||||
// | hd | g | f | n |(d/a)|
|
||||
// | hd | h | f | l | (d) |
|
||||
// | hd | hd | b | b | (b) |
|
||||
|
||||
Sema::CUDAFunctionPreference
|
||||
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
|
||||
const FunctionDecl *Callee) {
|
||||
assert(getLangOpts().CUDATargetOverloads &&
|
||||
"Should not be called w/o enabled target overloads.");
|
||||
|
||||
assert(Callee && "Callee must be valid.");
|
||||
CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
|
||||
CUDAFunctionTarget CallerTarget =
|
||||
(Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
|
||||
|
||||
// If one of the targets is invalid, the check always fails, no matter what
|
||||
// the other target is.
|
||||
if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
|
||||
return CFP_Never;
|
||||
|
||||
// (a) Can't call global from some contexts until we support CUDA's
|
||||
// dynamic parallelism.
|
||||
if (CalleeTarget == CFT_Global &&
|
||||
(CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
|
||||
(CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
|
||||
return CFP_Never;
|
||||
|
||||
// (b) Best case scenarios
|
||||
if (CalleeTarget == CallerTarget ||
|
||||
(CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
|
||||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
|
||||
return CFP_Best;
|
||||
|
||||
// (c) Calling HostDevice is OK as a fallback that works for everyone.
|
||||
if (CalleeTarget == CFT_HostDevice)
|
||||
return CFP_Fallback;
|
||||
|
||||
// Figure out what should be returned 'last resort' cases. Normally
|
||||
// those would not be allowed, but we'll consider them if
|
||||
// CUDADisableTargetCallChecks is true.
|
||||
CUDAFunctionPreference QuestionableResult =
|
||||
getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
|
||||
|
||||
// (d) HostDevice behavior depends on compilation mode.
|
||||
if (CallerTarget == CFT_HostDevice) {
|
||||
// Calling a function that matches compilation mode is OK.
|
||||
// Calling a function from the other side is frowned upon.
|
||||
if (getLangOpts().CUDAIsDevice)
|
||||
return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
|
||||
else
|
||||
return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
|
||||
? CFP_Fallback
|
||||
: QuestionableResult;
|
||||
}
|
||||
|
||||
// (e) Calling across device/host boundary is not something you should do.
|
||||
if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
|
||||
(CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
|
||||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
|
||||
return QuestionableResult;
|
||||
|
||||
llvm_unreachable("All cases should've been handled by now.");
|
||||
}
|
||||
|
||||
bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
|
||||
const FunctionDecl *Callee) {
|
||||
// With target overloads enabled, we only disallow calling
|
||||
// combinations with CFP_Never.
|
||||
if (getLangOpts().CUDATargetOverloads)
|
||||
return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
|
||||
|
||||
// The CUDADisableTargetCallChecks short-circuits this check: we assume all
|
||||
// cross-target calls are valid.
|
||||
if (getLangOpts().CUDADisableTargetCallChecks)
|
||||
|
@ -117,6 +210,57 @@ bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
|
|||
return false;
|
||||
}
|
||||
|
||||
template <typename T, typename FetchDeclFn>
|
||||
static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
|
||||
llvm::SmallVectorImpl<T> &Matches,
|
||||
FetchDeclFn FetchDecl) {
|
||||
assert(S.getLangOpts().CUDATargetOverloads &&
|
||||
"Should not be called w/o enabled target overloads.");
|
||||
if (Matches.size() <= 1)
|
||||
return;
|
||||
|
||||
// Find the best call preference among the functions in Matches.
|
||||
Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
|
||||
for (auto const &Match : Matches) {
|
||||
P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
|
||||
if (P > BestCFP)
|
||||
BestCFP = P;
|
||||
}
|
||||
|
||||
// Erase all functions with lower priority.
|
||||
for (unsigned I = 0, N = Matches.size(); I != N;)
|
||||
if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
|
||||
Matches[I] = Matches[--N];
|
||||
Matches.resize(N);
|
||||
} else {
|
||||
++I;
|
||||
}
|
||||
}
|
||||
|
||||
void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
|
||||
SmallVectorImpl<FunctionDecl *> &Matches){
|
||||
EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
|
||||
*this, Caller, Matches, [](const FunctionDecl *item) { return item; });
|
||||
}
|
||||
|
||||
void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
|
||||
SmallVectorImpl<DeclAccessPair> &Matches) {
|
||||
EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
|
||||
*this, Caller, Matches, [](const DeclAccessPair &item) {
|
||||
return dyn_cast<FunctionDecl>(item.getDecl());
|
||||
});
|
||||
}
|
||||
|
||||
void Sema::EraseUnwantedCUDAMatches(
|
||||
const FunctionDecl *Caller,
|
||||
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
|
||||
EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
|
||||
*this, Caller, Matches,
|
||||
[](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
|
||||
return dyn_cast<FunctionDecl>(item.second);
|
||||
});
|
||||
}
|
||||
|
||||
/// When an implicitly-declared special member has to invoke more than one
|
||||
/// base/field special member, conflicts may occur in the targets of these
|
||||
/// members. For example, if one base's member __host__ and another's is
|
||||
|
|
|
@ -5515,6 +5515,12 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) {
|
|||
// In C++, the overloadable attribute negates the effects of extern "C".
|
||||
if (!D->isInExternCContext() || D->template hasAttr<OverloadableAttr>())
|
||||
return false;
|
||||
|
||||
// So do CUDA's host/device attributes if overloading is enabled.
|
||||
if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
|
||||
(D->template hasAttr<CUDADeviceAttr>() ||
|
||||
D->template hasAttr<CUDAHostAttr>()))
|
||||
return false;
|
||||
}
|
||||
return D->isExternC();
|
||||
}
|
||||
|
|
|
@ -2265,6 +2265,9 @@ FunctionDecl *Sema::FindUsualDeallocationFunction(SourceLocation StartLoc,
|
|||
"found an unexpected usual deallocation function");
|
||||
}
|
||||
|
||||
if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
|
||||
EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
|
||||
|
||||
assert(Matches.size() == 1 &&
|
||||
"unexpectedly have multiple usual deallocation functions");
|
||||
return Matches.front();
|
||||
|
@ -2296,6 +2299,9 @@ bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD,
|
|||
Matches.push_back(F.getPair());
|
||||
}
|
||||
|
||||
if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
|
||||
EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
|
||||
|
||||
// There's exactly one suitable operator; pick it.
|
||||
if (Matches.size() == 1) {
|
||||
Operator = cast<CXXMethodDecl>(Matches[0]->getUnderlyingDecl());
|
||||
|
|
|
@ -1072,6 +1072,25 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) {
|
||||
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
|
||||
OldTarget = IdentifyCUDATarget(Old);
|
||||
if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
|
||||
return false;
|
||||
|
||||
assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target.");
|
||||
|
||||
// Don't allow mixing of HD with other kinds. This guarantees that
|
||||
// we have only one viable function with this signature on any
|
||||
// side of CUDA compilation .
|
||||
if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice))
|
||||
return false;
|
||||
|
||||
// Allow overloading of functions with same signature, but
|
||||
// different CUDA target attributes.
|
||||
return NewTarget != OldTarget;
|
||||
}
|
||||
|
||||
// The signatures match; this is not an overload.
|
||||
return false;
|
||||
}
|
||||
|
@ -8508,6 +8527,13 @@ bool clang::isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
|
||||
Cand1.Function && Cand2.Function) {
|
||||
FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
|
||||
return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
|
||||
S.IdentifyCUDAPreference(Caller, Cand2.Function);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -9925,6 +9951,10 @@ public:
|
|||
EliminateAllExceptMostSpecializedTemplate();
|
||||
}
|
||||
}
|
||||
|
||||
if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
|
||||
Matches.size() > 1)
|
||||
EliminateSuboptimalCudaMatches();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -10100,11 +10130,15 @@ private:
|
|||
++I;
|
||||
else {
|
||||
Matches[I] = Matches[--N];
|
||||
Matches.set_size(N);
|
||||
Matches.resize(N);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void EliminateSuboptimalCudaMatches() {
|
||||
S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), Matches);
|
||||
}
|
||||
|
||||
public:
|
||||
void ComplainNoMatchesFound() const {
|
||||
assert(Matches.empty());
|
||||
|
|
|
@ -0,0 +1,214 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
|
||||
// Make sure we handle target overloads correctly.
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
|
||||
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
|
||||
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
|
||||
// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
|
||||
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
|
||||
|
||||
// Check target overloads handling with disabled call target checks.
|
||||
// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \
|
||||
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \
|
||||
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \
|
||||
// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s
|
||||
// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \
|
||||
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \
|
||||
// RUN: -fcuda-is-device -o - %s \
|
||||
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
|
||||
// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
typedef int (*fp_t)(void);
|
||||
typedef void (*gp_t)(void);
|
||||
|
||||
// CHECK-HOST: @hp = global i32 ()* @_Z1hv
|
||||
// CHECK-HOST: @chp = global i32 ()* @ch
|
||||
// CHECK-HOST: @dhp = global i32 ()* @_Z2dhv
|
||||
// CHECK-HOST: @cdhp = global i32 ()* @cdh
|
||||
// CHECK-HOST: @gp = global void ()* @_Z1gv
|
||||
|
||||
// CHECK-BOTH-LABEL: define i32 @_Z2dhv()
|
||||
__device__ int dh(void) { return 1; }
|
||||
// CHECK-DEVICE: ret i32 1
|
||||
__host__ int dh(void) { return 2; }
|
||||
// CHECK-HOST: ret i32 2
|
||||
|
||||
// CHECK-BOTH-LABEL: define i32 @_Z2hdv()
|
||||
__host__ __device__ int hd(void) { return 3; }
|
||||
// CHECK-BOTH: ret i32 3
|
||||
|
||||
// CHECK-DEVICE-LABEL: define i32 @_Z1dv()
|
||||
__device__ int d(void) { return 8; }
|
||||
// CHECK-DEVICE: ret i32 8
|
||||
|
||||
// CHECK-HOST-LABEL: define i32 @_Z1hv()
|
||||
__host__ int h(void) { return 9; }
|
||||
// CHECK-HOST: ret i32 9
|
||||
|
||||
// CHECK-BOTH-LABEL: define void @_Z1gv()
|
||||
__global__ void g(void) {}
|
||||
// CHECK-BOTH: ret void
|
||||
|
||||
// mangled names of extern "C" __host__ __device__ functions clash
|
||||
// with those of their __host__/__device__ counterparts, so
|
||||
// overloading of extern "C" functions can only happen for __host__
|
||||
// and __device__ functions -- we never codegen them in the same
|
||||
// compilation and therefore mangled name conflict is not a problem.
|
||||
|
||||
// CHECK-BOTH-LABEL: define i32 @cdh()
|
||||
extern "C" __device__ int cdh(void) {return 10;}
|
||||
// CHECK-DEVICE: ret i32 10
|
||||
extern "C" __host__ int cdh(void) {return 11;}
|
||||
// CHECK-HOST: ret i32 11
|
||||
|
||||
// CHECK-DEVICE-LABEL: define i32 @cd()
|
||||
extern "C" __device__ int cd(void) {return 12;}
|
||||
// CHECK-DEVICE: ret i32 12
|
||||
|
||||
// CHECK-HOST-LABEL: define i32 @ch()
|
||||
extern "C" __host__ int ch(void) {return 13;}
|
||||
// CHECK-HOST: ret i32 13
|
||||
|
||||
// CHECK-BOTH-LABEL: define i32 @chd()
|
||||
extern "C" __host__ __device__ int chd(void) {return 14;}
|
||||
// CHECK-BOTH: ret i32 14
|
||||
|
||||
// CHECK-HOST-LABEL: define void @_Z5hostfv()
|
||||
__host__ void hostf(void) {
|
||||
#if defined (NOCHECKS)
|
||||
fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
|
||||
fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp,
|
||||
#endif
|
||||
fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
|
||||
fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp,
|
||||
fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp,
|
||||
fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp,
|
||||
fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp,
|
||||
fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,
|
||||
gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp,
|
||||
|
||||
#if defined (NOCHECKS)
|
||||
d(); // CHECK-HOST-NC: call i32 @_Z1dv()
|
||||
cd(); // CHECK-HOST-NC: call i32 @cd()
|
||||
#endif
|
||||
h(); // CHECK-HOST: call i32 @_Z1hv()
|
||||
ch(); // CHECK-HOST: call i32 @ch()
|
||||
dh(); // CHECK-HOST: call i32 @_Z2dhv()
|
||||
cdh(); // CHECK-HOST: call i32 @cdh()
|
||||
g<<<0,0>>>(); // CHECK-HOST: call void @_Z1gv()
|
||||
}
|
||||
|
||||
// CHECK-DEVICE-LABEL: define void @_Z7devicefv()
|
||||
__device__ void devicef(void) {
|
||||
fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
|
||||
fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp,
|
||||
#if defined (NOCHECKS)
|
||||
fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
|
||||
fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp,
|
||||
#endif
|
||||
fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp,
|
||||
fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp,
|
||||
fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp,
|
||||
fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,
|
||||
|
||||
d(); // CHECK-DEVICE: call i32 @_Z1dv()
|
||||
cd(); // CHECK-DEVICE: call i32 @cd()
|
||||
#if defined (NOCHECKS)
|
||||
h(); // CHECK-DEVICE-NC: call i32 @_Z1hv()
|
||||
ch(); // CHECK-DEVICE-NC: call i32 @ch()
|
||||
#endif
|
||||
dh(); // CHECK-DEVICE: call i32 @_Z2dhv()
|
||||
cdh(); // CHECK-DEVICE: call i32 @cdh()
|
||||
}
|
||||
|
||||
// CHECK-BOTH-LABEL: define void @_Z11hostdevicefv()
|
||||
__host__ __device__ void hostdevicef(void) {
|
||||
#if defined (NOCHECKS)
|
||||
fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
|
||||
fp_t cdp = cd; // CHECK-BOTH-NC: store {{.*}} @cd, {{.*}} %cdp,
|
||||
fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
|
||||
fp_t chp = ch; // CHECK-BOTH-NC: store {{.*}} @ch, {{.*}} %chp,
|
||||
#endif
|
||||
fp_t dhp = dh; // CHECK-BOTH: store {{.*}} @_Z2dhv, {{.*}} %dhp,
|
||||
fp_t cdhp = cdh; // CHECK-BOTH: store {{.*}} @cdh, {{.*}} %cdhp,
|
||||
fp_t hdp = hd; // CHECK-BOTH: store {{.*}} @_Z2hdv, {{.*}} %hdp,
|
||||
fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp,
|
||||
#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
|
||||
gp_t gp = g; // CHECK-HOST-NC: store {{.*}} @_Z1gv, {{.*}} %gp,
|
||||
#endif
|
||||
|
||||
#if defined (NOCHECKS)
|
||||
d(); // CHECK-BOTH-NC: call i32 @_Z1dv()
|
||||
cd(); // CHECK-BOTH-NC: call i32 @cd()
|
||||
h(); // CHECK-BOTH-NC: call i32 @_Z1hv()
|
||||
ch(); // CHECK-BOTH-NC: call i32 @ch()
|
||||
#endif
|
||||
dh(); // CHECK-BOTH: call i32 @_Z2dhv()
|
||||
cdh(); // CHECK-BOTH: call i32 @cdh()
|
||||
#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
|
||||
g<<<0,0>>>(); // CHECK-HOST-NC: call void @_Z1gv()
|
||||
#endif
|
||||
}
|
||||
|
||||
// Test for address of overloaded function resolution in the global context.
|
||||
fp_t hp = h;
|
||||
fp_t chp = ch;
|
||||
fp_t dhp = dh;
|
||||
fp_t cdhp = cdh;
|
||||
gp_t gp = g;
|
||||
|
||||
int x;
|
||||
// Check constructors/destructors for D/H functions
|
||||
struct s_cd_dh {
|
||||
__host__ s_cd_dh() { x = 11; }
|
||||
__device__ s_cd_dh() { x = 12; }
|
||||
__host__ ~s_cd_dh() { x = 21; }
|
||||
__device__ ~s_cd_dh() { x = 22; }
|
||||
};
|
||||
|
||||
struct s_cd_hd {
|
||||
__host__ __device__ s_cd_hd() { x = 31; }
|
||||
__host__ __device__ ~s_cd_hd() { x = 32; }
|
||||
};
|
||||
|
||||
// CHECK-BOTH: define void @_Z7wrapperv
|
||||
#if defined(__CUDA_ARCH__)
|
||||
__device__
|
||||
#else
|
||||
__host__
|
||||
#endif
|
||||
void wrapper() {
|
||||
s_cd_dh scddh;
|
||||
// CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev(
|
||||
s_cd_hd scdhd;
|
||||
// CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev
|
||||
|
||||
// CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev(
|
||||
// CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev(
|
||||
}
|
||||
// CHECK-BOTH: ret void
|
||||
|
||||
// Now it's time to check what's been generated for the methods we used.
|
||||
|
||||
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev(
|
||||
// CHECK-HOST: store i32 11,
|
||||
// CHECK-DEVICE: store i32 12,
|
||||
// CHECK-BOTH: ret void
|
||||
|
||||
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev(
|
||||
// CHECK-BOTH: store i32 31,
|
||||
// CHECK-BOTH: ret void
|
||||
|
||||
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
|
||||
// CHECK-BOTH: store i32 32,
|
||||
// CHECK-BOTH: ret void
|
||||
|
||||
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev(
|
||||
// CHECK-HOST: store i32 21,
|
||||
// CHECK-DEVICE: store i32 22,
|
||||
// CHECK-BOTH: ret void
|
||||
|
|
@ -0,0 +1,317 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
|
||||
// Make sure we handle target overloads correctly.
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: -fsyntax-only -fcuda-target-overloads -verify %s
|
||||
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
|
||||
// RUN: -fsyntax-only -fcuda-target-overloads -fcuda-is-device -verify %s
|
||||
|
||||
// Check target overloads handling with disabled call target checks.
|
||||
// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -fsyntax-only \
|
||||
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -verify %s
|
||||
// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -fsyntax-only \
|
||||
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \
|
||||
// RUN: -fcuda-is-device -verify %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
typedef int (*fp_t)(void);
|
||||
typedef void (*gp_t)(void);
|
||||
|
||||
// Host and unattributed functions can't be overloaded
|
||||
__host__ int hh(void) { return 1; } // expected-note {{previous definition is here}}
|
||||
int hh(void) { return 1; } // expected-error {{redefinition of 'hh'}}
|
||||
|
||||
// H/D overloading is OK
|
||||
__host__ int dh(void) { return 2; }
|
||||
__device__ int dh(void) { return 2; }
|
||||
|
||||
// H/HD and D/HD are not allowed
|
||||
__host__ __device__ int hdh(void) { return 5; } // expected-note {{previous definition is here}}
|
||||
__host__ int hdh(void) { return 4; } // expected-error {{redefinition of 'hdh'}}
|
||||
|
||||
__host__ int hhd(void) { return 4; } // expected-note {{previous definition is here}}
|
||||
__host__ __device__ int hhd(void) { return 5; } // expected-error {{redefinition of 'hhd'}}
|
||||
// expected-warning@-1 {{attribute declaration must precede definition}}
|
||||
// expected-note@-3 {{previous definition is here}}
|
||||
|
||||
__host__ __device__ int hdd(void) { return 7; } // expected-note {{previous definition is here}}
|
||||
__device__ int hdd(void) { return 6; } // expected-error {{redefinition of 'hdd'}}
|
||||
|
||||
__device__ int dhd(void) { return 6; } // expected-note {{previous definition is here}}
|
||||
__host__ __device__ int dhd(void) { return 7; } // expected-error {{redefinition of 'dhd'}}
|
||||
// expected-warning@-1 {{attribute declaration must precede definition}}
|
||||
// expected-note@-3 {{previous definition is here}}
|
||||
|
||||
// Same tests for extern "C" functions
|
||||
extern "C" __host__ int chh(void) {return 11;} // expected-note {{previous definition is here}}
|
||||
extern "C" int chh(void) {return 11;} // expected-error {{redefinition of 'chh'}}
|
||||
|
||||
// H/D overloading is OK
|
||||
extern "C" __device__ int cdh(void) {return 10;}
|
||||
extern "C" __host__ int cdh(void) {return 11;}
|
||||
|
||||
// H/HD and D/HD overloading is not allowed.
|
||||
extern "C" __host__ __device__ int chhd1(void) {return 12;} // expected-note {{previous definition is here}}
|
||||
extern "C" __host__ int chhd1(void) {return 13;} // expected-error {{redefinition of 'chhd1'}}
|
||||
|
||||
extern "C" __host__ int chhd2(void) {return 13;} // expected-note {{previous definition is here}}
|
||||
extern "C" __host__ __device__ int chhd2(void) {return 12;} // expected-error {{redefinition of 'chhd2'}}
|
||||
// expected-warning@-1 {{attribute declaration must precede definition}}
|
||||
// expected-note@-3 {{previous definition is here}}
|
||||
|
||||
// Helper functions to verify calling restrictions.
|
||||
__device__ int d(void) { return 8; }
|
||||
__host__ int h(void) { return 9; }
|
||||
__global__ void g(void) {}
|
||||
extern "C" __device__ int cd(void) {return 10;}
|
||||
extern "C" __host__ int ch(void) {return 11;}
|
||||
|
||||
__host__ void hostf(void) {
|
||||
fp_t dp = d;
|
||||
fp_t cdp = cd;
|
||||
#if !defined(NOCHECKS)
|
||||
// expected-error@-3 {{reference to __device__ function 'd' in __host__ function}}
|
||||
// expected-note@65 {{'d' declared here}}
|
||||
// expected-error@-4 {{reference to __device__ function 'cd' in __host__ function}}
|
||||
// expected-note@68 {{'cd' declared here}}
|
||||
#endif
|
||||
fp_t hp = h;
|
||||
fp_t chp = ch;
|
||||
fp_t dhp = dh;
|
||||
fp_t cdhp = cdh;
|
||||
gp_t gp = g;
|
||||
|
||||
d();
|
||||
cd();
|
||||
#if !defined(NOCHECKS)
|
||||
// expected-error@-3 {{no matching function for call to 'd'}}
|
||||
// expected-note@65 {{candidate function not viable: call to __device__ function from __host__ function}}
|
||||
// expected-error@-4 {{no matching function for call to 'cd'}}
|
||||
// expected-note@68 {{candidate function not viable: call to __device__ function from __host__ function}}
|
||||
#endif
|
||||
h();
|
||||
ch();
|
||||
dh();
|
||||
cdh();
|
||||
g(); // expected-error {{call to global function g not configured}}
|
||||
g<<<0,0>>>();
|
||||
}
|
||||
|
||||
|
||||
__device__ void devicef(void) {
|
||||
fp_t dp = d;
|
||||
fp_t cdp = cd;
|
||||
fp_t hp = h;
|
||||
fp_t chp = ch;
|
||||
#if !defined(NOCHECKS)
|
||||
// expected-error@-3 {{reference to __host__ function 'h' in __device__ function}}
|
||||
// expected-note@66 {{'h' declared here}}
|
||||
// expected-error@-4 {{reference to __host__ function 'ch' in __device__ function}}
|
||||
// expected-note@69 {{'ch' declared here}}
|
||||
#endif
|
||||
fp_t dhp = dh;
|
||||
fp_t cdhp = cdh;
|
||||
gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}}
|
||||
// expected-note@67 {{'g' declared here}}
|
||||
|
||||
d();
|
||||
cd();
|
||||
h();
|
||||
ch();
|
||||
#if !defined(NOCHECKS)
|
||||
// expected-error@-3 {{no matching function for call to 'h'}}
|
||||
// expected-note@66 {{candidate function not viable: call to __host__ function from __device__ function}}
|
||||
// expected-error@-4 {{no matching function for call to 'ch'}}
|
||||
// expected-note@69 {{candidate function not viable: call to __host__ function from __device__ function}}
|
||||
#endif
|
||||
dh();
|
||||
cdh();
|
||||
g(); // expected-error {{no matching function for call to 'g'}}
|
||||
// expected-note@67 {{candidate function not viable: call to __global__ function from __device__ function}}
|
||||
g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __device__ function}}
|
||||
// expected-note@67 {{'g' declared here}}
|
||||
}
|
||||
|
||||
__global__ void globalf(void) {
|
||||
fp_t dp = d;
|
||||
fp_t cdp = cd;
|
||||
fp_t hp = h;
|
||||
fp_t chp = ch;
|
||||
#if !defined(NOCHECKS)
|
||||
// expected-error@-3 {{reference to __host__ function 'h' in __global__ function}}
|
||||
// expected-note@66 {{'h' declared here}}
|
||||
// expected-error@-4 {{reference to __host__ function 'ch' in __global__ function}}
|
||||
// expected-note@69 {{'ch' declared here}}
|
||||
#endif
|
||||
fp_t dhp = dh;
|
||||
fp_t cdhp = cdh;
|
||||
gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}}
|
||||
// expected-note@67 {{'g' declared here}}
|
||||
|
||||
d();
|
||||
cd();
|
||||
h();
|
||||
ch();
|
||||
#if !defined(NOCHECKS)
|
||||
// expected-error@-3 {{no matching function for call to 'h'}}
|
||||
// expected-note@66 {{candidate function not viable: call to __host__ function from __global__ function}}
|
||||
// expected-error@-4 {{no matching function for call to 'ch'}}
|
||||
// expected-note@69 {{candidate function not viable: call to __host__ function from __global__ function}}
|
||||
#endif
|
||||
dh();
|
||||
cdh();
|
||||
g(); // expected-error {{no matching function for call to 'g'}}
|
||||
// expected-note@67 {{candidate function not viable: call to __global__ function from __global__ function}}
|
||||
g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __global__ function}}
|
||||
// expected-note@67 {{'g' declared here}}
|
||||
}
|
||||
|
||||
__host__ __device__ void hostdevicef(void) {
|
||||
fp_t dp = d;
|
||||
fp_t cdp = cd;
|
||||
fp_t hp = h;
|
||||
fp_t chp = ch;
|
||||
#if !defined(NOCHECKS)
|
||||
#if !defined(__CUDA_ARCH__)
|
||||
// expected-error@-6 {{reference to __device__ function 'd' in __host__ __device__ function}}
|
||||
// expected-note@65 {{'d' declared here}}
|
||||
// expected-error@-7 {{reference to __device__ function 'cd' in __host__ __device__ function}}
|
||||
// expected-note@68 {{'cd' declared here}}
|
||||
#else
|
||||
// expected-error@-9 {{reference to __host__ function 'h' in __host__ __device__ function}}
|
||||
// expected-note@66 {{'h' declared here}}
|
||||
// expected-error@-10 {{reference to __host__ function 'ch' in __host__ __device__ function}}
|
||||
// expected-note@69 {{'ch' declared here}}
|
||||
#endif
|
||||
#endif
|
||||
fp_t dhp = dh;
|
||||
fp_t cdhp = cdh;
|
||||
gp_t gp = g;
|
||||
#if defined(__CUDA_ARCH__)
|
||||
// expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
|
||||
// expected-note@67 {{'g' declared here}}
|
||||
#endif
|
||||
|
||||
d();
|
||||
cd();
|
||||
h();
|
||||
ch();
|
||||
#if !defined(NOCHECKS)
|
||||
#if !defined(__CUDA_ARCH__)
|
||||
// expected-error@-6 {{no matching function for call to 'd'}}
|
||||
// expected-note@65 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
|
||||
// expected-error@-7 {{no matching function for call to 'cd'}}
|
||||
// expected-note@68 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
|
||||
#else
|
||||
// expected-error@-9 {{no matching function for call to 'h'}}
|
||||
// expected-note@66 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
|
||||
// expected-error@-10 {{no matching function for call to 'ch'}}
|
||||
// expected-note@69 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
dh();
|
||||
cdh();
|
||||
g();
|
||||
g<<<0,0>>>();
|
||||
#if !defined(__CUDA_ARCH__)
|
||||
// expected-error@-3 {{call to global function g not configured}}
|
||||
#else
|
||||
// expected-error@-5 {{no matching function for call to 'g'}}
|
||||
// expected-note@67 {{candidate function not viable: call to __global__ function from __host__ __device__ function}}
|
||||
// expected-error@-6 {{reference to __global__ function 'g' in __host__ __device__ function}}
|
||||
// expected-note@67 {{'g' declared here}}
|
||||
#endif // __CUDA_ARCH__
|
||||
}
|
||||
|
||||
// Test for address of overloaded function resolution in the global context.
|
||||
fp_t hp = h;
|
||||
fp_t chp = ch;
|
||||
fp_t dhp = dh;
|
||||
fp_t cdhp = cdh;
|
||||
gp_t gp = g;
|
||||
|
||||
|
||||
// Test overloading of destructors
|
||||
// Can't mix H and unattributed destructors
|
||||
struct d_h {
|
||||
~d_h() {} // expected-note {{previous declaration is here}}
|
||||
__host__ ~d_h() {} // expected-error {{destructor cannot be redeclared}}
|
||||
};
|
||||
|
||||
// H/D overloading is OK
|
||||
struct d_dh {
|
||||
__device__ ~d_dh() {}
|
||||
__host__ ~d_dh() {}
|
||||
};
|
||||
|
||||
// HD is OK
|
||||
struct d_hd {
|
||||
__host__ __device__ ~d_hd() {}
|
||||
};
|
||||
|
||||
// Mixing H/D and HD is not allowed.
|
||||
struct d_dhhd {
|
||||
__device__ ~d_dhhd() {}
|
||||
__host__ ~d_dhhd() {} // expected-note {{previous declaration is here}}
|
||||
__host__ __device__ ~d_dhhd() {} // expected-error {{destructor cannot be redeclared}}
|
||||
};
|
||||
|
||||
struct d_hhd {
|
||||
__host__ ~d_hhd() {} // expected-note {{previous declaration is here}}
|
||||
__host__ __device__ ~d_hhd() {} // expected-error {{destructor cannot be redeclared}}
|
||||
};
|
||||
|
||||
struct d_hdh {
|
||||
__host__ __device__ ~d_hdh() {} // expected-note {{previous declaration is here}}
|
||||
__host__ ~d_hdh() {} // expected-error {{destructor cannot be redeclared}}
|
||||
};
|
||||
|
||||
struct d_dhd {
|
||||
__device__ ~d_dhd() {} // expected-note {{previous declaration is here}}
|
||||
__host__ __device__ ~d_dhd() {} // expected-error {{destructor cannot be redeclared}}
|
||||
};
|
||||
|
||||
struct d_hdd {
|
||||
__host__ __device__ ~d_hdd() {} // expected-note {{previous declaration is here}}
|
||||
__device__ ~d_hdd() {} // expected-error {{destructor cannot be redeclared}}
|
||||
};
|
||||
|
||||
// Test overloading of member functions
|
||||
struct m_h {
|
||||
void operator delete(void *ptr); // expected-note {{previous declaration is here}}
|
||||
__host__ void operator delete(void *ptr); // expected-error {{class member cannot be redeclared}}
|
||||
};
|
||||
|
||||
// D/H overloading is OK
|
||||
struct m_dh {
|
||||
__device__ void operator delete(void *ptr);
|
||||
__host__ void operator delete(void *ptr);
|
||||
};
|
||||
|
||||
// HD by itself is OK
|
||||
struct m_hd {
|
||||
__device__ __host__ void operator delete(void *ptr);
|
||||
};
|
||||
|
||||
struct m_hhd {
|
||||
__host__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
|
||||
__host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
|
||||
};
|
||||
|
||||
struct m_hdh {
|
||||
__host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
|
||||
__host__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
|
||||
};
|
||||
|
||||
struct m_dhd {
|
||||
__device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
|
||||
__host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
|
||||
};
|
||||
|
||||
struct m_hdd {
|
||||
__host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
|
||||
__device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
|
||||
};
|
Загрузка…
Ссылка в новой задаче