From d51e43af0b3a6897b971f316c4de2035ec82d1f2 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 6 Oct 2011 18:29:46 +0000 Subject: [PATCH] CUDA: add -fcuda-is-device flag This frontend-only flag is used by the IR generator to determine whether to filter CUDA declarations for the host or for the device. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@141301 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Driver/CC1Options.td | 7 +++++ include/clang/Frontend/CodeGenOptions.h | 2 ++ lib/CodeGen/CodeGenModule.cpp | 17 +++++++++++ lib/Frontend/CompilerInvocation.cpp | 3 ++ test/CodeGenCUDA/filter-decl.cu | 40 +++++++++++++++++++++++++ test/CodeGenCUDA/ptx-kernels.cu | 2 +- 6 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 test/CodeGenCUDA/filter-decl.cu diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td index ffe1eafd61..ef83cbc148 100644 --- a/include/clang/Driver/CC1Options.td +++ b/include/clang/Driver/CC1Options.td @@ -714,3 +714,10 @@ def cl_mad_enable : Flag<"-cl-mad-enable">, HelpText<"OpenCL only. Enable less precise MAD instructions to be generated.">; def cl_std_EQ : Joined<"-cl-std=">, HelpText<"OpenCL language standard to compile for">; + +//===----------------------------------------------------------------------===// +// CUDA Options +//===----------------------------------------------------------------------===// + +def fcuda_is_device : Flag<"-fcuda-is-device">, + HelpText<"Generate code for CUDA device">; diff --git a/include/clang/Frontend/CodeGenOptions.h b/include/clang/Frontend/CodeGenOptions.h index 5f824f2d71..4874c17c79 100644 --- a/include/clang/Frontend/CodeGenOptions.h +++ b/include/clang/Frontend/CodeGenOptions.h @@ -37,6 +37,7 @@ public: unsigned AsmVerbose : 1; /// -dA, -fverbose-asm. unsigned ObjCAutoRefCountExceptions : 1; /// Whether ARC should be EH-safe. + unsigned CUDAIsDevice : 1; /// Set when compiling for CUDA device. unsigned CXAAtExit : 1; /// Use __cxa_atexit for calling destructors. unsigned CXXCtorDtorAliases: 1; /// Emit complete ctors/dtors as linker /// aliases to base ctors when possible. @@ -143,6 +144,7 @@ public: public: CodeGenOptions() { AsmVerbose = 0; + CUDAIsDevice = 0; CXAAtExit = 1; CXXCtorDtorAliases = 0; DataSections = 0; diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index f5e37897bc..d8b9c9d853 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -785,6 +785,23 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (Global->hasAttr()) return EmitAliasDefinition(GD); + // If this is CUDA, be selective about which declarations we emit. + if (Features.CUDA) { + if (CodeGenOpts.CUDAIsDevice) { + if (!Global->hasAttr() && + !Global->hasAttr() && + !Global->hasAttr() && + !Global->hasAttr()) + return; + } else { + if (!Global->hasAttr() && ( + Global->hasAttr() || + Global->hasAttr() || + Global->hasAttr())) + return; + } + } + // Ignore declarations, they will be emitted on their first use. if (const FunctionDecl *FD = dyn_cast(Global)) { // Forward declarations are emitted lazily on first use. diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index 4fb3ee62f0..b35bc65c7e 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -183,6 +183,8 @@ static void CodeGenOptsToArgs(const CodeGenOptions &Opts, Res.push_back("-mcode-model"); Res.push_back(Opts.CodeModel); } + if (Opts.CUDAIsDevice) + Res.push_back("-fcuda-is-device"); if (!Opts.CXAAtExit) Res.push_back("-fno-use-cxa-atexit"); if (Opts.CXXCtorDtorAliases) @@ -1028,6 +1030,7 @@ static void ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.ObjCAutoRefCountExceptions = Args.hasArg(OPT_fobjc_arc_exceptions); Opts.ObjCRuntimeHasARC = Args.hasArg(OPT_fobjc_runtime_has_arc); Opts.ObjCRuntimeHasTerminate = Args.hasArg(OPT_fobjc_runtime_has_terminate); + Opts.CUDAIsDevice = Args.hasArg(OPT_fcuda_is_device); Opts.CXAAtExit = !Args.hasArg(OPT_fno_use_cxa_atexit); Opts.CXXCtorDtorAliases = Args.hasArg(OPT_mconstructor_aliases); Opts.CodeModel = Args.getLastArgValue(OPT_mcode_model); diff --git a/test/CodeGenCUDA/filter-decl.cu b/test/CodeGenCUDA/filter-decl.cu new file mode 100644 index 0000000000..b758632d12 --- /dev/null +++ b/test/CodeGenCUDA/filter-decl.cu @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-HOST %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefix=CHECK-DEVICE %s + +#include "../SemaCUDA/cuda.h" + +// CHECK-HOST-NOT: constantdata = global +// CHECK-DEVICE: constantdata = global +__constant__ char constantdata[256]; + +// CHECK-HOST-NOT: devicedata = global +// CHECK-DEVICE: devicedata = global +__device__ char devicedata[256]; + +// CHECK-HOST-NOT: shareddata = global +// CHECK-DEVICE: shareddata = global +__shared__ char shareddata[256]; + +// CHECK-HOST: hostdata = global +// CHECK-DEVICE-NOT: hostdata = global +char hostdata[256]; + +// CHECK-HOST: define{{.*}}implicithostonlyfunc +// CHECK-DEVICE-NOT: define{{.*}}implicithostonlyfunc +void implicithostonlyfunc(void) {} + +// CHECK-HOST: define{{.*}}explicithostonlyfunc +// CHECK-DEVICE-NOT: define{{.*}}explicithostonlyfunc +__host__ void explicithostonlyfunc(void) {} + +// CHECK-HOST-NOT: define{{.*}}deviceonlyfunc +// CHECK-DEVICE: define{{.*}}deviceonlyfunc +__device__ void deviceonlyfunc(void) {} + +// CHECK-HOST: define{{.*}}hostdevicefunc +// CHECK-DEVICE: define{{.*}}hostdevicefunc +__host__ __device__ void hostdevicefunc(void) {} + +// CHECK-HOST: define{{.*}}globalfunc +// CHECK-DEVICE: define{{.*}}globalfunc +__global__ void globalfunc(void) {} diff --git a/test/CodeGenCUDA/ptx-kernels.cu b/test/CodeGenCUDA/ptx-kernels.cu index 310fa2a276..ecca8519af 100644 --- a/test/CodeGenCUDA/ptx-kernels.cu +++ b/test/CodeGenCUDA/ptx-kernels.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple ptx32-unknown-unknown -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple ptx32-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s #include "../SemaCUDA/cuda.h"