1
0
mirror of https://github.com/Kitware/CMake.git synced 2025-10-14 02:08:27 +08:00

CUDA: Clang separable compilation

For NVCC the compiler takes care of device linking when passed the "-dlink"
flag.
Clang doesn't support such magic and requires the buildsystem to do the work
that NVCC does behind the scenes.

The implementation is based on the device linking steps documented in the
following Bazel build definitions (see line 259):
7cabcdf073/third_party/nccl/build_defs.bzl.tpl (L259)

Closes: #20726
This commit is contained in:
Raul Tambre
2020-09-05 19:40:02 +03:00
committed by Brad King
parent c98ec731f9
commit c63fe01835
22 changed files with 506 additions and 128 deletions

View File

@@ -0,0 +1,4 @@
cuda-clang-separable-compilation
--------------------------------
* :prop_tgt:`CUDA_SEPARABLE_COMPILATION` is now supported when using Clang.

View File

@@ -3,6 +3,8 @@ set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@")
set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@") set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@")
set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@") set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@") set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
set(CMAKE_CUDA_DEVICE_LINKER "@CMAKE_CUDA_DEVICE_LINKER@")
set(CMAKE_CUDA_FATBINARY "@CMAKE_CUDA_FATBINARY@")
set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@") set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
set(CMAKE_CUDA_COMPILE_FEATURES "@CMAKE_CUDA_COMPILE_FEATURES@") set(CMAKE_CUDA_COMPILE_FEATURES "@CMAKE_CUDA_COMPILE_FEATURES@")
set(CMAKE_CUDA03_COMPILE_FEATURES "@CMAKE_CUDA03_COMPILE_FEATURES@") set(CMAKE_CUDA03_COMPILE_FEATURES "@CMAKE_CUDA03_COMPILE_FEATURES@")
@@ -44,6 +46,7 @@ if(CMAKE_CUDA_LIBRARY_ARCHITECTURE)
endif() endif()
set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@") set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@")
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@")
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")

View File

@@ -145,7 +145,7 @@ endif()
#Specify how to compile when separable compilation has been requested #Specify how to compile when separable compilation has been requested
if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION) if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION)
set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
"<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -dc <SOURCE> -o <OBJECT>") "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_DEVICE_CODE} <SOURCE> -o <OBJECT>")
endif() endif()
#Specify how to compile when whole compilation has been requested #Specify how to compile when whole compilation has been requested
@@ -200,6 +200,11 @@ if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE)
"<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}") "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
endif() endif()
# Used when device linking is handled by CMake.
if(NOT CMAKE_CUDA_DEVICE_LINK_COMPILE)
set(CMAKE_CUDA_DEVICE_LINK_COMPILE "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <FLAGS> -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -D__NV_EXTRA_INITIALIZATION=\"\" -D__NV_EXTRA_FINALIZATION=\"\" -DREGISTERLINKBINARYFILE=\\\"<REGISTER_FILE>\\\" -DFATBINFILE=\\\"<FATBINARY>\\\" ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c \"${CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT}/bin/crt/link.stub\" -o <OBJECT>")
endif()
unset(__IMPLICT_DLINK_FLAGS) unset(__IMPLICT_DLINK_FLAGS)
set(CMAKE_CUDA_INFORMATION_LOADED 1) set(CMAKE_CUDA_INFORMATION_LOADED 1)

View File

@@ -169,11 +169,14 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
endif() endif()
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${_CUDA_NVCC_EXECUTABLE}" DIRECTORY) get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${_CUDA_NVCC_EXECUTABLE}" DIRECTORY)
set(CMAKE_CUDA_DEVICE_LINKER "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/nvlink${CMAKE_EXECUTABLE_SUFFIX}")
set(CMAKE_CUDA_FATBINARY "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/fatbinary${CMAKE_EXECUTABLE_SUFFIX}")
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY) get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
# CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file. # In a non-scattered installation the following are equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
# In a non-scattered installation this is equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
# We first check for a non-scattered installation to prefer it over a scattered installation. # We first check for a non-scattered installation to prefer it over a scattered installation.
# CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file.
if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/version.txt") if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/version.txt")
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt") elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt")
@@ -181,6 +184,15 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt") elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt")
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/cuda") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/cuda")
endif() endif()
# CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT contains the linking stubs necessary for device linking and other low-level library files.
if(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit")
elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit")
else()
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
endif()
endif() endif()
set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v") set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")

View File

@@ -13,6 +13,7 @@ __compiler_clang_cxx_standards(CUDA)
set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE) set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE)
set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda") set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda")
set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S") set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S")
set(_CMAKE_CUDA_DEVICE_CODE "-fgpu-rdc -c")
# RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default. # RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default.
set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}") set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")

View File

@@ -6,6 +6,7 @@ set(CMAKE_CUDA_VERBOSE_COMPILE_FLAG "-Xcompiler=-v")
set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu") set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu")
set(_CMAKE_CUDA_PTX_FLAG "-ptx") set(_CMAKE_CUDA_PTX_FLAG "-ptx")
set(_CMAKE_CUDA_DEVICE_CODE "-dc")
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89) if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89)
# The -forward-unknown-to-host-compiler flag was only # The -forward-unknown-to-host-compiler flag was only

View File

@@ -1955,17 +1955,6 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
} else if (lang == "CUDA") { } else if (lang == "CUDA") {
target->AddCUDAArchitectureFlags(flags); target->AddCUDAArchitectureFlags(flags);
target->AddCUDAToolkitFlags(flags); target->AddCUDAToolkitFlags(flags);
if (compiler == "Clang") {
bool separable = target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION");
if (separable) {
this->Makefile->IssueMessage(
MessageType::FATAL_ERROR,
"CUDA_SEPARABLE_COMPILATION isn't supported on Clang. "
"See CMake issue #20726.");
}
}
} else if (lang == "ISPC") { } else if (lang == "ISPC") {
target->AddISPCTargetFlags(flags); target->AddISPCTargetFlags(flags);
} }

View File

@@ -446,7 +446,7 @@ public:
void GetTargetCompileFlags(cmGeneratorTarget* target, void GetTargetCompileFlags(cmGeneratorTarget* target,
std::string const& config, std::string const& config,
std::string const& lang, std::string& flags, std::string const& lang, std::string& flags,
std::string const& arch = std::string()); std::string const& arch);
std::vector<BT<std::string>> GetTargetCompileFlags( std::vector<BT<std::string>> GetTargetCompileFlags(
cmGeneratorTarget* target, std::string const& config, cmGeneratorTarget* target, std::string const& config,
std::string const& lang, std::string const& arch = std::string()); std::string const& lang, std::string const& arch = std::string());

View File

@@ -91,19 +91,12 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
std::vector<std::string> commands; std::vector<std::string> commands;
// Get the language to use for linking this library. // Get the name of the device object to generate.
std::string linkLanguage = "CUDA";
std::string const& objExt = std::string const& objExt =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION"); this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
std::string const targetOutput =
// Build list of dependencies.
std::vector<std::string> depends;
this->AppendLinkDepends(depends, linkLanguage);
// Get the name of the device object to generate.
std::string const targetOutputReal =
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt; this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
this->DeviceLinkObject = targetOutputReal; this->DeviceLinkObject = targetOutput;
this->NumberOfProgressActions++; this->NumberOfProgressActions++;
if (!this->NoRuleMessages) { if (!this->NoRuleMessages) {
@@ -111,7 +104,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
this->MakeEchoProgress(progress); this->MakeEchoProgress(progress);
// Add the link message. // Add the link message.
std::string buildEcho = std::string buildEcho =
cmStrCat("Linking ", linkLanguage, " device code ", cmStrCat("Linking CUDA device code ",
this->LocalGenerator->ConvertToOutputFormat( this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath( this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), this->LocalGenerator->GetCurrentBinaryDirectory(),
@@ -121,6 +114,29 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress); commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
} }
if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
this->WriteDeviceLinkRule(commands, targetOutput);
} else {
this->WriteNvidiaDeviceExecutableRule(relink, commands, targetOutput);
}
// Write the main driver rule to build everything in this target.
this->WriteTargetDriverRule(targetOutput, relink);
#else
static_cast<void>(relink);
#endif
}
void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
bool relink, std::vector<std::string>& commands,
const std::string& targetOutput)
{
const std::string linkLanguage = "CUDA";
// Build list of dependencies.
std::vector<std::string> depends;
this->AppendLinkDepends(depends, linkLanguage);
// Build a list of compiler flags and linker flags. // Build a list of compiler flags and linker flags.
std::string langFlags; std::string langFlags;
std::string linkFlags; std::string linkFlags;
@@ -136,7 +152,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
// may need to be cleaned. // may need to be cleaned.
std::vector<std::string> exeCleanFiles; std::vector<std::string> exeCleanFiles;
exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath( exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal)); this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
// Determine whether a link script will be used. // Determine whether a link script will be used.
bool useLinkScript = this->GlobalGenerator->GetUseLinkScript(); bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
@@ -195,7 +211,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
: cmOutputConverter::SHELL; : cmOutputConverter::SHELL;
std::string target = this->LocalGenerator->ConvertToOutputFormat( std::string target = this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath( this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal), this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
output); output);
std::string targetFullPathCompilePDB = std::string targetFullPathCompilePDB =
@@ -226,7 +242,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
this->LocalGenerator->CreateRulePlaceholderExpander()); this->LocalGenerator->CreateRulePlaceholderExpander());
// Expand placeholders in the commands. // Expand placeholders in the commands.
rulePlaceholderExpander->SetTargetImpLib(targetOutputReal); rulePlaceholderExpander->SetTargetImpLib(targetOutput);
for (std::string& real_link_command : real_link_commands) { for (std::string& real_link_command : real_link_commands) {
real_link_command = cmStrCat(launcher, real_link_command); real_link_command = cmStrCat(launcher, real_link_command);
rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator, rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
@@ -255,17 +271,10 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
// Write the build rule. // Write the build rule.
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
targetOutputReal, depends, commands, targetOutput, depends, commands, false);
false);
// Write the main driver rule to build everything in this target.
this->WriteTargetDriverRule(targetOutputReal, relink);
// Clean all the possible executable names and symlinks. // Clean all the possible executable names and symlinks.
this->CleanFiles.insert(exeCleanFiles.begin(), exeCleanFiles.end()); this->CleanFiles.insert(exeCleanFiles.begin(), exeCleanFiles.end());
#else
static_cast<void>(relink);
#endif
} }
void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink) void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)

View File

@@ -5,6 +5,7 @@
#include "cmConfigure.h" // IWYU pragma: keep #include "cmConfigure.h" // IWYU pragma: keep
#include <string> #include <string>
#include <vector>
#include "cmMakefileTargetGenerator.h" #include "cmMakefileTargetGenerator.h"
@@ -23,6 +24,9 @@ public:
protected: protected:
virtual void WriteExecutableRule(bool relink); virtual void WriteExecutableRule(bool relink);
virtual void WriteDeviceExecutableRule(bool relink); virtual void WriteDeviceExecutableRule(bool relink);
virtual void WriteNvidiaDeviceExecutableRule(
bool relink, std::vector<std::string>& commands,
const std::string& targetOutput);
private: private:
std::string DeviceLinkObject; std::string DeviceLinkObject;

View File

@@ -129,8 +129,7 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
const bool requiresDeviceLinking = requireDeviceLinking( const bool requiresDeviceLinking = requireDeviceLinking(
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName()); *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
if (requiresDeviceLinking) { if (requiresDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", false);
this->WriteDeviceLibraryRules(linkRuleVar, false);
} }
std::string linkLanguage = std::string linkLanguage =
@@ -156,8 +155,7 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
const bool requiresDeviceLinking = requireDeviceLinking( const bool requiresDeviceLinking = requireDeviceLinking(
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName()); *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
if (requiresDeviceLinking) { if (requiresDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
this->WriteDeviceLibraryRules(linkRuleVar, relink);
} }
} }
@@ -191,8 +189,7 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
const bool requiresDeviceLinking = requireDeviceLinking( const bool requiresDeviceLinking = requireDeviceLinking(
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName()); *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
if (requiresDeviceLinking) { if (requiresDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
this->WriteDeviceLibraryRules(linkRuleVar, relink);
} }
} }
@@ -239,12 +236,47 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
// TODO: Merge the methods that call this method to avoid // TODO: Merge the methods that call this method to avoid
// code duplication. // code duplication.
std::vector<std::string> commands; std::vector<std::string> commands;
// Get the language to use for linking this library.
std::string linkLanguage = "CUDA";
std::string const objExt = std::string const objExt =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION"); this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
// Get the name of the device object to generate.
std::string const targetOutput =
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
this->DeviceLinkObject = targetOutput;
this->NumberOfProgressActions++;
if (!this->NoRuleMessages) {
cmLocalUnixMakefileGenerator3::EchoProgress progress;
this->MakeEchoProgress(progress);
// Add the link message.
std::string buildEcho =
cmStrCat("Linking CUDA device code ",
this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(),
this->DeviceLinkObject),
cmOutputConverter::SHELL));
this->LocalGenerator->AppendEcho(
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
}
if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
this->WriteDeviceLinkRule(commands, targetOutput);
} else {
this->WriteNvidiaDeviceLibraryRules(linkRuleVar, relink, commands,
targetOutput);
}
// Write the main driver rule to build everything in this target.
this->WriteTargetDriverRule(targetOutput, relink);
}
void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
const std::string& linkRuleVar, bool relink,
std::vector<std::string>& commands, const std::string& targetOutput)
{
std::string linkLanguage = "CUDA";
// Build list of dependencies. // Build list of dependencies.
std::vector<std::string> depends; std::vector<std::string> depends;
this->AppendLinkDepends(depends, linkLanguage); this->AppendLinkDepends(depends, linkLanguage);
@@ -258,30 +290,10 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
std::string linkFlags; std::string linkFlags;
this->GetDeviceLinkFlags(linkFlags, linkLanguage); this->GetDeviceLinkFlags(linkFlags, linkLanguage);
// Get the name of the device object to generate.
std::string const targetOutputReal =
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
this->DeviceLinkObject = targetOutputReal;
this->NumberOfProgressActions++;
if (!this->NoRuleMessages) {
cmLocalUnixMakefileGenerator3::EchoProgress progress;
this->MakeEchoProgress(progress);
// Add the link message.
std::string buildEcho =
cmStrCat("Linking ", linkLanguage, " device code ",
this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(),
this->DeviceLinkObject),
cmOutputConverter::SHELL));
this->LocalGenerator->AppendEcho(
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
}
// Clean files associated with this library. // Clean files associated with this library.
std::set<std::string> libCleanFiles; std::set<std::string> libCleanFiles;
libCleanFiles.insert(this->LocalGenerator->MaybeConvertToRelativePath( libCleanFiles.insert(this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal)); this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
// Determine whether a link script will be used. // Determine whether a link script will be used.
bool useLinkScript = this->GlobalGenerator->GetUseLinkScript(); bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
@@ -335,7 +347,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
std::string target = this->LocalGenerator->ConvertToOutputFormat( std::string target = this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath( this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal), this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
output); output);
std::string targetFullPathCompilePDB = std::string targetFullPathCompilePDB =
@@ -364,7 +376,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
this->LocalGenerator->CreateRulePlaceholderExpander()); this->LocalGenerator->CreateRulePlaceholderExpander());
// Construct the main link rule and expand placeholders. // Construct the main link rule and expand placeholders.
rulePlaceholderExpander->SetTargetImpLib(targetOutputReal); rulePlaceholderExpander->SetTargetImpLib(targetOutput);
std::string linkRule = this->GetLinkRule(linkRuleVar); std::string linkRule = this->GetLinkRule(linkRuleVar);
cmExpandList(linkRule, real_link_commands); cmExpandList(linkRule, real_link_commands);
@@ -399,14 +411,11 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
commands1.clear(); commands1.clear();
// Compute the list of outputs. // Compute the list of outputs.
std::vector<std::string> outputs(1, targetOutputReal); std::vector<std::string> outputs(1, targetOutput);
// Write the build rule. // Write the build rule.
this->WriteMakeRule(*this->BuildFileStream, nullptr, outputs, depends, this->WriteMakeRule(*this->BuildFileStream, nullptr, outputs, depends,
commands, false); commands, false);
// Write the main driver rule to build everything in this target.
this->WriteTargetDriverRule(targetOutputReal, relink);
#else #else
static_cast<void>(linkRuleVar); static_cast<void>(linkRuleVar);
static_cast<void>(relink); static_cast<void>(relink);

View File

@@ -5,6 +5,7 @@
#include "cmConfigure.h" // IWYU pragma: keep #include "cmConfigure.h" // IWYU pragma: keep
#include <string> #include <string>
#include <vector>
#include "cmMakefileTargetGenerator.h" #include "cmMakefileTargetGenerator.h"
@@ -27,6 +28,10 @@ protected:
void WriteModuleLibraryRules(bool relink); void WriteModuleLibraryRules(bool relink);
void WriteDeviceLibraryRules(const std::string& linkRule, bool relink); void WriteDeviceLibraryRules(const std::string& linkRule, bool relink);
void WriteNvidiaDeviceLibraryRules(const std::string& linkRuleVar,
bool relink,
std::vector<std::string>& commands,
const std::string& targetOutput);
void WriteLibraryRules(const std::string& linkRule, void WriteLibraryRules(const std::string& linkRule,
const std::string& extraFlags, bool relink); const std::string& extraFlags, bool relink);
// MacOSX Framework support methods // MacOSX Framework support methods

View File

@@ -2,10 +2,13 @@
file Copyright.txt or https://cmake.org/licensing for details. */ file Copyright.txt or https://cmake.org/licensing for details. */
#include "cmMakefileTargetGenerator.h" #include "cmMakefileTargetGenerator.h"
#include <algorithm>
#include <cassert> #include <cassert>
#include <cstdio> #include <cstdio>
#include <iterator>
#include <sstream> #include <sstream>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <utility> #include <utility>
#include <cm/memory> #include <cm/memory>
@@ -25,6 +28,7 @@
#include "cmMakefileExecutableTargetGenerator.h" #include "cmMakefileExecutableTargetGenerator.h"
#include "cmMakefileLibraryTargetGenerator.h" #include "cmMakefileLibraryTargetGenerator.h"
#include "cmMakefileUtilityTargetGenerator.h" #include "cmMakefileUtilityTargetGenerator.h"
#include "cmMessageType.h"
#include "cmOutputConverter.h" #include "cmOutputConverter.h"
#include "cmPolicies.h" #include "cmPolicies.h"
#include "cmProperty.h" #include "cmProperty.h"
@@ -1323,6 +1327,130 @@ void cmMakefileTargetGenerator::WriteObjectDependRules(
} }
} }
void cmMakefileTargetGenerator::WriteDeviceLinkRule(
std::vector<std::string>& commands, const std::string& output)
{
std::string architecturesStr =
this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
if (cmIsOff(architecturesStr)) {
this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
"CUDA_SEPARABLE_COMPILATION on Clang "
"requires CUDA_ARCHITECTURES to be set.");
return;
}
std::vector<std::string> architectures = cmExpandedList(architecturesStr);
// Ensure there are no duplicates.
const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
std::vector<std::string> deps;
this->AppendTargetDepends(deps, true);
this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
std::copy(this->Objects.begin(), this->Objects.end(),
std::back_inserter(deps));
std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
deps.clear();
std::copy(depsSet.begin(), depsSet.end(), std::back_inserter(deps));
return deps;
}();
const std::string objectDir = this->GeneratorTarget->ObjectDirectory;
const std::string relObjectDir =
this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir);
// Construct a list of files associated with this executable that
// may need to be cleaned.
std::vector<std::string> cleanFiles;
cleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), output));
std::string profiles;
std::vector<std::string> fatbinaryDepends;
std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
// Link device code for each architecture.
for (const std::string& architectureKind : architectures) {
// Clang always generates real code, so strip the specifier.
const std::string architecture =
architectureKind.substr(0, architectureKind.find('-'));
const std::string cubin =
cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
fatbinaryDepends.emplace_back(cubin);
std::string registerFileCmd;
// The generated register file contains macros that when expanded register
// the device routines. Because the routines are the same for all
// architectures the register file will be the same too. Thus generate it
// only on the first invocation to reduce overhead.
if (fatbinaryDepends.size() == 1) {
std::string registerFileRel =
this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), registerFile);
registerFileCmd =
cmStrCat(" --register-link-binaries=", registerFileRel);
cleanFiles.push_back(registerFileRel);
}
std::string command = cmStrCat(
this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
" -arch=sm_", architecture, registerFileCmd, " -o=$@ ",
cmJoin(linkDeps, " "));
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, cubin,
linkDeps, { command }, false);
}
// Combine all architectures into a single fatbinary.
const std::string fatbinaryCommand =
cmStrCat(this->Makefile->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
" -64 -cmdline=--compile-only -compress-all -link "
"--embedded-fatbin=$@",
profiles);
const std::string fatbinaryOutput =
cmStrCat(objectDir, "cmake_cuda_fatbin.h");
const std::string fatbinaryOutputRel =
this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(), fatbinaryOutput);
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
fatbinaryOutputRel, fatbinaryDepends,
{ fatbinaryCommand }, false);
// Compile the stub that registers the kernels and contains the fatbinaries.
cmRulePlaceholderExpander::RuleVariables vars;
vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
vars.CMTargetType =
cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str();
vars.Language = "CUDA";
vars.Object = output.c_str();
vars.Fatbinary = fatbinaryOutput.c_str();
vars.RegisterFile = registerFile.c_str();
std::string flags = this->GetFlags("CUDA", this->GetConfigName());
vars.Flags = flags.c_str();
std::string compileCmd = this->GetLinkRule("CMAKE_CUDA_DEVICE_LINK_COMPILE");
std::unique_ptr<cmRulePlaceholderExpander> rulePlaceholderExpander(
this->LocalGenerator->CreateRulePlaceholderExpander());
rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
compileCmd, vars);
commands.emplace_back(compileCmd);
this->LocalGenerator->WriteMakeRule(
*this->BuildFileStream, nullptr, output,
{ cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
// Clean all the possible executable names and symlinks.
this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
}
void cmMakefileTargetGenerator::GenerateCustomRuleFile( void cmMakefileTargetGenerator::GenerateCustomRuleFile(
cmCustomCommandGenerator const& ccg) cmCustomCommandGenerator const& ccg)
{ {
@@ -1579,10 +1707,11 @@ void cmMakefileTargetGenerator::WriteTargetDriverRule(
} }
void cmMakefileTargetGenerator::AppendTargetDepends( void cmMakefileTargetGenerator::AppendTargetDepends(
std::vector<std::string>& depends) std::vector<std::string>& depends, bool ignoreType)
{ {
// Static libraries never depend on anything for linking. // Static libraries never depend on anything for linking.
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY) { if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
!ignoreType) {
return; return;
} }

View File

@@ -104,6 +104,10 @@ protected:
void WriteObjectDependRules(cmSourceFile const& source, void WriteObjectDependRules(cmSourceFile const& source,
std::vector<std::string>& depends); std::vector<std::string>& depends);
// CUDA device linking.
void WriteDeviceLinkRule(std::vector<std::string>& commands,
const std::string& output);
// write the build rule for a custom command // write the build rule for a custom command
void GenerateCustomRuleFile(cmCustomCommandGenerator const& ccg); void GenerateCustomRuleFile(cmCustomCommandGenerator const& ccg);
@@ -127,7 +131,8 @@ protected:
void DriveCustomCommands(std::vector<std::string>& depends); void DriveCustomCommands(std::vector<std::string>& depends);
// append intertarget dependencies // append intertarget dependencies
void AppendTargetDepends(std::vector<std::string>& depends); void AppendTargetDepends(std::vector<std::string>& depends,
bool ignoreType = false);
// Append object file dependencies. // Append object file dependencies.
void AppendObjectDepends(std::vector<std::string>& depends); void AppendObjectDepends(std::vector<std::string>& depends);

View File

@@ -8,6 +8,7 @@
#include <map> #include <map>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include <unordered_set>
#include <utility> #include <utility>
#include <cm/memory> #include <cm/memory>
@@ -25,6 +26,7 @@
#include "cmLocalGenerator.h" #include "cmLocalGenerator.h"
#include "cmLocalNinjaGenerator.h" #include "cmLocalNinjaGenerator.h"
#include "cmMakefile.h" #include "cmMakefile.h"
#include "cmMessageType.h"
#include "cmNinjaLinkLineDeviceComputer.h" #include "cmNinjaLinkLineDeviceComputer.h"
#include "cmNinjaTypes.h" #include "cmNinjaTypes.h"
#include "cmOSXBundleGenerator.h" #include "cmOSXBundleGenerator.h"
@@ -178,6 +180,33 @@ std::string cmNinjaNormalTargetGenerator::LanguageLinkerDeviceRule(
"_", config); "_", config);
} }
// Name of the per-target, per-configuration Ninja rule used for CUDA device
// linking: "<link language>_DEVICE_LINK__<encoded target name>_<config>".
std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceRule(
  const std::string& config) const
{
  // Target names may contain characters that are not valid in a Ninja rule
  // name, so the name is passed through the generator's encoder first.
  const std::string encodedTarget =
    cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName());
  return cmStrCat(this->TargetLinkLanguage(config), "_DEVICE_LINK__",
                  encodedTarget, '_', config);
}
// Name of the per-target, per-configuration Ninja rule used to compile the
// CUDA device-link stub:
// "<link language>_DEVICE_LINK_COMPILE__<encoded target name>_<config>".
std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceCompileRule(
  const std::string& config) const
{
  // Encode the target name so it is usable as part of a Ninja rule name.
  const std::string encodedTarget =
    cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName());
  return cmStrCat(this->TargetLinkLanguage(config), "_DEVICE_LINK_COMPILE__",
                  encodedTarget, '_', config);
}
// Name of the per-target, per-configuration Ninja rule used to create the
// CUDA fatbinary: "<link language>_FATBINARY__<encoded target name>_<config>".
std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaFatbinaryRule(
  const std::string& config) const
{
  // Encode the target name so it is usable as part of a Ninja rule name.
  const std::string encodedTarget =
    cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName());
  return cmStrCat(this->TargetLinkLanguage(config), "_FATBINARY__",
                  encodedTarget, '_', config);
}
struct cmNinjaRemoveNoOpCommands struct cmNinjaRemoveNoOpCommands
{ {
bool operator()(std::string const& cmd) bool operator()(std::string const& cmd)
@@ -186,7 +215,7 @@ struct cmNinjaRemoveNoOpCommands
} }
}; };
void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule( void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkRule(
bool useResponseFile, const std::string& config) bool useResponseFile, const std::string& config)
{ {
cmNinjaRule rule(this->LanguageLinkerDeviceRule(config)); cmNinjaRule rule(this->LanguageLinkerDeviceRule(config));
@@ -272,6 +301,55 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(
} }
} }
void cmNinjaNormalTargetGenerator::WriteDeviceLinkRules(
const std::string& config)
{
const cmMakefile* mf = this->GetMakefile();
cmNinjaRule rule(LanguageLinkerCudaDeviceRule(config));
rule.Command = this->GetLocalGenerator()->BuildCommandLine(
{ cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
" -arch=$ARCH $REGISTER -o=$out $in") });
rule.Comment = "Rule for CUDA device linking.";
rule.Description = "Linking CUDA $out";
this->GetGlobalGenerator()->AddRule(rule);
cmRulePlaceholderExpander::RuleVariables vars;
vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
vars.CMTargetType =
cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str();
vars.Language = "CUDA";
vars.Object = "$out";
vars.Fatbinary = "$FATBIN";
vars.RegisterFile = "$REGISTER";
std::string flags = this->GetFlags("CUDA", config);
vars.Flags = flags.c_str();
std::string compileCmd = this->GetMakefile()->GetRequiredDefinition(
"CMAKE_CUDA_DEVICE_LINK_COMPILE");
std::unique_ptr<cmRulePlaceholderExpander> rulePlaceholderExpander(
this->GetLocalGenerator()->CreateRulePlaceholderExpander());
rulePlaceholderExpander->ExpandRuleVariables(this->GetLocalGenerator(),
compileCmd, vars);
rule.Name = LanguageLinkerCudaDeviceCompileRule(config);
rule.Command = this->GetLocalGenerator()->BuildCommandLine({ compileCmd });
rule.Comment = "Rule for compiling CUDA device stubs.";
rule.Description = "Compiling CUDA device stub $out";
this->GetGlobalGenerator()->AddRule(rule);
rule.Name = LanguageLinkerCudaFatbinaryRule(config);
rule.Command = this->GetLocalGenerator()->BuildCommandLine(
{ cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
" -64 -cmdline=--compile-only -compress-all -link "
"--embedded-fatbin=$out $PROFILES") });
rule.Comment = "Rule for CUDA fatbinaries.";
rule.Description = "Creating fatbinary $out";
this->GetGlobalGenerator()->AddRule(rule);
}
void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile, void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile,
const std::string& config) const std::string& config)
{ {
@@ -586,7 +664,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
// First and very important step is to make sure while inside this // First and very important step is to make sure while inside this
// step our link language is set to CUDA // step our link language is set to CUDA
std::string cudaLinkLanguage = "CUDA";
std::string const& objExt = std::string const& objExt =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION"); this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
@@ -598,6 +675,118 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
std::string targetOutputReal = std::string targetOutputReal =
ConvertToNinjaPath(targetOutputDir + "cmake_device_link" + objExt); ConvertToNinjaPath(targetOutputDir + "cmake_device_link" + objExt);
if (firstForConfig) {
globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
}
this->DeviceLinkObject = targetOutputReal;
// Write comments.
cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
this->GetCommonFileStream()
<< "# Device Link build statements for "
<< cmState::GetTargetTypeName(genTarget->GetType()) << " target "
<< this->GetTargetName() << "\n\n";
if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
std::string architecturesStr =
this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
if (cmIsOff(architecturesStr)) {
this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
"CUDA_SEPARABLE_COMPILATION on Clang "
"requires CUDA_ARCHITECTURES to be set.");
return;
}
this->WriteDeviceLinkRules(config);
this->WriteDeviceLinkStatements(config, cmExpandedList(architecturesStr),
targetOutputReal);
} else {
this->WriteNvidiaDeviceLinkStatement(config, fileConfig, targetOutputDir,
targetOutputReal);
}
}
// Writes the Ninja build statements that perform CUDA device linking for this
// target without relying on the compiler to do it:
//   1. one device link per architecture, producing "sm_<arch>.cubin";
//   2. one fatbinary statement combining all cubins into
//      "cmake_cuda_fatbin.h";
//   3. one compilation of the stub that registers the kernels and contains
//      the fatbinaries, producing `output`.
//
// config        - configuration the statements are generated for.
// architectures - entries of the CUDA_ARCHITECTURES list; an optional
//                 "-<kind>" suffix on an entry is ignored.
// output        - path of the device link object to produce.
void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
  const std::string& config, const std::vector<std::string>& architectures,
  const std::string& output)
{
  // Inputs of every device link, without duplicates. First-seen order is
  // preserved so that the generated build statements (and the order of $in
  // on the device linker command line) are deterministic instead of
  // following the iteration order of a hash set. The target's own objects
  // are listed before its link dependencies.
  // NOTE(review): objects-before-dependencies mirrors the usual link line;
  // confirm the device linker has no stricter ordering requirement.
  const cmNinjaDeps explicitDeps = [&]() -> std::vector<std::string> {
    const cmNinjaDeps objects = this->GetObjects(config);
    const cmNinjaDeps linkDeps =
      this->ComputeLinkDeps(this->TargetLinkLanguage(config), config, true);

    std::unordered_set<std::string> seen;
    std::vector<std::string> deps;
    deps.reserve(objects.size() + linkDeps.size());
    const auto appendUnique = [&](const cmNinjaDeps& items) {
      for (const std::string& item : items) {
        if (seen.insert(item).second) {
          deps.push_back(item);
        }
      }
    };
    appendUnique(objects);
    appendUnique(linkDeps);
    return deps;
  }();

  const std::string objectDir =
    cmStrCat(this->GeneratorTarget->GetSupportDirectory(),
             this->GetGlobalGenerator()->ConfigDirectory(config));
  const std::string ninjaOutputDir = this->ConvertToNinjaPath(objectDir);
  const std::string fatbinaryHeader =
    cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h");

  cmNinjaBuild fatbinary(this->LanguageLinkerCudaFatbinaryRule(config));

  // Link device code for each architecture.
  std::unordered_set<std::string> handledArchitectures;
  for (const std::string& architectureKind : architectures) {
    // Clang always generates real code, so strip the specifier.
    const std::string architecture =
      architectureKind.substr(0, architectureKind.find('-'));

    // After stripping, entries such as "70-real" and "70-virtual" name the
    // same architecture. Emitting a second build statement for the same
    // cubin would make Ninja reject the build file, so handle each
    // architecture only once.
    if (!handledArchitectures.insert(architecture).second) {
      continue;
    }

    const std::string cubin =
      cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");

    fatbinary.Variables["PROFILES"] +=
      cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
    fatbinary.ExplicitDeps.emplace_back(cubin);

    cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config));
    dlink.ExplicitDeps = explicitDeps;
    dlink.Outputs = { cubin };
    dlink.Variables["ARCH"] = cmStrCat("sm_", architecture);

    // The generated register file contains macros that when expanded register
    // the device routines. Because the routines are the same for all
    // architectures the register file will be the same too. Thus generate it
    // only on the first invocation to reduce overhead.
    if (fatbinary.ExplicitDeps.size() == 1) {
      dlink.Variables["REGISTER"] = cmStrCat(
        "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
    }

    this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
  }

  // Combine all architectures into a single fatbinary.
  fatbinary.Outputs = { fatbinaryHeader };
  this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(),
                                         fatbinary);

  // Compile the stub that registers the kernels and contains the fatbinaries.
  // The fatbinary and register file are handed to the compile rule as
  // shell-formatted paths through the FATBIN and REGISTER variables.
  cmNinjaBuild dcompile(this->LanguageLinkerCudaDeviceCompileRule(config));
  dcompile.Outputs = { output };
  dcompile.ExplicitDeps = { fatbinaryHeader };
  dcompile.Variables["FATBIN"] =
    this->GetLocalGenerator()->ConvertToOutputFormat(
      cmStrCat(objectDir, "/cmake_cuda_fatbin.h"), cmOutputConverter::SHELL);
  dcompile.Variables["REGISTER"] =
    this->GetLocalGenerator()->ConvertToOutputFormat(
      cmStrCat(objectDir, "/cmake_cuda_register.h"), cmOutputConverter::SHELL);
  this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(),
                                         dcompile);
}
void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkStatement(
const std::string& config, const std::string& fileConfig,
const std::string& outputDir, const std::string& output)
{
cmGeneratorTarget* genTarget = this->GetGeneratorTarget();
cmGlobalNinjaGenerator* globalGen = this->GetGlobalGenerator();
std::string targetOutputImplib = ConvertToNinjaPath( std::string targetOutputImplib = ConvertToNinjaPath(
genTarget->GetFullPath(config, cmStateEnums::ImportLibraryArtifact)); genTarget->GetFullPath(config, cmStateEnums::ImportLibraryArtifact));
@@ -606,8 +795,8 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
cmStrCat(this->GetLocalGenerator()->GetTargetDirectory(genTarget), cmStrCat(this->GetLocalGenerator()->GetTargetDirectory(genTarget),
globalGen->ConfigDirectory(fileConfig), "/"); globalGen->ConfigDirectory(fileConfig), "/");
targetOutputFileConfigDir = targetOutputFileConfigDir =
globalGen->ExpandCFGIntDir(targetOutputDir, fileConfig); globalGen->ExpandCFGIntDir(outputDir, fileConfig);
if (targetOutputDir == targetOutputFileConfigDir) { if (outputDir == targetOutputFileConfigDir) {
return; return;
} }
@@ -623,27 +812,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
} }
} }
if (firstForConfig) {
globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
}
this->DeviceLinkObject = targetOutputReal;
// Write comments.
cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
const cmStateEnums::TargetType targetType = genTarget->GetType();
this->GetCommonFileStream() << "# Device Link build statements for "
<< cmState::GetTargetTypeName(targetType)
<< " target " << this->GetTargetName() << "\n\n";
// Compute the comment. // Compute the comment.
cmNinjaBuild build(this->LanguageLinkerDeviceRule(config)); cmNinjaBuild build(this->LanguageLinkerDeviceRule(config));
build.Comment = build.Comment =
cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', targetOutputReal); cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', output);
cmNinjaVars& vars = build.Variables; cmNinjaVars& vars = build.Variables;
// Compute outputs. // Compute outputs.
build.Outputs.push_back(targetOutputReal); build.Outputs.push_back(output);
// Compute specific libraries to link with. // Compute specific libraries to link with.
build.ExplicitDeps = this->GetObjects(config); build.ExplicitDeps = this->GetObjects(config);
build.ImplicitDeps = build.ImplicitDeps =
@@ -659,7 +836,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator(); cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator();
vars["TARGET_FILE"] = vars["TARGET_FILE"] =
localGen.ConvertToOutputFormat(targetOutputReal, cmOutputConverter::SHELL); localGen.ConvertToOutputFormat(output, cmOutputConverter::SHELL);
std::unique_ptr<cmLinkLineComputer> linkLineComputer( std::unique_ptr<cmLinkLineComputer> linkLineComputer(
new cmNinjaLinkLineDeviceComputer( new cmNinjaLinkLineDeviceComputer(
@@ -683,8 +860,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
// Compute language specific link flags. // Compute language specific link flags.
std::string langFlags; std::string langFlags;
localGen.AddLanguageFlagsForLinking(langFlags, genTarget, cudaLinkLanguage, localGen.AddLanguageFlagsForLinking(langFlags, genTarget, "CUDA", config);
config);
vars["LANGUAGE_COMPILE_FLAGS"] = langFlags; vars["LANGUAGE_COMPILE_FLAGS"] = langFlags;
auto const tgtNames = this->TargetNames(config); auto const tgtNames = this->TargetNames(config);
@@ -692,7 +868,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
vars["SONAME_FLAG"] = vars["SONAME_FLAG"] =
this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage(config)); this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage(config));
vars["SONAME"] = tgtNames.SharedObject; vars["SONAME"] = tgtNames.SharedObject;
if (targetType == cmStateEnums::SHARED_LIBRARY) { if (genTarget->GetType() == cmStateEnums::SHARED_LIBRARY) {
std::string install_dir = std::string install_dir =
this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(config); this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(config);
if (!install_dir.empty()) { if (!install_dir.empty()) {
@@ -731,7 +907,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
// do not check if the user has explicitly forced a response file. // do not check if the user has explicitly forced a response file.
int const commandLineLengthLimit = int const commandLineLengthLimit =
static_cast<int>(cmSystemTools::CalculateCommandLineLengthLimit()) - static_cast<int>(cmSystemTools::CalculateCommandLineLengthLimit()) -
globalGen->GetRuleCmdLength(this->LanguageLinkerDeviceRule(config)); globalGen->GetRuleCmdLength(build.Rule);
build.RspFile = this->ConvertToNinjaPath( build.RspFile = this->ConvertToNinjaPath(
cmStrCat("CMakeFiles/", genTarget->GetName(), cmStrCat("CMakeFiles/", genTarget->GetName(),
@@ -746,7 +922,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
bool usedResponseFile = false; bool usedResponseFile = false;
globalGen->WriteBuild(this->GetCommonFileStream(), build, globalGen->WriteBuild(this->GetCommonFileStream(), build,
commandLineLengthLimit, &usedResponseFile); commandLineLengthLimit, &usedResponseFile);
this->WriteDeviceLinkRule(usedResponseFile, config); this->WriteNvidiaDeviceLinkRule(usedResponseFile, config);
} }
void cmNinjaNormalTargetGenerator::WriteLinkStatement( void cmNinjaNormalTargetGenerator::WriteLinkStatement(

View File

@@ -21,18 +21,31 @@ public:
private: private:
std::string LanguageLinkerRule(const std::string& config) const; std::string LanguageLinkerRule(const std::string& config) const;
std::string LanguageLinkerDeviceRule(const std::string& config) const; std::string LanguageLinkerDeviceRule(const std::string& config) const;
std::string LanguageLinkerCudaDeviceRule(const std::string& config) const;
std::string LanguageLinkerCudaDeviceCompileRule(
const std::string& config) const;
std::string LanguageLinkerCudaFatbinaryRule(const std::string& config) const;
const char* GetVisibleTypeName() const; const char* GetVisibleTypeName() const;
void WriteLanguagesRules(const std::string& config); void WriteLanguagesRules(const std::string& config);
void WriteLinkRule(bool useResponseFile, const std::string& config); void WriteLinkRule(bool useResponseFile, const std::string& config);
void WriteDeviceLinkRule(bool useResponseFile, const std::string& config); void WriteDeviceLinkRules(const std::string& config);
void WriteNvidiaDeviceLinkRule(bool useResponseFile,
const std::string& config);
void WriteLinkStatement(const std::string& config, void WriteLinkStatement(const std::string& config,
const std::string& fileConfig, bool firstForConfig); const std::string& fileConfig, bool firstForConfig);
void WriteDeviceLinkStatement(const std::string& config, void WriteDeviceLinkStatement(const std::string& config,
const std::string& fileConfig, const std::string& fileConfig,
bool firstForConfig); bool firstForConfig);
void WriteDeviceLinkStatements(const std::string& config,
const std::vector<std::string>& architectures,
const std::string& output);
void WriteNvidiaDeviceLinkStatement(const std::string& config,
const std::string& fileConfig,
const std::string& outputDir,
const std::string& output);
void WriteObjectLibStatement(const std::string& config); void WriteObjectLibStatement(const std::string& config);

View File

@@ -346,11 +346,13 @@ std::string cmNinjaTargetGenerator::ComputeIncludes(
} }
cmNinjaDeps cmNinjaTargetGenerator::ComputeLinkDeps( cmNinjaDeps cmNinjaTargetGenerator::ComputeLinkDeps(
const std::string& linkLanguage, const std::string& config) const const std::string& linkLanguage, const std::string& config,
bool ignoreType) const
{ {
// Static libraries never depend on other targets for linking. // Static libraries never depend on other targets for linking.
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY || if (!ignoreType &&
this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY) { (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY ||
this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY)) {
return cmNinjaDeps(); return cmNinjaDeps();
} }
@@ -1009,6 +1011,7 @@ void cmNinjaTargetGenerator::WriteObjectBuildStatements(
{ {
std::vector<cmSourceFile const*> objectSources; std::vector<cmSourceFile const*> objectSources;
this->GeneratorTarget->GetObjectSources(objectSources, config); this->GeneratorTarget->GetObjectSources(objectSources, config);
for (cmSourceFile const* sf : objectSources) { for (cmSourceFile const* sf : objectSources) {
this->WriteObjectBuildStatement(sf, config, fileConfig, firstForConfig); this->WriteObjectBuildStatement(sf, config, fileConfig, firstForConfig);
} }

View File

@@ -113,7 +113,8 @@ protected:
/// @return the list of link dependency for the given target @a target. /// @return the list of link dependency for the given target @a target.
cmNinjaDeps ComputeLinkDeps(const std::string& linkLanguage, cmNinjaDeps ComputeLinkDeps(const std::string& linkLanguage,
const std::string& config) const; const std::string& config,
bool ignoreType = false) const;
/// @return the source file path for the given @a source. /// @return the source file path for the given @a source.
std::string GetSourceFilePath(cmSourceFile const* source) const; std::string GetSourceFilePath(cmSourceFile const* source) const;

View File

@@ -141,6 +141,16 @@ std::string cmRulePlaceholderExpander::ExpandRuleVariable(
return replaceValues.DependencyFile; return replaceValues.DependencyFile;
} }
} }
if (replaceValues.Fatbinary) {
if (variable == "FATBINARY") {
return replaceValues.Fatbinary;
}
}
if (replaceValues.RegisterFile) {
if (variable == "REGISTER_FILE") {
return replaceValues.RegisterFile;
}
}
if (replaceValues.Target) { if (replaceValues.Target) {
if (variable == "TARGET_QUOTED") { if (variable == "TARGET_QUOTED") {

View File

@@ -64,6 +64,8 @@ public:
const char* SwiftOutputFileMap; const char* SwiftOutputFileMap;
const char* SwiftSources; const char* SwiftSources;
const char* ISPCHeader; const char* ISPCHeader;
const char* Fatbinary;
const char* RegisterFile;
}; };
// Expand rule variables in CMake of the type found in language rules // Expand rule variables in CMake of the type found in language rules

View File

@@ -17,13 +17,12 @@ add_cuda_test_macro(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly)
add_cuda_test_macro(Cuda.Toolkit Toolkit) add_cuda_test_macro(Cuda.Toolkit Toolkit)
add_cuda_test_macro(Cuda.IncludePathNoToolkit IncludePathNoToolkit) add_cuda_test_macro(Cuda.IncludePathNoToolkit IncludePathNoToolkit)
add_cuda_test_macro(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit) add_cuda_test_macro(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit)
add_cuda_test_macro(Cuda.Complex CudaComplex)
add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags)
# Separable compilation is currently only supported on NVCC. Disable tests
# using it for other compilers.
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
add_cuda_test_macro(Cuda.Complex CudaComplex) # Clang lacks __CUDACC_VER*__ defines.
add_cuda_test_macro(Cuda.ProperDeviceLibraries ProperDeviceLibraries) add_cuda_test_macro(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags)
endif() endif()
# The CUDA only ships the shared version of the toolkit libraries # The CUDA only ships the shared version of the toolkit libraries

View File

@@ -12,33 +12,31 @@ add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusT
add_cuda_test_macro(CudaOnly.Standard98 CudaOnlyStandard98) add_cuda_test_macro(CudaOnly.Standard98 CudaOnlyStandard98)
add_cuda_test_macro(CudaOnly.Toolkit CudaOnlyToolkit) add_cuda_test_macro(CudaOnly.Toolkit CudaOnlyToolkit)
add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs) add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
# Clang doesn't have flags for selecting the runtime.
add_cuda_test_macro(CudaOnly.SharedRuntimeViaCUDAFlags CudaOnlySharedRuntimeViaCUDAFlags) add_cuda_test_macro(CudaOnly.SharedRuntimeViaCUDAFlags CudaOnlySharedRuntimeViaCUDAFlags)
# Separable compilation is currently only supported on NVCC. Disable tests
# using it for other compilers.
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
${build_generator_args}
--build-project DontResolveDeviceSymbols
--build-options ${build_options}
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
)
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
PROPERTY LABELS "CUDA")
# Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode. # Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode.
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
endif() endif()
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
${build_generator_args}
--build-project DontResolveDeviceSymbols
--build-options ${build_options}
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
)
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
PROPERTY LABELS "CUDA")
# The CUDA only ships the shared version of the toolkit libraries # The CUDA only ships the shared version of the toolkit libraries
# on windows # on windows
if(NOT WIN32) if(NOT WIN32)