mirror of
https://github.com/Kitware/CMake.git
synced 2025-10-14 02:08:27 +08:00
CUDA: Clang separable compilation
For NVCC the compiler takes care of device linking when passed the "-dlink"
flag.
Clang doesn't support such magic and requires the buildsystem to do the work
that NVCC does behind the scenes.
The implementation is based on the device linking steps documented in a Bazel build definition
(apparently TensorFlow's): 7cabcdf073/third_party/nccl/build_defs.bzl.tpl (line 259)
Closes: #20726
This commit is contained in:
4
Help/release/dev/cuda-clang-separable-compilation.rst
Normal file
4
Help/release/dev/cuda-clang-separable-compilation.rst
Normal file
@@ -0,0 +1,4 @@
|
||||
cuda-clang-separable-compilation
|
||||
--------------------------------
|
||||
|
||||
* :prop_tgt:`CUDA_SEPARABLE_COMPILATION` is now supported when using Clang.
|
@@ -3,6 +3,8 @@ set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@")
|
||||
set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@")
|
||||
set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
|
||||
set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
|
||||
set(CMAKE_CUDA_DEVICE_LINKER "@CMAKE_CUDA_DEVICE_LINKER@")
|
||||
set(CMAKE_CUDA_FATBINARY "@CMAKE_CUDA_FATBINARY@")
|
||||
set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
|
||||
set(CMAKE_CUDA_COMPILE_FEATURES "@CMAKE_CUDA_COMPILE_FEATURES@")
|
||||
set(CMAKE_CUDA03_COMPILE_FEATURES "@CMAKE_CUDA03_COMPILE_FEATURES@")
|
||||
@@ -44,6 +46,7 @@ if(CMAKE_CUDA_LIBRARY_ARCHITECTURE)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@")
|
||||
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@")
|
||||
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
|
||||
|
||||
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
|
||||
|
@@ -145,7 +145,7 @@ endif()
|
||||
#Specify how to compile when separable compilation has been requested
|
||||
if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION)
|
||||
set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
|
||||
"<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -dc <SOURCE> -o <OBJECT>")
|
||||
"<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_DEVICE_CODE} <SOURCE> -o <OBJECT>")
|
||||
endif()
|
||||
|
||||
#Specify how to compile when whole compilation has been requested
|
||||
@@ -200,6 +200,11 @@ if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE)
|
||||
"<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
|
||||
endif()
|
||||
|
||||
# Used when device linking is handled by CMake.
|
||||
if(NOT CMAKE_CUDA_DEVICE_LINK_COMPILE)
|
||||
set(CMAKE_CUDA_DEVICE_LINK_COMPILE "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <FLAGS> -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -D__NV_EXTRA_INITIALIZATION=\"\" -D__NV_EXTRA_FINALIZATION=\"\" -DREGISTERLINKBINARYFILE=\\\"<REGISTER_FILE>\\\" -DFATBINFILE=\\\"<FATBINARY>\\\" ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c \"${CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT}/bin/crt/link.stub\" -o <OBJECT>")
|
||||
endif()
|
||||
|
||||
unset(__IMPLICT_DLINK_FLAGS)
|
||||
|
||||
set(CMAKE_CUDA_INFORMATION_LOADED 1)
|
||||
|
@@ -169,11 +169,14 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
|
||||
endif()
|
||||
|
||||
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${_CUDA_NVCC_EXECUTABLE}" DIRECTORY)
|
||||
set(CMAKE_CUDA_DEVICE_LINKER "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/nvlink${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
set(CMAKE_CUDA_FATBINARY "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/fatbinary${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
|
||||
|
||||
# CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file.
|
||||
# In a non-scattered installation this is equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
|
||||
# In a non-scattered installation the following are equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
|
||||
# We first check for a non-scattered installation to prefer it over a scattered installation.
|
||||
|
||||
# CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file.
|
||||
if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/version.txt")
|
||||
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
|
||||
elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt")
|
||||
@@ -181,6 +184,15 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
|
||||
elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt")
|
||||
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/cuda")
|
||||
endif()
|
||||
|
||||
# CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT contains the linking stubs necessary for device linking and other low-level library files.
|
||||
if(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
|
||||
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit")
|
||||
elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
|
||||
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit")
|
||||
else()
|
||||
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
|
||||
|
@@ -13,6 +13,7 @@ __compiler_clang_cxx_standards(CUDA)
|
||||
set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE)
|
||||
set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda")
|
||||
set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S")
|
||||
set(_CMAKE_CUDA_DEVICE_CODE "-fgpu-rdc -c")
|
||||
|
||||
# RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default.
|
||||
set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
|
||||
|
@@ -6,6 +6,7 @@ set(CMAKE_CUDA_VERBOSE_COMPILE_FLAG "-Xcompiler=-v")
|
||||
|
||||
set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu")
|
||||
set(_CMAKE_CUDA_PTX_FLAG "-ptx")
|
||||
set(_CMAKE_CUDA_DEVICE_CODE "-dc")
|
||||
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89)
|
||||
# The -forward-unknown-to-host-compiler flag was only
|
||||
|
@@ -1955,17 +1955,6 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
|
||||
} else if (lang == "CUDA") {
|
||||
target->AddCUDAArchitectureFlags(flags);
|
||||
target->AddCUDAToolkitFlags(flags);
|
||||
|
||||
if (compiler == "Clang") {
|
||||
bool separable = target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION");
|
||||
|
||||
if (separable) {
|
||||
this->Makefile->IssueMessage(
|
||||
MessageType::FATAL_ERROR,
|
||||
"CUDA_SEPARABLE_COMPILATION isn't supported on Clang. "
|
||||
"See CMake issue #20726.");
|
||||
}
|
||||
}
|
||||
} else if (lang == "ISPC") {
|
||||
target->AddISPCTargetFlags(flags);
|
||||
}
|
||||
|
@@ -446,7 +446,7 @@ public:
|
||||
void GetTargetCompileFlags(cmGeneratorTarget* target,
|
||||
std::string const& config,
|
||||
std::string const& lang, std::string& flags,
|
||||
std::string const& arch = std::string());
|
||||
std::string const& arch);
|
||||
std::vector<BT<std::string>> GetTargetCompileFlags(
|
||||
cmGeneratorTarget* target, std::string const& config,
|
||||
std::string const& lang, std::string const& arch = std::string());
|
||||
|
@@ -91,19 +91,12 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
|
||||
std::vector<std::string> commands;
|
||||
|
||||
// Get the language to use for linking this library.
|
||||
std::string linkLanguage = "CUDA";
|
||||
// Get the name of the device object to generate.
|
||||
std::string const& objExt =
|
||||
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
|
||||
|
||||
// Build list of dependencies.
|
||||
std::vector<std::string> depends;
|
||||
this->AppendLinkDepends(depends, linkLanguage);
|
||||
|
||||
// Get the name of the device object to generate.
|
||||
std::string const targetOutputReal =
|
||||
std::string const targetOutput =
|
||||
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
|
||||
this->DeviceLinkObject = targetOutputReal;
|
||||
this->DeviceLinkObject = targetOutput;
|
||||
|
||||
this->NumberOfProgressActions++;
|
||||
if (!this->NoRuleMessages) {
|
||||
@@ -111,7 +104,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
this->MakeEchoProgress(progress);
|
||||
// Add the link message.
|
||||
std::string buildEcho =
|
||||
cmStrCat("Linking ", linkLanguage, " device code ",
|
||||
cmStrCat("Linking CUDA device code ",
|
||||
this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(),
|
||||
@@ -121,6 +114,29 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
|
||||
}
|
||||
|
||||
if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
|
||||
this->WriteDeviceLinkRule(commands, targetOutput);
|
||||
} else {
|
||||
this->WriteNvidiaDeviceExecutableRule(relink, commands, targetOutput);
|
||||
}
|
||||
|
||||
// Write the main driver rule to build everything in this target.
|
||||
this->WriteTargetDriverRule(targetOutput, relink);
|
||||
#else
|
||||
static_cast<void>(relink);
|
||||
#endif
|
||||
}
|
||||
|
||||
void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
|
||||
bool relink, std::vector<std::string>& commands,
|
||||
const std::string& targetOutput)
|
||||
{
|
||||
const std::string linkLanguage = "CUDA";
|
||||
|
||||
// Build list of dependencies.
|
||||
std::vector<std::string> depends;
|
||||
this->AppendLinkDepends(depends, linkLanguage);
|
||||
|
||||
// Build a list of compiler flags and linker flags.
|
||||
std::string langFlags;
|
||||
std::string linkFlags;
|
||||
@@ -136,7 +152,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
// may need to be cleaned.
|
||||
std::vector<std::string> exeCleanFiles;
|
||||
exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
|
||||
|
||||
// Determine whether a link script will be used.
|
||||
bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
|
||||
@@ -195,7 +211,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
: cmOutputConverter::SHELL;
|
||||
std::string target = this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
|
||||
output);
|
||||
|
||||
std::string targetFullPathCompilePDB =
|
||||
@@ -226,7 +242,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
this->LocalGenerator->CreateRulePlaceholderExpander());
|
||||
|
||||
// Expand placeholders in the commands.
|
||||
rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
|
||||
rulePlaceholderExpander->SetTargetImpLib(targetOutput);
|
||||
for (std::string& real_link_command : real_link_commands) {
|
||||
real_link_command = cmStrCat(launcher, real_link_command);
|
||||
rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
|
||||
@@ -255,17 +271,10 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
|
||||
// Write the build rule.
|
||||
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
|
||||
targetOutputReal, depends, commands,
|
||||
false);
|
||||
|
||||
// Write the main driver rule to build everything in this target.
|
||||
this->WriteTargetDriverRule(targetOutputReal, relink);
|
||||
targetOutput, depends, commands, false);
|
||||
|
||||
// Clean all the possible executable names and symlinks.
|
||||
this->CleanFiles.insert(exeCleanFiles.begin(), exeCleanFiles.end());
|
||||
#else
|
||||
static_cast<void>(relink);
|
||||
#endif
|
||||
}
|
||||
|
||||
void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#include "cmConfigure.h" // IWYU pragma: keep
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "cmMakefileTargetGenerator.h"
|
||||
|
||||
@@ -23,6 +24,9 @@ public:
|
||||
protected:
|
||||
virtual void WriteExecutableRule(bool relink);
|
||||
virtual void WriteDeviceExecutableRule(bool relink);
|
||||
virtual void WriteNvidiaDeviceExecutableRule(
|
||||
bool relink, std::vector<std::string>& commands,
|
||||
const std::string& targetOutput);
|
||||
|
||||
private:
|
||||
std::string DeviceLinkObject;
|
||||
|
@@ -129,8 +129,7 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
|
||||
const bool requiresDeviceLinking = requireDeviceLinking(
|
||||
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
|
||||
if (requiresDeviceLinking) {
|
||||
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
|
||||
this->WriteDeviceLibraryRules(linkRuleVar, false);
|
||||
this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", false);
|
||||
}
|
||||
|
||||
std::string linkLanguage =
|
||||
@@ -156,8 +155,7 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
|
||||
const bool requiresDeviceLinking = requireDeviceLinking(
|
||||
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
|
||||
if (requiresDeviceLinking) {
|
||||
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
|
||||
this->WriteDeviceLibraryRules(linkRuleVar, relink);
|
||||
this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,8 +189,7 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
|
||||
const bool requiresDeviceLinking = requireDeviceLinking(
|
||||
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
|
||||
if (requiresDeviceLinking) {
|
||||
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
|
||||
this->WriteDeviceLibraryRules(linkRuleVar, relink);
|
||||
this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,12 +236,47 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
|
||||
// TODO: Merge the methods that call this method to avoid
|
||||
// code duplication.
|
||||
std::vector<std::string> commands;
|
||||
|
||||
// Get the language to use for linking this library.
|
||||
std::string linkLanguage = "CUDA";
|
||||
std::string const objExt =
|
||||
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
|
||||
|
||||
// Get the name of the device object to generate.
|
||||
std::string const targetOutput =
|
||||
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
|
||||
this->DeviceLinkObject = targetOutput;
|
||||
|
||||
this->NumberOfProgressActions++;
|
||||
if (!this->NoRuleMessages) {
|
||||
cmLocalUnixMakefileGenerator3::EchoProgress progress;
|
||||
this->MakeEchoProgress(progress);
|
||||
// Add the link message.
|
||||
std::string buildEcho =
|
||||
cmStrCat("Linking CUDA device code ",
|
||||
this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(),
|
||||
this->DeviceLinkObject),
|
||||
cmOutputConverter::SHELL));
|
||||
this->LocalGenerator->AppendEcho(
|
||||
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
|
||||
}
|
||||
|
||||
if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
|
||||
this->WriteDeviceLinkRule(commands, targetOutput);
|
||||
} else {
|
||||
this->WriteNvidiaDeviceLibraryRules(linkRuleVar, relink, commands,
|
||||
targetOutput);
|
||||
}
|
||||
|
||||
// Write the main driver rule to build everything in this target.
|
||||
this->WriteTargetDriverRule(targetOutput, relink);
|
||||
}
|
||||
|
||||
void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
|
||||
const std::string& linkRuleVar, bool relink,
|
||||
std::vector<std::string>& commands, const std::string& targetOutput)
|
||||
{
|
||||
std::string linkLanguage = "CUDA";
|
||||
|
||||
// Build list of dependencies.
|
||||
std::vector<std::string> depends;
|
||||
this->AppendLinkDepends(depends, linkLanguage);
|
||||
@@ -258,30 +290,10 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
|
||||
std::string linkFlags;
|
||||
this->GetDeviceLinkFlags(linkFlags, linkLanguage);
|
||||
|
||||
// Get the name of the device object to generate.
|
||||
std::string const targetOutputReal =
|
||||
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
|
||||
this->DeviceLinkObject = targetOutputReal;
|
||||
|
||||
this->NumberOfProgressActions++;
|
||||
if (!this->NoRuleMessages) {
|
||||
cmLocalUnixMakefileGenerator3::EchoProgress progress;
|
||||
this->MakeEchoProgress(progress);
|
||||
// Add the link message.
|
||||
std::string buildEcho =
|
||||
cmStrCat("Linking ", linkLanguage, " device code ",
|
||||
this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(),
|
||||
this->DeviceLinkObject),
|
||||
cmOutputConverter::SHELL));
|
||||
this->LocalGenerator->AppendEcho(
|
||||
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
|
||||
}
|
||||
// Clean files associated with this library.
|
||||
std::set<std::string> libCleanFiles;
|
||||
libCleanFiles.insert(this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
|
||||
|
||||
// Determine whether a link script will be used.
|
||||
bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
|
||||
@@ -335,7 +347,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
|
||||
|
||||
std::string target = this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
|
||||
output);
|
||||
|
||||
std::string targetFullPathCompilePDB =
|
||||
@@ -364,7 +376,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
|
||||
this->LocalGenerator->CreateRulePlaceholderExpander());
|
||||
|
||||
// Construct the main link rule and expand placeholders.
|
||||
rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
|
||||
rulePlaceholderExpander->SetTargetImpLib(targetOutput);
|
||||
std::string linkRule = this->GetLinkRule(linkRuleVar);
|
||||
cmExpandList(linkRule, real_link_commands);
|
||||
|
||||
@@ -399,14 +411,11 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
|
||||
commands1.clear();
|
||||
|
||||
// Compute the list of outputs.
|
||||
std::vector<std::string> outputs(1, targetOutputReal);
|
||||
std::vector<std::string> outputs(1, targetOutput);
|
||||
|
||||
// Write the build rule.
|
||||
this->WriteMakeRule(*this->BuildFileStream, nullptr, outputs, depends,
|
||||
commands, false);
|
||||
|
||||
// Write the main driver rule to build everything in this target.
|
||||
this->WriteTargetDriverRule(targetOutputReal, relink);
|
||||
#else
|
||||
static_cast<void>(linkRuleVar);
|
||||
static_cast<void>(relink);
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#include "cmConfigure.h" // IWYU pragma: keep
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "cmMakefileTargetGenerator.h"
|
||||
|
||||
@@ -27,6 +28,10 @@ protected:
|
||||
void WriteModuleLibraryRules(bool relink);
|
||||
|
||||
void WriteDeviceLibraryRules(const std::string& linkRule, bool relink);
|
||||
void WriteNvidiaDeviceLibraryRules(const std::string& linkRuleVar,
|
||||
bool relink,
|
||||
std::vector<std::string>& commands,
|
||||
const std::string& targetOutput);
|
||||
void WriteLibraryRules(const std::string& linkRule,
|
||||
const std::string& extraFlags, bool relink);
|
||||
// MacOSX Framework support methods
|
||||
|
@@ -2,10 +2,13 @@
|
||||
file Copyright.txt or https://cmake.org/licensing for details. */
|
||||
#include "cmMakefileTargetGenerator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include <cm/memory>
|
||||
@@ -25,6 +28,7 @@
|
||||
#include "cmMakefileExecutableTargetGenerator.h"
|
||||
#include "cmMakefileLibraryTargetGenerator.h"
|
||||
#include "cmMakefileUtilityTargetGenerator.h"
|
||||
#include "cmMessageType.h"
|
||||
#include "cmOutputConverter.h"
|
||||
#include "cmPolicies.h"
|
||||
#include "cmProperty.h"
|
||||
@@ -1323,6 +1327,130 @@ void cmMakefileTargetGenerator::WriteObjectDependRules(
|
||||
}
|
||||
}
|
||||
|
||||
void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
std::vector<std::string>& commands, const std::string& output)
|
||||
{
|
||||
std::string architecturesStr =
|
||||
this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
|
||||
|
||||
if (cmIsOff(architecturesStr)) {
|
||||
this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
|
||||
"CUDA_SEPARABLE_COMPILATION on Clang "
|
||||
"requires CUDA_ARCHITECTURES to be set.");
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::string> architectures = cmExpandedList(architecturesStr);
|
||||
|
||||
// Ensure there are no duplicates.
|
||||
const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
|
||||
std::vector<std::string> deps;
|
||||
this->AppendTargetDepends(deps, true);
|
||||
this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
|
||||
std::copy(this->Objects.begin(), this->Objects.end(),
|
||||
std::back_inserter(deps));
|
||||
|
||||
std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
|
||||
deps.clear();
|
||||
std::copy(depsSet.begin(), depsSet.end(), std::back_inserter(deps));
|
||||
return deps;
|
||||
}();
|
||||
|
||||
const std::string objectDir = this->GeneratorTarget->ObjectDirectory;
|
||||
const std::string relObjectDir =
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir);
|
||||
|
||||
// Construct a list of files associated with this executable that
|
||||
// may need to be cleaned.
|
||||
std::vector<std::string> cleanFiles;
|
||||
cleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), output));
|
||||
|
||||
std::string profiles;
|
||||
std::vector<std::string> fatbinaryDepends;
|
||||
std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
|
||||
|
||||
// Link device code for each architecture.
|
||||
for (const std::string& architectureKind : architectures) {
|
||||
// Clang always generates real code, so strip the specifier.
|
||||
const std::string architecture =
|
||||
architectureKind.substr(0, architectureKind.find('-'));
|
||||
const std::string cubin =
|
||||
cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
|
||||
|
||||
profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
|
||||
fatbinaryDepends.emplace_back(cubin);
|
||||
|
||||
std::string registerFileCmd;
|
||||
|
||||
// The generated register file contains macros that when expanded register
|
||||
// the device routines. Because the routines are the same for all
|
||||
// architectures the register file will be the same too. Thus generate it
|
||||
// only on the first invocation to reduce overhead.
|
||||
if (fatbinaryDepends.size() == 1) {
|
||||
std::string registerFileRel =
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), registerFile);
|
||||
registerFileCmd =
|
||||
cmStrCat(" --register-link-binaries=", registerFileRel);
|
||||
cleanFiles.push_back(registerFileRel);
|
||||
}
|
||||
|
||||
std::string command = cmStrCat(
|
||||
this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
|
||||
" -arch=sm_", architecture, registerFileCmd, " -o=$@ ",
|
||||
cmJoin(linkDeps, " "));
|
||||
|
||||
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, cubin,
|
||||
linkDeps, { command }, false);
|
||||
}
|
||||
|
||||
// Combine all architectures into a single fatbinary.
|
||||
const std::string fatbinaryCommand =
|
||||
cmStrCat(this->Makefile->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
|
||||
" -64 -cmdline=--compile-only -compress-all -link "
|
||||
"--embedded-fatbin=$@",
|
||||
profiles);
|
||||
const std::string fatbinaryOutput =
|
||||
cmStrCat(objectDir, "cmake_cuda_fatbin.h");
|
||||
const std::string fatbinaryOutputRel =
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(), fatbinaryOutput);
|
||||
|
||||
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
|
||||
fatbinaryOutputRel, fatbinaryDepends,
|
||||
{ fatbinaryCommand }, false);
|
||||
|
||||
// Compile the stub that registers the kernels and contains the fatbinaries.
|
||||
cmRulePlaceholderExpander::RuleVariables vars;
|
||||
vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
|
||||
vars.CMTargetType =
|
||||
cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str();
|
||||
|
||||
vars.Language = "CUDA";
|
||||
vars.Object = output.c_str();
|
||||
vars.Fatbinary = fatbinaryOutput.c_str();
|
||||
vars.RegisterFile = registerFile.c_str();
|
||||
|
||||
std::string flags = this->GetFlags("CUDA", this->GetConfigName());
|
||||
vars.Flags = flags.c_str();
|
||||
|
||||
std::string compileCmd = this->GetLinkRule("CMAKE_CUDA_DEVICE_LINK_COMPILE");
|
||||
std::unique_ptr<cmRulePlaceholderExpander> rulePlaceholderExpander(
|
||||
this->LocalGenerator->CreateRulePlaceholderExpander());
|
||||
rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
|
||||
compileCmd, vars);
|
||||
|
||||
commands.emplace_back(compileCmd);
|
||||
this->LocalGenerator->WriteMakeRule(
|
||||
*this->BuildFileStream, nullptr, output,
|
||||
{ cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
|
||||
|
||||
// Clean all the possible executable names and symlinks.
|
||||
this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
|
||||
}
|
||||
|
||||
void cmMakefileTargetGenerator::GenerateCustomRuleFile(
|
||||
cmCustomCommandGenerator const& ccg)
|
||||
{
|
||||
@@ -1579,10 +1707,11 @@ void cmMakefileTargetGenerator::WriteTargetDriverRule(
|
||||
}
|
||||
|
||||
void cmMakefileTargetGenerator::AppendTargetDepends(
|
||||
std::vector<std::string>& depends)
|
||||
std::vector<std::string>& depends, bool ignoreType)
|
||||
{
|
||||
// Static libraries never depend on anything for linking.
|
||||
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY) {
|
||||
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
|
||||
!ignoreType) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@@ -104,6 +104,10 @@ protected:
|
||||
void WriteObjectDependRules(cmSourceFile const& source,
|
||||
std::vector<std::string>& depends);
|
||||
|
||||
// CUDA device linking.
|
||||
void WriteDeviceLinkRule(std::vector<std::string>& commands,
|
||||
const std::string& output);
|
||||
|
||||
// write the build rule for a custom command
|
||||
void GenerateCustomRuleFile(cmCustomCommandGenerator const& ccg);
|
||||
|
||||
@@ -127,7 +131,8 @@ protected:
|
||||
void DriveCustomCommands(std::vector<std::string>& depends);
|
||||
|
||||
// append intertarget dependencies
|
||||
void AppendTargetDepends(std::vector<std::string>& depends);
|
||||
void AppendTargetDepends(std::vector<std::string>& depends,
|
||||
bool ignoreType = false);
|
||||
|
||||
// Append object file dependencies.
|
||||
void AppendObjectDepends(std::vector<std::string>& depends);
|
||||
|
@@ -8,6 +8,7 @@
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include <cm/memory>
|
||||
@@ -25,6 +26,7 @@
|
||||
#include "cmLocalGenerator.h"
|
||||
#include "cmLocalNinjaGenerator.h"
|
||||
#include "cmMakefile.h"
|
||||
#include "cmMessageType.h"
|
||||
#include "cmNinjaLinkLineDeviceComputer.h"
|
||||
#include "cmNinjaTypes.h"
|
||||
#include "cmOSXBundleGenerator.h"
|
||||
@@ -178,6 +180,33 @@ std::string cmNinjaNormalTargetGenerator::LanguageLinkerDeviceRule(
|
||||
"_", config);
|
||||
}
|
||||
|
||||
std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceRule(
|
||||
const std::string& config) const
|
||||
{
|
||||
return cmStrCat(
|
||||
this->TargetLinkLanguage(config), "_DEVICE_LINK__",
|
||||
cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()),
|
||||
'_', config);
|
||||
}
|
||||
|
||||
std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceCompileRule(
|
||||
const std::string& config) const
|
||||
{
|
||||
return cmStrCat(
|
||||
this->TargetLinkLanguage(config), "_DEVICE_LINK_COMPILE__",
|
||||
cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()),
|
||||
'_', config);
|
||||
}
|
||||
|
||||
std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaFatbinaryRule(
|
||||
const std::string& config) const
|
||||
{
|
||||
return cmStrCat(
|
||||
this->TargetLinkLanguage(config), "_FATBINARY__",
|
||||
cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()),
|
||||
'_', config);
|
||||
}
|
||||
|
||||
struct cmNinjaRemoveNoOpCommands
|
||||
{
|
||||
bool operator()(std::string const& cmd)
|
||||
@@ -186,7 +215,7 @@ struct cmNinjaRemoveNoOpCommands
|
||||
}
|
||||
};
|
||||
|
||||
void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(
|
||||
void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkRule(
|
||||
bool useResponseFile, const std::string& config)
|
||||
{
|
||||
cmNinjaRule rule(this->LanguageLinkerDeviceRule(config));
|
||||
@@ -272,6 +301,55 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(
|
||||
}
|
||||
}
|
||||
|
||||
void cmNinjaNormalTargetGenerator::WriteDeviceLinkRules(
|
||||
const std::string& config)
|
||||
{
|
||||
const cmMakefile* mf = this->GetMakefile();
|
||||
|
||||
cmNinjaRule rule(LanguageLinkerCudaDeviceRule(config));
|
||||
rule.Command = this->GetLocalGenerator()->BuildCommandLine(
|
||||
{ cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
|
||||
" -arch=$ARCH $REGISTER -o=$out $in") });
|
||||
rule.Comment = "Rule for CUDA device linking.";
|
||||
rule.Description = "Linking CUDA $out";
|
||||
this->GetGlobalGenerator()->AddRule(rule);
|
||||
|
||||
cmRulePlaceholderExpander::RuleVariables vars;
|
||||
vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
|
||||
vars.CMTargetType =
|
||||
cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str();
|
||||
|
||||
vars.Language = "CUDA";
|
||||
vars.Object = "$out";
|
||||
vars.Fatbinary = "$FATBIN";
|
||||
vars.RegisterFile = "$REGISTER";
|
||||
|
||||
std::string flags = this->GetFlags("CUDA", config);
|
||||
vars.Flags = flags.c_str();
|
||||
|
||||
std::string compileCmd = this->GetMakefile()->GetRequiredDefinition(
|
||||
"CMAKE_CUDA_DEVICE_LINK_COMPILE");
|
||||
std::unique_ptr<cmRulePlaceholderExpander> rulePlaceholderExpander(
|
||||
this->GetLocalGenerator()->CreateRulePlaceholderExpander());
|
||||
rulePlaceholderExpander->ExpandRuleVariables(this->GetLocalGenerator(),
|
||||
compileCmd, vars);
|
||||
|
||||
rule.Name = LanguageLinkerCudaDeviceCompileRule(config);
|
||||
rule.Command = this->GetLocalGenerator()->BuildCommandLine({ compileCmd });
|
||||
rule.Comment = "Rule for compiling CUDA device stubs.";
|
||||
rule.Description = "Compiling CUDA device stub $out";
|
||||
this->GetGlobalGenerator()->AddRule(rule);
|
||||
|
||||
rule.Name = LanguageLinkerCudaFatbinaryRule(config);
|
||||
rule.Command = this->GetLocalGenerator()->BuildCommandLine(
|
||||
{ cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
|
||||
" -64 -cmdline=--compile-only -compress-all -link "
|
||||
"--embedded-fatbin=$out $PROFILES") });
|
||||
rule.Comment = "Rule for CUDA fatbinaries.";
|
||||
rule.Description = "Creating fatbinary $out";
|
||||
this->GetGlobalGenerator()->AddRule(rule);
|
||||
}
|
||||
|
||||
void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile,
|
||||
const std::string& config)
|
||||
{
|
||||
@@ -586,7 +664,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
|
||||
// First and very important step is to make sure while inside this
|
||||
// step our link language is set to CUDA
|
||||
std::string cudaLinkLanguage = "CUDA";
|
||||
std::string const& objExt =
|
||||
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
|
||||
|
||||
@@ -598,6 +675,118 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
std::string targetOutputReal =
|
||||
ConvertToNinjaPath(targetOutputDir + "cmake_device_link" + objExt);
|
||||
|
||||
if (firstForConfig) {
|
||||
globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
|
||||
}
|
||||
this->DeviceLinkObject = targetOutputReal;
|
||||
|
||||
// Write comments.
|
||||
cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
|
||||
this->GetCommonFileStream()
|
||||
<< "# Device Link build statements for "
|
||||
<< cmState::GetTargetTypeName(genTarget->GetType()) << " target "
|
||||
<< this->GetTargetName() << "\n\n";
|
||||
|
||||
if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
|
||||
std::string architecturesStr =
|
||||
this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
|
||||
|
||||
if (cmIsOff(architecturesStr)) {
|
||||
this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
|
||||
"CUDA_SEPARABLE_COMPILATION on Clang "
|
||||
"requires CUDA_ARCHITECTURES to be set.");
|
||||
return;
|
||||
}
|
||||
|
||||
this->WriteDeviceLinkRules(config);
|
||||
this->WriteDeviceLinkStatements(config, cmExpandedList(architecturesStr),
|
||||
targetOutputReal);
|
||||
} else {
|
||||
this->WriteNvidiaDeviceLinkStatement(config, fileConfig, targetOutputDir,
|
||||
targetOutputReal);
|
||||
}
|
||||
}
|
||||
|
||||
void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
|
||||
const std::string& config, const std::vector<std::string>& architectures,
|
||||
const std::string& output)
|
||||
{
|
||||
// Ensure there are no duplicates.
|
||||
const cmNinjaDeps explicitDeps = [&]() -> std::vector<std::string> {
|
||||
std::unordered_set<std::string> depsSet;
|
||||
const cmNinjaDeps linkDeps =
|
||||
this->ComputeLinkDeps(this->TargetLinkLanguage(config), config, true);
|
||||
const cmNinjaDeps objects = this->GetObjects(config);
|
||||
depsSet.insert(linkDeps.begin(), linkDeps.end());
|
||||
depsSet.insert(objects.begin(), objects.end());
|
||||
|
||||
std::vector<std::string> deps;
|
||||
std::copy(depsSet.begin(), depsSet.end(), std::back_inserter(deps));
|
||||
return deps;
|
||||
}();
|
||||
|
||||
const std::string objectDir =
|
||||
cmStrCat(this->GeneratorTarget->GetSupportDirectory(),
|
||||
this->GetGlobalGenerator()->ConfigDirectory(config));
|
||||
const std::string ninjaOutputDir = this->ConvertToNinjaPath(objectDir);
|
||||
|
||||
cmNinjaBuild fatbinary(LanguageLinkerCudaFatbinaryRule(config));
|
||||
|
||||
// Link device code for each architecture.
|
||||
for (const std::string& architectureKind : architectures) {
|
||||
// Clang always generates real code, so strip the specifier.
|
||||
const std::string architecture =
|
||||
architectureKind.substr(0, architectureKind.find('-'));
|
||||
const std::string cubin =
|
||||
cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
|
||||
|
||||
fatbinary.Variables["PROFILES"] +=
|
||||
cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
|
||||
fatbinary.ExplicitDeps.emplace_back(cubin);
|
||||
|
||||
cmNinjaBuild dlink(LanguageLinkerCudaDeviceRule(config));
|
||||
dlink.ExplicitDeps = explicitDeps;
|
||||
dlink.Outputs = { cubin };
|
||||
dlink.Variables["ARCH"] = cmStrCat("sm_", architecture);
|
||||
|
||||
// The generated register file contains macros that when expanded register
|
||||
// the device routines. Because the routines are the same for all
|
||||
// architectures the register file will be the same too. Thus generate it
|
||||
// only on the first invocation to reduce overhead.
|
||||
if (fatbinary.ExplicitDeps.size() == 1) {
|
||||
dlink.Variables["REGISTER"] = cmStrCat(
|
||||
"--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
|
||||
}
|
||||
|
||||
this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
|
||||
}
|
||||
|
||||
// Combine all architectures into a single fatbinary.
|
||||
fatbinary.Outputs = { cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h") };
|
||||
this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(),
|
||||
fatbinary);
|
||||
|
||||
// Compile the stub that registers the kernels and contains the fatbinaries.
|
||||
cmNinjaBuild dcompile(LanguageLinkerCudaDeviceCompileRule(config));
|
||||
dcompile.Outputs = { output };
|
||||
dcompile.ExplicitDeps = { cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h") };
|
||||
dcompile.Variables["FATBIN"] =
|
||||
this->GetLocalGenerator()->ConvertToOutputFormat(
|
||||
cmStrCat(objectDir, "/cmake_cuda_fatbin.h"), cmOutputConverter::SHELL);
|
||||
dcompile.Variables["REGISTER"] =
|
||||
this->GetLocalGenerator()->ConvertToOutputFormat(
|
||||
cmStrCat(objectDir, "/cmake_cuda_register.h"), cmOutputConverter::SHELL);
|
||||
this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(),
|
||||
dcompile);
|
||||
}
|
||||
|
||||
void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkStatement(
|
||||
const std::string& config, const std::string& fileConfig,
|
||||
const std::string& outputDir, const std::string& output)
|
||||
{
|
||||
cmGeneratorTarget* genTarget = this->GetGeneratorTarget();
|
||||
cmGlobalNinjaGenerator* globalGen = this->GetGlobalGenerator();
|
||||
|
||||
std::string targetOutputImplib = ConvertToNinjaPath(
|
||||
genTarget->GetFullPath(config, cmStateEnums::ImportLibraryArtifact));
|
||||
|
||||
@@ -606,8 +795,8 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
cmStrCat(this->GetLocalGenerator()->GetTargetDirectory(genTarget),
|
||||
globalGen->ConfigDirectory(fileConfig), "/");
|
||||
targetOutputFileConfigDir =
|
||||
globalGen->ExpandCFGIntDir(targetOutputDir, fileConfig);
|
||||
if (targetOutputDir == targetOutputFileConfigDir) {
|
||||
globalGen->ExpandCFGIntDir(outputDir, fileConfig);
|
||||
if (outputDir == targetOutputFileConfigDir) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -623,27 +812,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
}
|
||||
}
|
||||
|
||||
if (firstForConfig) {
|
||||
globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
|
||||
}
|
||||
this->DeviceLinkObject = targetOutputReal;
|
||||
|
||||
// Write comments.
|
||||
cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
|
||||
const cmStateEnums::TargetType targetType = genTarget->GetType();
|
||||
this->GetCommonFileStream() << "# Device Link build statements for "
|
||||
<< cmState::GetTargetTypeName(targetType)
|
||||
<< " target " << this->GetTargetName() << "\n\n";
|
||||
|
||||
// Compute the comment.
|
||||
cmNinjaBuild build(this->LanguageLinkerDeviceRule(config));
|
||||
build.Comment =
|
||||
cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', targetOutputReal);
|
||||
cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', output);
|
||||
|
||||
cmNinjaVars& vars = build.Variables;
|
||||
|
||||
// Compute outputs.
|
||||
build.Outputs.push_back(targetOutputReal);
|
||||
build.Outputs.push_back(output);
|
||||
// Compute specific libraries to link with.
|
||||
build.ExplicitDeps = this->GetObjects(config);
|
||||
build.ImplicitDeps =
|
||||
@@ -659,7 +836,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator();
|
||||
|
||||
vars["TARGET_FILE"] =
|
||||
localGen.ConvertToOutputFormat(targetOutputReal, cmOutputConverter::SHELL);
|
||||
localGen.ConvertToOutputFormat(output, cmOutputConverter::SHELL);
|
||||
|
||||
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
|
||||
new cmNinjaLinkLineDeviceComputer(
|
||||
@@ -683,8 +860,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
|
||||
// Compute language specific link flags.
|
||||
std::string langFlags;
|
||||
localGen.AddLanguageFlagsForLinking(langFlags, genTarget, cudaLinkLanguage,
|
||||
config);
|
||||
localGen.AddLanguageFlagsForLinking(langFlags, genTarget, "CUDA", config);
|
||||
vars["LANGUAGE_COMPILE_FLAGS"] = langFlags;
|
||||
|
||||
auto const tgtNames = this->TargetNames(config);
|
||||
@@ -692,7 +868,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
vars["SONAME_FLAG"] =
|
||||
this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage(config));
|
||||
vars["SONAME"] = tgtNames.SharedObject;
|
||||
if (targetType == cmStateEnums::SHARED_LIBRARY) {
|
||||
if (genTarget->GetType() == cmStateEnums::SHARED_LIBRARY) {
|
||||
std::string install_dir =
|
||||
this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(config);
|
||||
if (!install_dir.empty()) {
|
||||
@@ -731,7 +907,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
// do not check if the user has explicitly forced a response file.
|
||||
int const commandLineLengthLimit =
|
||||
static_cast<int>(cmSystemTools::CalculateCommandLineLengthLimit()) -
|
||||
globalGen->GetRuleCmdLength(this->LanguageLinkerDeviceRule(config));
|
||||
globalGen->GetRuleCmdLength(build.Rule);
|
||||
|
||||
build.RspFile = this->ConvertToNinjaPath(
|
||||
cmStrCat("CMakeFiles/", genTarget->GetName(),
|
||||
@@ -746,7 +922,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
|
||||
bool usedResponseFile = false;
|
||||
globalGen->WriteBuild(this->GetCommonFileStream(), build,
|
||||
commandLineLengthLimit, &usedResponseFile);
|
||||
this->WriteDeviceLinkRule(usedResponseFile, config);
|
||||
this->WriteNvidiaDeviceLinkRule(usedResponseFile, config);
|
||||
}
|
||||
|
||||
void cmNinjaNormalTargetGenerator::WriteLinkStatement(
|
||||
|
@@ -21,18 +21,31 @@ public:
|
||||
private:
|
||||
std::string LanguageLinkerRule(const std::string& config) const;
|
||||
std::string LanguageLinkerDeviceRule(const std::string& config) const;
|
||||
std::string LanguageLinkerCudaDeviceRule(const std::string& config) const;
|
||||
std::string LanguageLinkerCudaDeviceCompileRule(
|
||||
const std::string& config) const;
|
||||
std::string LanguageLinkerCudaFatbinaryRule(const std::string& config) const;
|
||||
|
||||
const char* GetVisibleTypeName() const;
|
||||
void WriteLanguagesRules(const std::string& config);
|
||||
|
||||
void WriteLinkRule(bool useResponseFile, const std::string& config);
|
||||
void WriteDeviceLinkRule(bool useResponseFile, const std::string& config);
|
||||
void WriteDeviceLinkRules(const std::string& config);
|
||||
void WriteNvidiaDeviceLinkRule(bool useResponseFile,
|
||||
const std::string& config);
|
||||
|
||||
void WriteLinkStatement(const std::string& config,
|
||||
const std::string& fileConfig, bool firstForConfig);
|
||||
void WriteDeviceLinkStatement(const std::string& config,
|
||||
const std::string& fileConfig,
|
||||
bool firstForConfig);
|
||||
void WriteDeviceLinkStatements(const std::string& config,
|
||||
const std::vector<std::string>& architectures,
|
||||
const std::string& output);
|
||||
void WriteNvidiaDeviceLinkStatement(const std::string& config,
|
||||
const std::string& fileConfig,
|
||||
const std::string& outputDir,
|
||||
const std::string& output);
|
||||
|
||||
void WriteObjectLibStatement(const std::string& config);
|
||||
|
||||
|
@@ -346,11 +346,13 @@ std::string cmNinjaTargetGenerator::ComputeIncludes(
|
||||
}
|
||||
|
||||
cmNinjaDeps cmNinjaTargetGenerator::ComputeLinkDeps(
|
||||
const std::string& linkLanguage, const std::string& config) const
|
||||
const std::string& linkLanguage, const std::string& config,
|
||||
bool ignoreType) const
|
||||
{
|
||||
// Static libraries never depend on other targets for linking.
|
||||
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY ||
|
||||
this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY) {
|
||||
if (!ignoreType &&
|
||||
(this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY ||
|
||||
this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY)) {
|
||||
return cmNinjaDeps();
|
||||
}
|
||||
|
||||
@@ -1009,6 +1011,7 @@ void cmNinjaTargetGenerator::WriteObjectBuildStatements(
|
||||
{
|
||||
std::vector<cmSourceFile const*> objectSources;
|
||||
this->GeneratorTarget->GetObjectSources(objectSources, config);
|
||||
|
||||
for (cmSourceFile const* sf : objectSources) {
|
||||
this->WriteObjectBuildStatement(sf, config, fileConfig, firstForConfig);
|
||||
}
|
||||
|
@@ -113,7 +113,8 @@ protected:
|
||||
|
||||
/// @return the list of link dependency for the given target @a target.
|
||||
cmNinjaDeps ComputeLinkDeps(const std::string& linkLanguage,
|
||||
const std::string& config) const;
|
||||
const std::string& config,
|
||||
bool ignoreType = false) const;
|
||||
|
||||
/// @return the source file path for the given @a source.
|
||||
std::string GetSourceFilePath(cmSourceFile const* source) const;
|
||||
|
@@ -141,6 +141,16 @@ std::string cmRulePlaceholderExpander::ExpandRuleVariable(
|
||||
return replaceValues.DependencyFile;
|
||||
}
|
||||
}
|
||||
if (replaceValues.Fatbinary) {
|
||||
if (variable == "FATBINARY") {
|
||||
return replaceValues.Fatbinary;
|
||||
}
|
||||
}
|
||||
if (replaceValues.RegisterFile) {
|
||||
if (variable == "REGISTER_FILE") {
|
||||
return replaceValues.RegisterFile;
|
||||
}
|
||||
}
|
||||
|
||||
if (replaceValues.Target) {
|
||||
if (variable == "TARGET_QUOTED") {
|
||||
|
@@ -64,6 +64,8 @@ public:
|
||||
const char* SwiftOutputFileMap;
|
||||
const char* SwiftSources;
|
||||
const char* ISPCHeader;
|
||||
const char* Fatbinary;
|
||||
const char* RegisterFile;
|
||||
};
|
||||
|
||||
// Expand rule variables in CMake of the type found in language rules
|
||||
|
@@ -17,13 +17,12 @@ add_cuda_test_macro(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly)
|
||||
add_cuda_test_macro(Cuda.Toolkit Toolkit)
|
||||
add_cuda_test_macro(Cuda.IncludePathNoToolkit IncludePathNoToolkit)
|
||||
add_cuda_test_macro(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit)
|
||||
add_cuda_test_macro(Cuda.Complex CudaComplex)
|
||||
add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags)
|
||||
|
||||
# Separable compilation is currently only supported on NVCC. Disable tests
|
||||
# using it for other compilers.
|
||||
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
|
||||
add_cuda_test_macro(Cuda.Complex CudaComplex)
|
||||
# Clang lacks __CUDACC_VER*__ defines.
|
||||
add_cuda_test_macro(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
|
||||
add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags)
|
||||
endif()
|
||||
|
||||
# CUDA only ships the shared version of the toolkit libraries
|
||||
|
@@ -12,33 +12,31 @@ add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusT
|
||||
add_cuda_test_macro(CudaOnly.Standard98 CudaOnlyStandard98)
|
||||
add_cuda_test_macro(CudaOnly.Toolkit CudaOnlyToolkit)
|
||||
add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
|
||||
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
|
||||
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
|
||||
# Clang doesn't have flags for selecting the runtime.
|
||||
add_cuda_test_macro(CudaOnly.SharedRuntimeViaCUDAFlags CudaOnlySharedRuntimeViaCUDAFlags)
|
||||
|
||||
# Separable compilation is currently only supported on NVCC. Disable tests
|
||||
# using it for other compilers.
|
||||
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
|
||||
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
|
||||
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
|
||||
${build_generator_args}
|
||||
--build-project DontResolveDeviceSymbols
|
||||
--build-options ${build_options}
|
||||
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
||||
)
|
||||
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
|
||||
PROPERTY LABELS "CUDA")
|
||||
|
||||
# Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode.
|
||||
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
||||
endif()
|
||||
|
||||
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
|
||||
${build_generator_args}
|
||||
--build-project DontResolveDeviceSymbols
|
||||
--build-options ${build_options}
|
||||
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
||||
)
|
||||
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
|
||||
PROPERTY LABELS "CUDA")
|
||||
|
||||
# CUDA only ships the shared version of the toolkit libraries
|
||||
# on windows
|
||||
if(NOT WIN32)
|
||||
|
Reference in New Issue
Block a user