mirror of
https://github.com/Kitware/CMake.git
synced 2025-10-16 14:08:35 +08:00
CUDA/Clang: Simplify --register-link-binaries logic
Move the logic that appends the cubin so that it runs after the --register-link-binaries check, which lets the check simply be empty(). With the Makefile generator, the option is now at the front instead of being intermixed with the actual bins.
This commit is contained in:
@@ -1519,6 +1519,20 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
|
||||
// Link device code for each architecture.
|
||||
for (const std::string& architectureKind : architectures) {
|
||||
std::string registerFileCmd;
|
||||
|
||||
// The generated register file contains macros that when expanded
|
||||
// register the device routines. Because the routines are the same for
|
||||
// all architectures the register file will be the same too. Thus
|
||||
// generate it only on the first invocation to reduce overhead.
|
||||
if (fatbinaryDepends.empty()) {
|
||||
std::string const registerFileRel =
|
||||
cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
|
||||
registerFileCmd =
|
||||
cmStrCat(" --register-link-binaries=", registerFileRel);
|
||||
cleanFiles.push_back(registerFileRel);
|
||||
}
|
||||
|
||||
// Clang always generates real code, so strip the specifier.
|
||||
const std::string architecture =
|
||||
architectureKind.substr(0, architectureKind.find('-'));
|
||||
@@ -1528,20 +1542,6 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
|
||||
fatbinaryDepends.emplace_back(cubin);
|
||||
|
||||
std::string registerFileCmd;
|
||||
|
||||
// The generated register file contains macros that when expanded
|
||||
// register the device routines. Because the routines are the same for
|
||||
// all architectures the register file will be the same too. Thus
|
||||
// generate it only on the first invocation to reduce overhead.
|
||||
if (fatbinaryDepends.size() == 1) {
|
||||
std::string const registerFileRel =
|
||||
cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
|
||||
registerFileCmd =
|
||||
cmStrCat(" --register-link-binaries=", registerFileRel);
|
||||
cleanFiles.push_back(registerFileRel);
|
||||
}
|
||||
|
||||
std::string command = cmStrCat(
|
||||
this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
|
||||
" -arch=sm_", architecture, registerFileCmd, " -o=$@ ",
|
||||
|
@@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
|
||||
const std::string cubin =
|
||||
cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
|
||||
|
||||
fatbinary.Variables["PROFILES"] +=
|
||||
cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
|
||||
fatbinary.ExplicitDeps.emplace_back(cubin);
|
||||
|
||||
cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config));
|
||||
dlink.ExplicitDeps = explicitDeps;
|
||||
dlink.Outputs = { cubin };
|
||||
@@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
|
||||
// the device routines. Because the routines are the same for all
|
||||
// architectures the register file will be the same too. Thus generate it
|
||||
// only on the first invocation to reduce overhead.
|
||||
if (fatbinary.ExplicitDeps.size() == 1) {
|
||||
if (fatbinary.ExplicitDeps.empty()) {
|
||||
dlink.Variables["REGISTER"] = cmStrCat(
|
||||
"--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
|
||||
}
|
||||
|
||||
fatbinary.Variables["PROFILES"] +=
|
||||
cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
|
||||
fatbinary.ExplicitDeps.emplace_back(cubin);
|
||||
|
||||
this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user