mirror of
https://github.com/Kitware/CMake.git
synced 2025-10-15 03:48:02 +08:00
CUDA: Allow sources to be compiled to .ptx files
When the target property `CUDA_PTX_COMPILATION` is enabled CUDA OBJECT libraries will generate ptx files instead of object files.
This commit is contained in:

committed by
Brad King

parent
44f0d2d991
commit
23691d789e
@@ -152,6 +152,7 @@ Properties on Targets
|
||||
/prop_tgt/CONFIG_OUTPUT_NAME
|
||||
/prop_tgt/CONFIG_POSTFIX
|
||||
/prop_tgt/CROSSCOMPILING_EMULATOR
|
||||
/prop_tgt/CUDA_PTX_COMPILATION
|
||||
/prop_tgt/CUDA_SEPARABLE_COMPILATION
|
||||
/prop_tgt/CUDA_EXTENSIONS
|
||||
/prop_tgt/CUDA_STANDARD
|
||||
|
12
Help/prop_tgt/CUDA_PTX_COMPILATION.rst
Normal file
12
Help/prop_tgt/CUDA_PTX_COMPILATION.rst
Normal file
@@ -0,0 +1,12 @@
|
||||
CUDA_PTX_COMPILATION
|
||||
--------------------
|
||||
|
||||
Compile CUDA sources to ``.ptx`` files instead of ``.obj`` files
|
||||
within :ref:`Object Libraries`.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
add_library(myptx OBJECT a.cu b.cu)
|
||||
set_property(TARGET myptx PROPERTY CUDA_PTX_COMPILATION ON)
|
6
Help/release/dev/enable_ptx_compilation.rst
Normal file
6
Help/release/dev/enable_ptx_compilation.rst
Normal file
@@ -0,0 +1,6 @@
|
||||
enable_ptx_compilation
|
||||
----------------------
|
||||
|
||||
* The :prop_tgt:`CUDA_PTX_COMPILATION` target property was added to
|
||||
:ref:`Object Libraries` to support compiling to ``.ptx`` files
|
||||
instead of host object files.
|
@@ -528,6 +528,18 @@ const std::string& cmGeneratorTarget::GetObjectName(cmSourceFile const* file)
|
||||
return this->Objects[file];
|
||||
}
|
||||
|
||||
const char* cmGeneratorTarget::GetCustomObjectExtension() const
|
||||
{
|
||||
static std::string extension;
|
||||
const bool has_ptx_extension =
|
||||
this->GetPropertyAsBool("CUDA_PTX_COMPILATION");
|
||||
if (has_ptx_extension) {
|
||||
extension = ".ptx";
|
||||
return extension.c_str();
|
||||
}
|
||||
return CM_NULLPTR;
|
||||
}
|
||||
|
||||
void cmGeneratorTarget::AddExplicitObjectName(cmSourceFile const* sf)
|
||||
{
|
||||
this->ExplicitObjectName.insert(sf);
|
||||
|
@@ -124,6 +124,7 @@ public:
|
||||
void GetObjectSources(std::vector<cmSourceFile const*>&,
|
||||
const std::string& config) const;
|
||||
const std::string& GetObjectName(cmSourceFile const* file);
|
||||
const char* GetCustomObjectExtension() const;
|
||||
|
||||
bool HasExplicitObjectName(cmSourceFile const* file) const;
|
||||
void AddExplicitObjectName(cmSourceFile const* sf);
|
||||
|
@@ -2162,7 +2162,7 @@ bool cmLocalGenerator::IsNMake() const
|
||||
|
||||
std::string cmLocalGenerator::GetObjectFileNameWithoutTarget(
|
||||
const cmSourceFile& source, std::string const& dir_max,
|
||||
bool* hasSourceExtension)
|
||||
bool* hasSourceExtension, char const* customOutputExtension)
|
||||
{
|
||||
// Construct the object file name using the full path to the source
|
||||
// file which is its only unique identification.
|
||||
@@ -2223,7 +2223,7 @@ std::string cmLocalGenerator::GetObjectFileNameWithoutTarget(
|
||||
}
|
||||
|
||||
// Remove the source extension if it is to be replaced.
|
||||
if (replaceExt) {
|
||||
if (replaceExt || customOutputExtension) {
|
||||
keptSourceExtension = false;
|
||||
std::string::size_type dot_pos = objectName.rfind('.');
|
||||
if (dot_pos != std::string::npos) {
|
||||
@@ -2232,7 +2232,11 @@ std::string cmLocalGenerator::GetObjectFileNameWithoutTarget(
|
||||
}
|
||||
|
||||
// Store the new extension.
|
||||
objectName += this->GlobalGenerator->GetLanguageOutputExtension(source);
|
||||
if (customOutputExtension) {
|
||||
objectName += customOutputExtension;
|
||||
} else {
|
||||
objectName += this->GlobalGenerator->GetLanguageOutputExtension(source);
|
||||
}
|
||||
}
|
||||
if (hasSourceExtension) {
|
||||
*hasSourceExtension = keptSourceExtension;
|
||||
|
@@ -273,7 +273,8 @@ public:
|
||||
// Compute object file names.
|
||||
std::string GetObjectFileNameWithoutTarget(
|
||||
const cmSourceFile& source, std::string const& dir_max,
|
||||
bool* hasSourceExtension = CM_NULLPTR);
|
||||
bool* hasSourceExtension = CM_NULLPTR,
|
||||
char const* customOutputExtension = CM_NULLPTR);
|
||||
|
||||
/** Fill out the static linker flags for the given target. */
|
||||
void GetStaticLibraryFlags(std::string& flags, std::string const& config,
|
||||
|
@@ -249,12 +249,15 @@ void cmLocalNinjaGenerator::ComputeObjectFilenames(
|
||||
std::map<cmSourceFile const*, std::string>& mapping,
|
||||
cmGeneratorTarget const* gt)
|
||||
{
|
||||
// Determine if these object files should use a custom extension
|
||||
char const* custom_ext = gt->GetCustomObjectExtension();
|
||||
for (std::map<cmSourceFile const*, std::string>::iterator si =
|
||||
mapping.begin();
|
||||
si != mapping.end(); ++si) {
|
||||
cmSourceFile const* sf = si->first;
|
||||
si->second =
|
||||
this->GetObjectFileNameWithoutTarget(*sf, gt->ObjectDirectory);
|
||||
bool keptSourceExtension;
|
||||
si->second = this->GetObjectFileNameWithoutTarget(
|
||||
*sf, gt->ObjectDirectory, &keptSourceExtension, custom_ext);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -159,12 +159,15 @@ void cmLocalUnixMakefileGenerator3::ComputeObjectFilenames(
|
||||
std::map<cmSourceFile const*, std::string>& mapping,
|
||||
cmGeneratorTarget const* gt)
|
||||
{
|
||||
// Determine if these object files should use a custom extension
|
||||
char const* custom_ext = gt->GetCustomObjectExtension();
|
||||
for (std::map<cmSourceFile const*, std::string>::iterator si =
|
||||
mapping.begin();
|
||||
si != mapping.end(); ++si) {
|
||||
cmSourceFile const* sf = si->first;
|
||||
si->second =
|
||||
this->GetObjectFileNameWithoutTarget(*sf, gt->ObjectDirectory);
|
||||
bool keptSourceExtension;
|
||||
si->second = this->GetObjectFileNameWithoutTarget(
|
||||
*sf, gt->ObjectDirectory, &keptSourceExtension, custom_ext);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -32,6 +32,7 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames(
|
||||
std::map<cmSourceFile const*, std::string>& mapping,
|
||||
cmGeneratorTarget const* gt)
|
||||
{
|
||||
char const* custom_ext = gt->GetCustomObjectExtension();
|
||||
std::string dir_max = this->ComputeLongestObjectDirectory(gt);
|
||||
|
||||
// Count the number of object files with each name. Note that
|
||||
@@ -44,7 +45,12 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames(
|
||||
cmSourceFile const* sf = si->first;
|
||||
std::string objectNameLower = cmSystemTools::LowerCase(
|
||||
cmSystemTools::GetFilenameWithoutLastExtension(sf->GetFullPath()));
|
||||
objectNameLower += this->GlobalGenerator->GetLanguageOutputExtension(*sf);
|
||||
if (custom_ext) {
|
||||
objectNameLower += custom_ext;
|
||||
} else {
|
||||
objectNameLower +=
|
||||
this->GlobalGenerator->GetLanguageOutputExtension(*sf);
|
||||
}
|
||||
counts[objectNameLower] += 1;
|
||||
}
|
||||
|
||||
@@ -57,10 +63,16 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames(
|
||||
cmSourceFile const* sf = si->first;
|
||||
std::string objectName =
|
||||
cmSystemTools::GetFilenameWithoutLastExtension(sf->GetFullPath());
|
||||
objectName += this->GlobalGenerator->GetLanguageOutputExtension(*sf);
|
||||
if (custom_ext) {
|
||||
objectName += custom_ext;
|
||||
} else {
|
||||
objectName += this->GlobalGenerator->GetLanguageOutputExtension(*sf);
|
||||
}
|
||||
if (counts[cmSystemTools::LowerCase(objectName)] > 1) {
|
||||
const_cast<cmGeneratorTarget*>(gt)->AddExplicitObjectName(sf);
|
||||
objectName = this->GetObjectFileNameWithoutTarget(*sf, dir_max);
|
||||
bool keptSourceExtension;
|
||||
objectName = this->GetObjectFileNameWithoutTarget(
|
||||
*sf, dir_max, &keptSourceExtension, custom_ext);
|
||||
}
|
||||
si->second = objectName;
|
||||
}
|
||||
|
@@ -593,6 +593,9 @@ void cmMakefileTargetGenerator::WriteObjectBuildFile(
|
||||
if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_SEPARABLE_COMPILATION")) {
|
||||
cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
|
||||
} else if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_PTX_COMPILATION")) {
|
||||
cmdVar = std::string("CMAKE_CUDA_COMPILE_PTX_COMPILATION");
|
||||
} else {
|
||||
cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
|
||||
}
|
||||
|
@@ -589,6 +589,9 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang)
|
||||
if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_SEPARABLE_COMPILATION")) {
|
||||
cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
|
||||
} else if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_PTX_COMPILATION")) {
|
||||
cmdVar = std::string("CMAKE_CUDA_COMPILE_PTX_COMPILATION");
|
||||
} else {
|
||||
cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
|
||||
}
|
||||
|
@@ -940,6 +940,14 @@ void cmTarget::SetProperty(const std::string& prop, const char* value)
|
||||
} else if (cmHasLiteralPrefix(prop, "IMPORTED_LIBNAME") &&
|
||||
!this->CheckImportedLibName(prop, value ? value : "")) {
|
||||
/* error was reported by check method */
|
||||
} else if (prop == "CUDA_PTX_COMPILATION" &&
|
||||
this->GetType() != cmStateEnums::OBJECT_LIBRARY) {
|
||||
std::ostringstream e;
|
||||
e << "CUDA_PTX_COMPILATION property can only be applied to OBJECT "
|
||||
"targets (\""
|
||||
<< this->Name << "\")\n";
|
||||
this->Makefile->IssueMessage(cmake::FATAL_ERROR, e.str());
|
||||
return;
|
||||
} else {
|
||||
this->Properties.SetProperty(prop, value);
|
||||
}
|
||||
|
@@ -2467,6 +2467,12 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
|
||||
|
||||
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) {
|
||||
cudaOptions.AddFlag("GenerateRelocatableDeviceCode", "true");
|
||||
} else if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_PTX_COMPILATION")) {
|
||||
cudaOptions.AddFlag("NvccCompilation", "ptx");
|
||||
// We drop the %(Extension) component as CMake expects all PTX files
|
||||
// to not have the source file extension at all
|
||||
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx");
|
||||
}
|
||||
|
||||
// Convert the host compiler options to the toolset's abstractions
|
||||
|
@@ -1,4 +1,5 @@
|
||||
|
||||
ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
|
||||
ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
|
||||
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||
|
82
Tests/CudaOnly/ExportPTX/CMakeLists.txt
Normal file
82
Tests/CudaOnly/ExportPTX/CMakeLists.txt
Normal file
@@ -0,0 +1,82 @@
|
||||
cmake_minimum_required(VERSION 3.8)
|
||||
project (CudaOnlyExportPTX CUDA)
|
||||
|
||||
#Goal for this example:
|
||||
# How to generate PTX files instead of OBJECT files
|
||||
# How to reference PTX files for custom commands
|
||||
# How to install PTX files
|
||||
|
||||
add_library(CudaPTX OBJECT kernelA.cu kernelB.cu)
|
||||
set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
|
||||
|
||||
#Test ObjectFiles with file(GENERATE)
|
||||
file(GENERATE
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/gen_$<LOWER_CASE:$<CONFIG>/>path_to_objs.h
|
||||
CONTENT [[
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#ifndef path_to_objs
|
||||
#define path_to_objs
|
||||
|
||||
static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>";
|
||||
|
||||
#endif
|
||||
|
||||
]]
|
||||
)
|
||||
#We are going to need a wrapper around bin2c for multiple reasons
|
||||
# 1. bin2c only converts a single file at a time
|
||||
# 2. bin2c has only standard out support, so we have to manually
|
||||
# redirect to a cmake buffer
|
||||
# 3. We want to pack everything into a single output file, so we
|
||||
# need to also pass the --name option
|
||||
set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h)
|
||||
|
||||
get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY)
|
||||
find_program(bin_to_c
|
||||
NAMES bin2c
|
||||
PATHS ${cuda_compiler_bin}
|
||||
)
|
||||
if(NOT bin_to_c)
|
||||
message(FATAL_ERROR
|
||||
"bin2c not found:\n"
|
||||
" CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n"
|
||||
" cuda_compiler_bin='${cuda_compiler_bin}'\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${output_file}"
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
"-DBIN_TO_C_COMMAND=${bin_to_c}"
|
||||
"-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>"
|
||||
"-DOUTPUT=${output_file}"
|
||||
-P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake
|
||||
VERBATIM
|
||||
DEPENDS $<TARGET_OBJECTS:CudaPTX>
|
||||
COMMENT "Converting Object files to a C header"
|
||||
)
|
||||
|
||||
add_executable(CudaOnlyExportPTX main.cu ${output_file})
|
||||
add_dependencies(CudaOnlyExportPTX CudaPTX)
|
||||
target_include_directories(CudaOnlyExportPTX PRIVATE
|
||||
${CMAKE_CURRENT_BINARY_DIR} )
|
||||
target_compile_definitions(CudaOnlyExportPTX PRIVATE
|
||||
"CONFIG_TYPE=gen_$<LOWER_CASE:$<CONFIG>>")
|
||||
|
||||
if(APPLE)
|
||||
# We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
|
||||
# the static cuda runtime can find it at runtime.
|
||||
target_link_libraries(CudaOnlyExportPTX PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
|
||||
endif()
|
||||
|
||||
#Verify that we can install object targets properly
|
||||
install(TARGETS CudaPTX CudaOnlyExportPTX
|
||||
EXPORT cudaPTX
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
OBJECTS DESTINATION objs
|
||||
)
|
||||
install(EXPORT cudaPTX DESTINATION lib/cudaPTX)
|
19
Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake
Normal file
19
Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
set(file_contents)
|
||||
foreach(obj ${OBJECTS})
|
||||
get_filename_component(obj_ext ${obj} EXT)
|
||||
get_filename_component(obj_name ${obj} NAME_WE)
|
||||
get_filename_component(obj_dir ${obj} DIRECTORY)
|
||||
|
||||
if(obj_ext MATCHES ".ptx")
|
||||
set(args --name ${obj_name} ${obj})
|
||||
execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args}
|
||||
WORKING_DIRECTORY ${obj_dir}
|
||||
RESULT_VARIABLE result
|
||||
OUTPUT_VARIABLE output
|
||||
ERROR_VARIABLE error_var
|
||||
)
|
||||
set(file_contents "${file_contents} \n${output}")
|
||||
endif()
|
||||
endforeach()
|
||||
file(WRITE "${OUTPUT}" "${file_contents}")
|
7
Tests/CudaOnly/ExportPTX/kernelA.cu
Normal file
7
Tests/CudaOnly/ExportPTX/kernelA.cu
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
|
||||
{
|
||||
for (int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
r[i] = x[i] * y[i] + z[i];
|
||||
}
|
||||
}
|
8
Tests/CudaOnly/ExportPTX/kernelB.cu
Normal file
8
Tests/CudaOnly/ExportPTX/kernelB.cu
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
|
||||
__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
|
||||
{
|
||||
for (int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
r[i] = x[i] * y[i] + z[i];
|
||||
}
|
||||
}
|
28
Tests/CudaOnly/ExportPTX/main.cu
Normal file
28
Tests/CudaOnly/ExportPTX/main.cu
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
/*
|
||||
Define GENERATED_HEADER macro to allow c++ files to include headers
|
||||
generated based on different configuration types.
|
||||
*/
|
||||
|
||||
/* clang-format off */
|
||||
#define GENERATED_HEADER(x) GENERATED_HEADER0(CONFIG_TYPE/x)
|
||||
/* clang-format on */
|
||||
#define GENERATED_HEADER0(x) GENERATED_HEADER1(x)
|
||||
#define GENERATED_HEADER1(x) <x>
|
||||
|
||||
#include GENERATED_HEADER(path_to_objs.h)
|
||||
|
||||
#include "embedded_objs.h"
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
unsigned char* ka = kernelA;
|
||||
unsigned char* kb = kernelB;
|
||||
|
||||
return (ka != NULL && kb != NULL) ? 0 : 1;
|
||||
}
|
Reference in New Issue
Block a user