mirror of
https://github.com/Kitware/CMake.git
synced 2025-10-15 03:48:02 +08:00
cmListFileLexer: Test for broken UTF-32-(BE|LE) BOM
This commit is contained in:
@@ -2715,6 +2715,7 @@ void cmListFileLexer_Delete(cmListFileLexer* lexer)
|
||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||
{
|
||||
unsigned char b[2];
|
||||
size_t n;
|
||||
if (fread(b, 1, 2, f) == 2) {
|
||||
if (b[0] == 0xEF && b[1] == 0xBB) {
|
||||
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
||||
@@ -2730,13 +2731,21 @@ static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
||||
fpos_t p;
|
||||
fgetpos(f, &p);
|
||||
if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
|
||||
n = fread(b, 1, 2, f);
|
||||
if (n == 2 && b[0] == 0 && b[1] == 0) {
|
||||
return cmListFileLexer_BOM_UTF32LE;
|
||||
}
|
||||
if (fsetpos(f, &p) != 0) {
|
||||
return cmListFileLexer_BOM_Broken;
|
||||
}
|
||||
return cmListFileLexer_BOM_UTF16LE;
|
||||
/* In case we were able to subsequently read only a single byte out of two
|
||||
(i.e., three in total), the file must be corrupt and the BOM cannot
|
||||
represent a UTF-16-LE BOM since each code unit must consist of two
|
||||
bytes. This avoids incorrectly detecting an incomplete UTF-32-LE BOM as
|
||||
UTF-16-LE input. */
|
||||
if (n % 2 == 0) {
|
||||
return cmListFileLexer_BOM_UTF16LE;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fseek(f, 0, SEEK_SET) != 0) {
|
||||
|
@@ -442,6 +442,7 @@ void cmListFileLexer_Delete(cmListFileLexer* lexer)
|
||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||
{
|
||||
unsigned char b[2];
|
||||
size_t n;
|
||||
if (fread(b, 1, 2, f) == 2) {
|
||||
if (b[0] == 0xEF && b[1] == 0xBB) {
|
||||
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
||||
@@ -457,13 +458,21 @@ static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
||||
fpos_t p;
|
||||
fgetpos(f, &p);
|
||||
if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
|
||||
n = fread(b, 1, 2, f);
|
||||
if (n == 2 && b[0] == 0 && b[1] == 0) {
|
||||
return cmListFileLexer_BOM_UTF32LE;
|
||||
}
|
||||
if (fsetpos(f, &p) != 0) {
|
||||
return cmListFileLexer_BOM_Broken;
|
||||
}
|
||||
return cmListFileLexer_BOM_UTF16LE;
|
||||
/* In case we were able to subsequently read only a single byte out of two
|
||||
(i.e., three in total), the file must be corrupt and the BOM cannot
|
||||
represent a UTF-16-LE BOM since each code unit must consist of two
|
||||
bytes. This avoids incorrectly detecting an incomplete UTF-32-LE BOM as
|
||||
UTF-16-LE input. */
|
||||
if (n % 2 == 0) {
|
||||
return cmListFileLexer_BOM_UTF16LE;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fseek(f, 0, SEEK_SET) != 0) {
|
||||
|
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-result.txt
Normal file
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-result.txt
Normal file
@@ -0,0 +1 @@
|
||||
1
|
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-stderr.txt
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-stderr.txt
Normal file
Binary file not shown.
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE.cmake
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE.cmake
Normal file
Binary file not shown.
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-result.txt
Normal file
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-result.txt
Normal file
@@ -0,0 +1 @@
|
||||
1
|
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-stderr.txt
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-stderr.txt
Normal file
Binary file not shown.
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE.cmake
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE.cmake
Normal file
Binary file not shown.
@@ -5,6 +5,8 @@ run_cmake(BOM-UTF-16-LE)
|
||||
run_cmake(BOM-UTF-16-BE)
|
||||
run_cmake(BOM-UTF-32-LE)
|
||||
run_cmake(BOM-UTF-32-BE)
|
||||
run_cmake(Broken-BOM-UTF-32-LE)
|
||||
run_cmake(Broken-BOM-UTF-32-BE)
|
||||
run_cmake(CommandSpaces)
|
||||
run_cmake(CommandTabs)
|
||||
run_cmake(CommandNewlines)
|
||||
|
Reference in New Issue
Block a user