mirror of
https://github.com/Kitware/CMake.git
synced 2025-10-16 05:26:58 +08:00
cmListFileLexer: Test for broken UTF-32-(BE|LE) BOM
This commit is contained in:
@@ -2715,6 +2715,7 @@ void cmListFileLexer_Delete(cmListFileLexer* lexer)
|
|||||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||||
{
|
{
|
||||||
unsigned char b[2];
|
unsigned char b[2];
|
||||||
|
size_t n;
|
||||||
if (fread(b, 1, 2, f) == 2) {
|
if (fread(b, 1, 2, f) == 2) {
|
||||||
if (b[0] == 0xEF && b[1] == 0xBB) {
|
if (b[0] == 0xEF && b[1] == 0xBB) {
|
||||||
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
||||||
@@ -2730,13 +2731,21 @@ static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
|||||||
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
||||||
fpos_t p;
|
fpos_t p;
|
||||||
fgetpos(f, &p);
|
fgetpos(f, &p);
|
||||||
if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
|
n = fread(b, 1, 2, f);
|
||||||
|
if (n == 2 && b[0] == 0 && b[1] == 0) {
|
||||||
return cmListFileLexer_BOM_UTF32LE;
|
return cmListFileLexer_BOM_UTF32LE;
|
||||||
}
|
}
|
||||||
if (fsetpos(f, &p) != 0) {
|
if (fsetpos(f, &p) != 0) {
|
||||||
return cmListFileLexer_BOM_Broken;
|
return cmListFileLexer_BOM_Broken;
|
||||||
}
|
}
|
||||||
return cmListFileLexer_BOM_UTF16LE;
|
/* In case we were able to subsequently read only a single byte out of two
|
||||||
|
(i.e., three in total), the file must be corrupt and the BOM cannot
|
||||||
|
represent a UTF-16-LE BOM since each code unit must consist of two
|
||||||
|
bytes. This avoids incorrectly detecting an incomplete UTF-32-LE BOM as
|
||||||
|
UTF-16-LE input. */
|
||||||
|
if (n % 2 == 0) {
|
||||||
|
return cmListFileLexer_BOM_UTF16LE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (fseek(f, 0, SEEK_SET) != 0) {
|
if (fseek(f, 0, SEEK_SET) != 0) {
|
||||||
|
@@ -442,6 +442,7 @@ void cmListFileLexer_Delete(cmListFileLexer* lexer)
|
|||||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||||
{
|
{
|
||||||
unsigned char b[2];
|
unsigned char b[2];
|
||||||
|
size_t n;
|
||||||
if (fread(b, 1, 2, f) == 2) {
|
if (fread(b, 1, 2, f) == 2) {
|
||||||
if (b[0] == 0xEF && b[1] == 0xBB) {
|
if (b[0] == 0xEF && b[1] == 0xBB) {
|
||||||
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
||||||
@@ -457,13 +458,21 @@ static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
|||||||
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
||||||
fpos_t p;
|
fpos_t p;
|
||||||
fgetpos(f, &p);
|
fgetpos(f, &p);
|
||||||
if (fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) {
|
n = fread(b, 1, 2, f);
|
||||||
|
if (n == 2 && b[0] == 0 && b[1] == 0) {
|
||||||
return cmListFileLexer_BOM_UTF32LE;
|
return cmListFileLexer_BOM_UTF32LE;
|
||||||
}
|
}
|
||||||
if (fsetpos(f, &p) != 0) {
|
if (fsetpos(f, &p) != 0) {
|
||||||
return cmListFileLexer_BOM_Broken;
|
return cmListFileLexer_BOM_Broken;
|
||||||
}
|
}
|
||||||
return cmListFileLexer_BOM_UTF16LE;
|
/* In case we were able to subsequently read only a single byte out of two
|
||||||
|
(i.e., three in total), the file must be corrupt and the BOM cannot
|
||||||
|
represent a UTF-16-LE BOM since each code unit must consist of two
|
||||||
|
bytes. This avoids incorrectly detecting an incomplete UTF-32-LE BOM as
|
||||||
|
UTF-16-LE input. */
|
||||||
|
if (n % 2 == 0) {
|
||||||
|
return cmListFileLexer_BOM_UTF16LE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (fseek(f, 0, SEEK_SET) != 0) {
|
if (fseek(f, 0, SEEK_SET) != 0) {
|
||||||
|
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-result.txt
Normal file
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-result.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
1
|
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-stderr.txt
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE-stderr.txt
Normal file
Binary file not shown.
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE.cmake
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-BE.cmake
Normal file
Binary file not shown.
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-result.txt
Normal file
1
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-result.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
1
|
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-stderr.txt
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE-stderr.txt
Normal file
Binary file not shown.
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE.cmake
Normal file
BIN
Tests/RunCMake/Syntax/Broken-BOM-UTF-32-LE.cmake
Normal file
Binary file not shown.
@@ -5,6 +5,8 @@ run_cmake(BOM-UTF-16-LE)
|
|||||||
run_cmake(BOM-UTF-16-BE)
|
run_cmake(BOM-UTF-16-BE)
|
||||||
run_cmake(BOM-UTF-32-LE)
|
run_cmake(BOM-UTF-32-LE)
|
||||||
run_cmake(BOM-UTF-32-BE)
|
run_cmake(BOM-UTF-32-BE)
|
||||||
|
run_cmake(Broken-BOM-UTF-32-LE)
|
||||||
|
run_cmake(Broken-BOM-UTF-32-BE)
|
||||||
run_cmake(CommandSpaces)
|
run_cmake(CommandSpaces)
|
||||||
run_cmake(CommandTabs)
|
run_cmake(CommandTabs)
|
||||||
run_cmake(CommandNewlines)
|
run_cmake(CommandNewlines)
|
||||||
|
Reference in New Issue
Block a user