mirror of
https://github.com/PCRE2Project/pcre2.git
synced 2025-10-18 17:24:21 +08:00
pcre2grep: add $& as an alias for $0 (#519)
Perl does not use $0 anymore to refer to the text of the matched subject and `pcre2_substitute()` was recently updated to also provide that value using the variable Perl prefers: `$&`. In a similar context, either as part of the formatted output from a match or during the processing of a callback, teach pcre2grep to also populate $&. While at it, update the ChangeLog with recent changes.
This commit is contained in:

committed by
GitHub

parent
223941425f
commit
0d087cce82
2
NEWS
2
NEWS
@@ -52,7 +52,7 @@ a list). Those that are not bugfixes or code tidies are:
|
|||||||
matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can
|
matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can
|
||||||
be used to keep it ASCII only.
|
be used to keep it ASCII only.
|
||||||
|
|
||||||
* Make PCRE2_UCP the default in UTF mode in pcre2grep and add -no_ucp,
|
* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp,
|
||||||
--case-restrict and --posix-digit.
|
--case-restrict and --posix-digit.
|
||||||
|
|
||||||
* Add --group-separator and --no-group-separator to pcre2grep.
|
* Add --group-separator and --no-group-separator to pcre2grep.
|
||||||
|
@@ -637,6 +637,8 @@ echo "RC=$?" >>testtrygrep
|
|||||||
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
|
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$&:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
(cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
|
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||||
|
@@ -724,9 +724,9 @@ text.
|
|||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
||||||
decimal number; zero substitutes the whole match. If the number is greater than
|
decimal number; $& (or the legacy $0) substitutes the whole match. If the
|
||||||
the number of capturing substrings, or if the capture is unset, the replacement
|
number is greater than the number of capturing substrings, or if the capture
|
||||||
is empty.
|
is unset, the replacement is empty.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
||||||
@@ -1025,9 +1025,9 @@ available, provided that callouts were not completely disabled when
|
|||||||
zero-terminated string, which means it should not contain any internal binary
|
zero-terminated string, which means it should not contain any internal binary
|
||||||
zeros. It is written to the output, having first been passed through the same
|
zeros. It is written to the output, having first been passed through the same
|
||||||
escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
|
escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
|
||||||
above). However, $0 cannot be used to insert a matched substring because the
|
above). However, $0 or $& cannot be used to insert a matched substring because
|
||||||
match is still in progress. Instead, the single character '0' is inserted. Any
|
the match is still in progress. Instead, the single character '0' is inserted.
|
||||||
syntax errors in the string (for example, a dollar not followed by another
|
Any syntax error in the string (for example, a dollar not followed by another
|
||||||
character) causes the callout to be ignored. No terminator is added to the
|
character) causes the callout to be ignored. No terminator is added to the
|
||||||
output string, so if you want a newline, you must include it explicitly using
|
output string, so if you want a newline, you must include it explicitly using
|
||||||
the escape $n. For example:
|
the escape $n. For example:
|
||||||
@@ -1057,9 +1057,9 @@ arguments:
|
|||||||
</pre>
|
</pre>
|
||||||
Any substring (including the executable name) may contain escape sequences
|
Any substring (including the executable name) may contain escape sequences
|
||||||
started by a dollar character. These are the same as for the <b>--output</b>
|
started by a dollar character. These are the same as for the <b>--output</b>
|
||||||
(<b>-O</b>) option documented above, except that $0 cannot insert the matched
|
(<b>-O</b>) option documented above, except that $0 or $& cannot insert the
|
||||||
string because the match is still in progress. Instead, the character '0'
|
matched string because the match is still in progress. Instead, the character
|
||||||
is inserted. If you need a literal dollar or pipe character in any
|
'0' is inserted. If you need a literal dollar or pipe character in any
|
||||||
substring, use $$ or $| respectively. Here is an example:
|
substring, use $$ or $| respectively. Here is an example:
|
||||||
<pre>
|
<pre>
|
||||||
echo -e "abcde\n12345" | pcre2grep \
|
echo -e "abcde\n12345" | pcre2grep \
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
.TH PCRE2DEMO 3 " 4 October 2024" "PCRE2 10.44"
|
.TH PCRE2DEMO 3 " 8 October 2024" "PCRE2 10.44"
|
||||||
.\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT!
|
.\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT!
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2DEMO - A demonstration C program for PCRE2
|
PCRE2DEMO - A demonstration C program for PCRE2
|
||||||
|
@@ -629,9 +629,9 @@ contents of the matched part of the line and/or captured substrings into the
|
|||||||
text.
|
text.
|
||||||
.sp
|
.sp
|
||||||
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
||||||
decimal number; zero substitutes the whole match. If the number is greater than
|
decimal number; $& (or the legacy $0) substitutes the whole match. If the
|
||||||
the number of capturing substrings, or if the capture is unset, the replacement
|
number is greater than the number of capturing substrings, or if the capture
|
||||||
is empty.
|
is unset, the replacement is empty.
|
||||||
.sp
|
.sp
|
||||||
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
||||||
newline; $r by carriage return; $t by tab; $v by vertical tab.
|
newline; $r by carriage return; $t by tab; $v by vertical tab.
|
||||||
@@ -914,9 +914,9 @@ available, provided that callouts were not completely disabled when
|
|||||||
zero-terminated string, which means it should not contain any internal binary
|
zero-terminated string, which means it should not contain any internal binary
|
||||||
zeros. It is written to the output, having first been passed through the same
|
zeros. It is written to the output, having first been passed through the same
|
||||||
escape processing as text from the \fB--output\fP (\fB-O\fP) option (see
|
escape processing as text from the \fB--output\fP (\fB-O\fP) option (see
|
||||||
above). However, $0 cannot be used to insert a matched substring because the
|
above). However, $0 or $& cannot be used to insert a matched substring because
|
||||||
match is still in progress. Instead, the single character '0' is inserted. Any
|
the match is still in progress. Instead, the single character '0' is inserted.
|
||||||
syntax errors in the string (for example, a dollar not followed by another
|
Any syntax error in the string (for example, a dollar not followed by another
|
||||||
character) causes the callout to be ignored. No terminator is added to the
|
character) causes the callout to be ignored. No terminator is added to the
|
||||||
output string, so if you want a newline, you must include it explicitly using
|
output string, so if you want a newline, you must include it explicitly using
|
||||||
the escape $n. For example:
|
the escape $n. For example:
|
||||||
@@ -945,9 +945,9 @@ arguments:
|
|||||||
.sp
|
.sp
|
||||||
Any substring (including the executable name) may contain escape sequences
|
Any substring (including the executable name) may contain escape sequences
|
||||||
started by a dollar character. These are the same as for the \fB--output\fP
|
started by a dollar character. These are the same as for the \fB--output\fP
|
||||||
(\fB-O\fP) option documented above, except that $0 cannot insert the matched
|
(\fB-O\fP) option documented above, except that $0 or $& cannot insert the
|
||||||
string because the match is still in progress. Instead, the character '0'
|
matched string because the match is still in progress. Instead, the character
|
||||||
is inserted. If you need a literal dollar or pipe character in any
|
'0' is inserted. If you need a literal dollar or pipe character in any
|
||||||
substring, use $$ or $| respectively. Here is an example:
|
substring, use $$ or $| respectively. Here is an example:
|
||||||
.sp
|
.sp
|
||||||
echo -e "abcde\en12345" | pcre2grep \e
|
echo -e "abcde\en12345" | pcre2grep \e
|
||||||
|
@@ -702,10 +702,10 @@ OPTIONS
|
|||||||
captured substrings into the text.
|
captured substrings into the text.
|
||||||
|
|
||||||
$<digits> or ${<digits>} is replaced by the captured sub-
|
$<digits> or ${<digits>} is replaced by the captured sub-
|
||||||
string of the given decimal number; zero substitutes the
|
string of the given decimal number; $& (or the legacy $0)
|
||||||
whole match. If the number is greater than the number of cap-
|
substitutes the whole match. If the number is greater than
|
||||||
turing substrings, or if the capture is unset, the replace-
|
the number of capturing substrings, or if the capture is un-
|
||||||
ment is empty.
|
set, the replacement is empty.
|
||||||
|
|
||||||
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
||||||
form feed; $n by newline; $r by carriage return; $t by tab;
|
form feed; $n by newline; $r by carriage return; $t by tab;
|
||||||
@@ -998,13 +998,13 @@ USING PCRE2'S CALLOUT FACILITY
|
|||||||
processed as a zero-terminated string, which means it should not con-
|
processed as a zero-terminated string, which means it should not con-
|
||||||
tain any internal binary zeros. It is written to the output, having
|
tain any internal binary zeros. It is written to the output, having
|
||||||
first been passed through the same escape processing as text from the
|
first been passed through the same escape processing as text from the
|
||||||
--output (-O) option (see above). However, $0 cannot be used to insert
|
--output (-O) option (see above). However, $0 or $& cannot be used to
|
||||||
a matched substring because the match is still in progress. Instead,
|
insert a matched substring because the match is still in progress. In-
|
||||||
the single character '0' is inserted. Any syntax errors in the string
|
stead, the single character '0' is inserted. Any syntax error in the
|
||||||
(for example, a dollar not followed by another character) causes the
|
string (for example, a dollar not followed by another character) causes
|
||||||
callout to be ignored. No terminator is added to the output string, so
|
the callout to be ignored. No terminator is added to the output string,
|
||||||
if you want a newline, you must include it explicitly using the escape
|
so if you want a newline, you must include it explicitly using the es-
|
||||||
$n. For example:
|
cape $n. For example:
|
||||||
|
|
||||||
pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
|
pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
|
||||||
|
|
||||||
@@ -1028,10 +1028,10 @@ USING PCRE2'S CALLOUT FACILITY
|
|||||||
|
|
||||||
Any substring (including the executable name) may contain escape se-
|
Any substring (including the executable name) may contain escape se-
|
||||||
quences started by a dollar character. These are the same as for the
|
quences started by a dollar character. These are the same as for the
|
||||||
--output (-O) option documented above, except that $0 cannot insert the
|
--output (-O) option documented above, except that $0 or $& cannot in-
|
||||||
matched string because the match is still in progress. Instead, the
|
sert the matched string because the match is still in progress. In-
|
||||||
character '0' is inserted. If you need a literal dollar or pipe charac-
|
stead, the character '0' is inserted. If you need a literal dollar or
pipe character in any substring, use $$ or $| respectively. Here is an
|
||||||
ter in any substring, use $$ or $| respectively. Here is an example:
|
example:
|
||||||
|
|
||||||
echo -e "abcde\n12345" | pcre2grep \
|
echo -e "abcde\n12345" | pcre2grep \
|
||||||
'(?x)(.)(..(.))
|
'(?x)(.)(..(.))
|
||||||
|
@@ -2024,11 +2024,23 @@ switch (*(++string))
|
|||||||
*last = string;
|
*last = string;
|
||||||
return DDE_ERROR;
|
return DDE_ERROR;
|
||||||
|
|
||||||
|
case '&':
|
||||||
|
/* In a callout, no capture is available. Return the character '0' for
|
||||||
|
consistency with $0. */
|
||||||
|
|
||||||
|
if (callout) *value = '0';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*value = 0;
|
||||||
|
rc = DDE_CAPTURE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case '{':
|
case '{':
|
||||||
brace = TRUE;
|
brace = TRUE;
|
||||||
string++;
|
string++;
|
||||||
if (!isdigit((unsigned char)(*string))) /* Syntax error: a decimal number required. */
|
if (!isdigit((unsigned char)(*string))) /* Syntax error: */
|
||||||
{
|
{ /* a decimal number required. */
|
||||||
if (!callout)
|
if (!callout)
|
||||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||||
(int)(string - begin), "decimal number expected");
|
(int)(string - begin), "decimal number expected");
|
||||||
@@ -2105,9 +2117,9 @@ switch (*(++string))
|
|||||||
{
|
{
|
||||||
if (!isxdigit(*string)) break;
|
if (!isxdigit(*string)) break;
|
||||||
if (*string >= '0' && *string <= '9')
|
if (*string >= '0' && *string <= '9')
|
||||||
c = c *16 + *string++ - '0';
|
c = c *16 + (*string++ - '0');
|
||||||
else
|
else
|
||||||
c = c * 16 + (*string++ | 0x20) - 'a' + 10;
|
c = c * 16 + ((*string++ | 0x20) - 'a') + 10;
|
||||||
}
|
}
|
||||||
*value = c;
|
*value = c;
|
||||||
string--; /* Point to last digit */
|
string--; /* Point to last digit */
|
||||||
|
4
testdata/grepoutput
vendored
4
testdata/grepoutput
vendored
@@ -876,6 +876,10 @@ RC=0
|
|||||||
./testdata/grepinput:a binary zero:zeroa
|
./testdata/grepinput:a binary zero:zeroa
|
||||||
./testdata/grepinput:the binary zero.:zerothe.
|
./testdata/grepinput:the binary zero.:zerothe.
|
||||||
RC=0
|
RC=0
|
||||||
|
./testdata/grepinput:the binary zero.:zerothe.
|
||||||
|
./testdata/grepinput:a binary zero:zeroa
|
||||||
|
./testdata/grepinput:the binary zero.:zerothe.
|
||||||
|
RC=0
|
||||||
the binary zero.:
|
the binary zero.:
|
||||||
|
|
||||||
RC=0
|
RC=0
|
||||||
|
Reference in New Issue
Block a user