mirror of
https://github.com/PCRE2Project/pcre2.git
synced 2025-10-17 07:04:13 +08:00

The pcre2test utility needs quite a few changes to accommodate this. It is simpler to add a new mode to it than to make it fully EBCDIC-native. On an ASCII system, pcre2test performs ASCII I/O, but translates the input when passing it to the fully-EBCDIC-supporting library.
301 lines
12 KiB
Plaintext
301 lines
12 KiB
Plaintext
# This set of tests is run only with the 8-bit library. They must not require
|
|
# UTF-8 or Unicode property support. */
|
|
|
|
#forbid_utf
|
|
#newline_default lf any anycrlf
|
|
|
|
#if !ebcdic
|
|
|
|
/a\xc4\xa3b/
|
|
a\N{U+123}b
|
|
\= Expect no match # error message (too big char)
|
|
a\x{0123}b
|
|
a\o{00443}b
|
|
a\443b
|
|
|
|
/fd bf bf bf bf bf/I,hex
|
|
\= Expect warning
|
|
\N{U+7fffffff}
|
|
\= Expect no match # error message (too big char)
|
|
\x{7fffffff}
|
|
|
|
#endif
|
|
|
|
/\x{100}/I
|
|
|
|
/\o{400}/I
|
|
|
|
#if !ebcdic
|
|
|
|
/ (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* # optional leading comment
|
|
(?: (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
" (?: # opening quote...
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
| # or
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
)* " # closing quote
|
|
) # initial word
|
|
(?: (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* \. (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
" (?: # opening quote...
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
| # or
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
)* " # closing quote
|
|
) )* # further okay, if led by a period
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* @ (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # initial subdomain
|
|
(?: #
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* \. # if led by a period...
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # ...further okay
|
|
)*
|
|
# address
|
|
| # or
|
|
(?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
" (?: # opening quote...
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
| # or
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
)* " # closing quote
|
|
) # one word, optionally followed by....
|
|
(?:
|
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
|
\(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) | # comments, or...
|
|
|
|
" (?: # opening quote...
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
| # or
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
)* " # closing quote
|
|
# quoted strings
|
|
)*
|
|
< (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* # leading <
|
|
(?: @ (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # initial subdomain
|
|
(?: #
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* \. # if led by a period...
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # ...further okay
|
|
)*
|
|
|
|
(?: (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* , (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* @ (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # initial subdomain
|
|
(?: #
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* \. # if led by a period...
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # ...further okay
|
|
)*
|
|
)* # further okay, if led by comma
|
|
: # closing colon
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* )? # optional route
|
|
(?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
" (?: # opening quote...
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
| # or
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
)* " # closing quote
|
|
) # initial word
|
|
(?: (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* \. (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
" (?: # opening quote...
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
| # or
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
)* " # closing quote
|
|
) )* # further okay, if led by a period
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* @ (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # initial subdomain
|
|
(?: #
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* \. # if led by a period...
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* (?:
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
| \[ # [
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
\] # ]
|
|
) # ...further okay
|
|
)*
|
|
# address spec
|
|
(?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* > # trailing >
|
|
# name and address
|
|
) (?: [\040\t] | \(
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
\) )* # optional trailing comment
|
|
/Ix
|
|
|
|
#endif
|
|
|
|
/\h/I
|
|
|
|
/\H/I
|
|
|
|
/\v/I
|
|
|
|
/\V/I
|
|
|
|
/\R/I
|
|
|
|
/[\h]/B
|
|
>\x09<
|
|
|
|
/[\h]+/B
|
|
>\x09\x20\xa0<
|
|
|
|
/[\v]/B
|
|
|
|
/[\H]/B
|
|
|
|
/[^\h]/B
|
|
|
|
/[\V]/B
|
|
|
|
#if !ebcdic
|
|
|
|
/[\x0a\V]/B
|
|
|
|
#endif
|
|
|
|
/\777/I
|
|
|
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
|
XX
|
|
|
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark,alt_verbnames
|
|
XX
|
|
|
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
|
XX
|
|
|
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark,alt_verbnames
|
|
XX
|
|
|
|
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
|
|
|
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
|
|
|
#if !ebcdic
|
|
|
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
|
|
|
#endif
|
|
|
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
|
|
|
/(*MARK:a\x{100}b)z/alt_verbnames
|
|
|
|
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
|
|
|
|
/(?i:A{1,}\6666666666)/
|
|
A\x{1b6}6666666
|
|
|
|
# Should cause an error
|
|
/abc/substitute_extended,replace=>\777<
|
|
abc
|
|
|
|
# Should cause an error
|
|
/abc/substitute_extended,replace=>\o{012345}<
|
|
abc
|
|
|
|
/i/turkish_casing
|
|
|
|
# End of testinput9
|