Files
pcre2/maint/UpdateAlways
Nicholas Wilson eb3bd3cf14 New pcre2_next_match() API to simplify pcre2demo, test, and substitute (#733)
* The primary purpose of pcre2_next_match() is to make it much easier for
  PCRE2 clients to iterate over matches, without needing an advanced knowledge
  of regular expressions.
* Secondly, we can simplify our own code by merging the three duplicate
  implementations of the /g global match behaviour: pcre2demo, pcre2_substitute,
  and pcre2test.
* Thirdly, as I look closely at the issue, I can improve the documentation.
* Fourthly, I would like to actually simplify the logic, removing a complex loop
  which makes several match attempts, swallows duplicate matches, and more.
  We can have identical behaviour with a simple retry using
  PCRE2_NOTEMPTY_ATSTART.
2025-03-24 13:29:52 +00:00

338 lines
9.5 KiB
Bash
Executable File

#! /bin/bash
# Script to prepare the files for building a PCRE2 release. It does some
# processing of the documentation and detrails files.
# You must run this script before running "make dist". If its first argument
# is "doc", it stops after preparing the documentation. There are no other
# arguments. The script makes use of the following files:
# 132html A Perl script that converts a .1 or .3 man page into HTML. It
# "knows" the relevant troff constructs that are used in the PCRE2
# man pages.
# CheckMan A Perl script that checks man pages for typos in the mark up.
# CleanTxt A Perl script that cleans up the output of "nroff -man" by
# removing backspaces and other redundant text so as to produce
# a readable .txt file.
# Detrail A Perl script that removes trailing spaces from files.
# doc/index.html.src
# A file that is copied as index.html into the doc/html directory
# when the HTML documentation is built. It works like this so that
# doc/html can be deleted and re-created from scratch.
# README & NON-AUTOTOOLS-BUILD
# These files are copied into the doc/html directory, with .txt
# extensions so that they can be hyperlinked from the HTML
# documentation, because some people just go to the HTML without
# looking for text files.
# Force the plain C locale: under a UTF-8 locale (such as "C.UTF-8") nroff
# turns ASCII "HYPHEN-MINUS" into Unicode "HYPHEN".
LANG=C
export LANG
# Work out the current release from configure.ac. The bracketed values of the
# pcre2_major, pcre2_minor and pcre2_prerelease m4_define() lines are joined
# together in file order, with a dot added after the first one.
CURRENT_RELEASE=$(
  grep -E 'm4_define\(pcre2_(major|minor|prerelease)' configure.ac |
    grep -E -o '\[.*\]' |
    sed -E -e '1s/$/./' |
    tr -d '[]\n'
)
# First, sort out the documentation. Remove pcre2demo.3 first because it won't
# pass the markup check (it is created below, using markup that none of the
# other pages use). Stop at once if the doc directory cannot be entered, so
# that nothing below runs in the wrong directory.
cd doc || exit 1
echo Processing documentation
/bin/rm -f pcre2demo.3
# Check the remaining man pages; give up if the markup check fails.
perl ../maint/CheckMan *.1 *.3 || exit 1
# Verify the version number in the man pages: some line containing ".TH" must
# also contain "PCRE2 <release>" in double quotes. Both strings are matched
# literally (grep -F), so the dots in the release number cannot act as regex
# wildcards and accept a wrong version such as 10x46 for 10.46.
for file in *.1 *.3 ; do
  if ! grep -F '.TH' "$file" | grep -F -q "\"PCRE2 $CURRENT_RELEASE\"" ; then
    echo "Version number in $file does not match current release"
    exit 1
  fi
done
# Build pcre2.txt, the plain text form of the library documentation. It starts
# with a fixed banner; each selected .3 page is then formatted with nroff,
# tidied by CleanTxt for online reading, and appended. The individual function
# pages are not included.
cat >pcre2.txt <<End
-----------------------------------------------------------------------------
This file contains a concatenation of the PCRE2 man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. Neither has
the pcre2demo program. There are separate text files for the pcre2grep and
pcre2test commands.
-----------------------------------------------------------------------------
End
echo "Making pcre2.txt"
separator="------------------------------------------------------------------------------"
for page in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
            pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \
            pcre2posix pcre2sample pcre2serialize pcre2syntax \
            pcre2unicode ; do
  raw="$page.rawtxt"
  echo " Processing $page.3"
  nroff -c -man "$page.3" >"$raw"
  perl ../maint/CleanTxt <"$raw" >>pcre2.txt
  /bin/rm "$raw"
  printf '%s\n' "$separator" >>pcre2.txt
  # Every page except pcre2sample is followed by two blank lines.
  [ "$page" = "pcre2sample" ] || printf '\n\n' >>pcre2.txt
done
# Each of the three commands gets its own text file, produced from its .1 man
# page by nroff followed by CleanTxt.
for cmd in pcre2test pcre2grep pcre2-config ; do
  raw="$cmd.rawtxt"
  echo "Making $cmd.txt"
  nroff -c -man "$cmd.1" >"$raw"
  perl ../maint/CleanTxt <"$raw" >"$cmd.txt"
  /bin/rm "$raw"
done
# Make pcre2demo.3 from the pcre2demo.c source file. The embedded Perl script
# opens and writes pcre2demo.3 itself, so its standard output is not
# redirected. The package name and version come from PACKAGE_STRING in
# src/config.h.generic, and the date is the commit date of the most recent git
# commit that touched src/pcre2demo.c. The script exits non-zero if an input
# cannot be read, PACKAGE_STRING is missing, or the output cannot be written;
# that stops the release preparation.
echo "Making pcre2demo.3"
perl <<"END"
use strict;
use warnings;

# Find the package name and version for the .TH line.
open(my $vh, "<", "../src/config.h.generic")
  or die "Failed to open src/config.h.generic\n";
my $version;
while (my $line = <$vh>)
  {
  if ($line =~ /^#define PACKAGE_STRING "([^"]+)"/) { $version = $1; last }
  }
close($vh);
defined $version
  or die "Failed to find PACKAGE_STRING in src/config.h.generic\n";

open(my $in, "<", "../src/pcre2demo.c")
  or die "Failed to open src/pcre2demo.c\n";
open(my $out, ">", "pcre2demo.3")
  or die "Failed to open pcre2demo.3\n";

# The page date is best effort: if git gives nothing, the date is left empty
# and a warning is written to stderr.
my $date = `git log -n1 --date=format:"%d %B %Y" --format=%cd ../src/pcre2demo.c`;
$date = "" unless defined $date;
chomp $date;
warn "Warning: no git commit date found for src/pcre2demo.c\n" if $date eq "";

# Page header, followed by definitions of the EX and EE macros that wrap the
# source listing.
print {$out} ".TH PCRE2DEMO 3 \"", $date, '" "', $version, "\"\n" .
".\\\"AUTOMATICALLY GENERATED BY UpdateAlways - do not EDIT!\n" .
".SH NAME\n" .
"PCRE2DEMO - A demonstration C program for PCRE2\n" .
".SH \"SOURCE CODE\"\n" .
".rs\n" .
".sp\n" .
".\\\" Start example.\n" .
".de EX\n" .
". do ds mF \\\\n[.fam]\n" .
". nr mE \\\\n(.f\n" .
". nf\n" .
". nh\n" .
". do fam C\n" .
". ft CW\n" .
"..\n" .
".\n" .
".\n" .
".\\\" End example.\n" .
".de EE\n" .
". do fam \\\\*(mF\n" .
". ft \\\\n(mE\n" .
". fi\n" .
". hy \\\\n(HY\n" .
"..\n" .
".\n" .
".RS -7\n" .
".EX\n" ;

# Copy the C source, turning each backslash into troff's \e escape so that it
# is printed as a literal backslash.
while (my $line = <$in>)
  {
  $line =~ s/\\/\\e/g;
  print {$out} $line;
  }
print {$out} ".EE\n";
close($in);

# Buffered write errors only show up when the file is closed.
close($out) or die "Failed to write pcre2demo.3\n";
END
if [ $? != 0 ] ; then exit 1; fi
# Run every man page through `man` with warnings enabled. Only stderr is
# captured (the formatted output is discarded); any text at all there is
# reported and treated as a failure.
for page in *.1 *.3 ; do
  man_warnings=$(MANROFFSEQ='' MANWIDTH=80 man --warnings=w,all -E UTF-8 -l -Tutf8 -Z "$page" 2>&1 >/dev/null)
  if [ -n "$man_warnings" ]; then
    printf "Running man generated warnings:\n%s\n" "$man_warnings"
    exit 1
  fi
done
# Make HTML form of the documentation. The html directory is created if it
# is missing and emptied if it is not, so that it can be deleted and rebuilt
# from scratch. The index page and the two text files are then copied in; a
# failed copy stops the script.
echo "Making HTML documentation"
mkdir -p html || exit 1
/bin/rm -f html/*
cp index.html.src html/index.html || exit 1
cp ../README html/README.txt || exit 1
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt || exit 1
# Convert each command (.1) man page to HTML, always with a table of contents.
# Stop if the converter fails.
for page in *.1 ; do
  name=${page%.1}
  echo " Making $name.html"
  perl ../maint/132html -toc "$name" <"$page" >"html/$name.html" || exit 1
done
# Convert each library (.3) man page to HTML. Function summary pages (those
# with an underscore in their name) get no table of contents, and neither do
# the small pages that have only one section.
for page in *.3 ; do
  name=${page%.3}
  case "$name" in
    *_*|pcre2sample|pcre2compat|pcre2demo|pcre2limits|pcre2unicode)
      toc=""
      ;;
    *)
      toc=-toc
      ;;
  esac
  echo " Making $name.html"
  # $toc is left unquoted on purpose: when empty it must vanish instead of
  # being passed as an empty argument.
  perl ../maint/132html $toc "$name" <"$page" >"html/$name.html" || exit 1
done
# The documentation is complete. Go back up to the top-level directory and,
# if the first argument is "doc", finish here successfully.
cd ..
echo "Documentation done"
case "$1" in
  doc) exit 0 ;;
esac
# These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing.
# Text files: these are passed to maint/Detrail and then to maint/CheckTxt
# (with no options) further down. The glob and brace patterns are expanded by
# the shell when the array is assigned, so doc/p* and doc/html/* pick up the
# documentation files present at that point.
txt_files=(
AUTHORS.md
BUILD.bazel
CMakeLists.txt
COPYING
ChangeLog
HACKING
INSTALL
LICENCE.md
MODULE.bazel
Makefile.am
NEWS
NON-AUTOTOOLS-BUILD
README
RunGrepTest
RunTest
SECURITY.md
build.zig
configure.ac
libpcre2-8.pc.in
libpcre2-16.pc.in
libpcre2-32.pc.in
libpcre2-posix.pc.in
pcre2-config.in
perltest.sh
cmake/COPYING-CMAKE-SCRIPTS
cmake/{*.cmake,*.cmake.in}
m4/ax_check_vscript.m4
m4/ax_pthread.m4
m4/pcre2_visibility.m4
m4/pcre2_zos.m4
doc/p*
doc/html/*
src/libpcre2-*.sym
)
# Batch files: these are only passed to maint/CheckTxt with the -crlf option
# further down; they are never given to maint/Detrail.
crlf_files=(
RunGrepTest.bat
RunTest.bat
)
# C sources and headers (including templates and the pre-built character
# tables): these are passed to maint/Detrail and then to maint/CheckTxt with
# the -ascii option further down.
c_files=(
src/config-cmake.h.in
src/pcre2.h.in
src/pcre2_auto_possess.c
src/pcre2_chartables.c.dist
src/pcre2_chartables.c.ebcdic-1047-nl15
src/pcre2_chartables.c.ebcdic-1047-nl25
src/pcre2_chkdint.c
src/pcre2_compile.c
src/pcre2_compile.h
src/pcre2_compile_cgroup.c
src/pcre2_compile_class.c
src/pcre2_config.c
src/pcre2_context.c
src/pcre2_convert.c
src/pcre2_dfa_match.c
src/pcre2_dftables.c
src/pcre2_error.c
src/pcre2_extuni.c
src/pcre2_find_bracket.c
src/pcre2_fuzzsupport.c
src/pcre2_internal.h
src/pcre2_intmodedep.h
src/pcre2_jit_char_inc.h
src/pcre2_jit_compile.c
src/pcre2_jit_match_inc.h
src/pcre2_jit_misc_inc.h
src/pcre2_jit_neon_inc.h
src/pcre2_jit_simd_inc.h
src/pcre2_jit_test.c
src/pcre2_maketables.c
src/pcre2_match.c
src/pcre2_match_data.c
src/pcre2_match_next.c
src/pcre2_newline.c
src/pcre2_ord2utf.c
src/pcre2_pattern_info.c
src/pcre2_printint_inc.h
src/pcre2_script_run.c
src/pcre2_serialize.c
src/pcre2_string_utils.c
src/pcre2_study.c
src/pcre2_substitute.c
src/pcre2_substring.c
src/pcre2_tables.c
src/pcre2_ucd.c
src/pcre2_ucp.h
src/pcre2_ucptables_inc.h
src/pcre2_util.h
src/pcre2_valid_utf.c
src/pcre2_xclass.c
src/pcre2demo.c
src/pcre2grep.c
src/pcre2posix.c
src/pcre2posix.h
src/pcre2posix_test.c
src/pcre2test.c
)
# Remove trailing white space from the text files and the C sources. The CRLF
# batch files are deliberately not passed to Detrail.
echo Detrailing
perl maint/Detrail "${txt_files[@]}" "${c_files[@]}"
# Run maint/CheckTxt over each group of files: with no option for the text
# files, with -ascii for the C sources, and with -crlf for the batch files.
# NOTE(review): the exit statuses of Detrail and CheckTxt are not tested, so
# the script carries on whatever they return - confirm that this is intended.
echo Validating all text
perl maint/CheckTxt "${txt_files[@]}"
perl maint/CheckTxt -ascii "${c_files[@]}"
perl maint/CheckTxt -crlf "${crlf_files[@]}"
# Verify the version number in the Bazel file. The whole string is matched
# literally (grep -F), so the dots in the release number cannot act as regex
# wildcards and let a wrong version through.
if ! grep -F -q "version = \"$CURRENT_RELEASE\"" MODULE.bazel ; then
  echo "Version number in MODULE.bazel does not match current release"
  exit 1
fi
echo Done
#End