#! /usr/bin/perl

# This is a script for checking whether a file contains any carriage return
# characters, and whether it is valid UTF-8.
#
# Command line arguments are processed strictly left to right. Each argument
# is either an option or the name of a file to check:
#
#   -ascii   additionally report files containing any character outside the
#            range \x01-\x7e
#   -crlf    do not report carriage return characters
#
# Because of the left-to-right processing, an option affects only the files
# named after it. The exit status is 0 if no problem was reported and 1 if at
# least one was. The script dies if a file cannot be opened or read.

use strict;
use warnings;

use Encode;

# State shared between the main program and checktxt().

our $yield = 0;   # exit status; set to 1 when any check fails
our $ascii = 0;   # bool: true once -ascii has been seen
our $crlf = 0;    # bool: true once -crlf has been seen

# This subroutine does the work for one file.
#
# Argument:  the name of the file to check
# Returns:   nothing useful; problems are reported on standard output and
#            recorded by setting $yield to 1
# Dies:      if the file cannot be opened or read

sub checktxt
{
    my ($file) = @_;

    # Read the raw bytes; decoding is done explicitly below so that invalid
    # UTF-8 can be detected rather than silently repaired by an I/O layer.

    open(my $in, "<:raw", $file) or die "Can't open $file for input: $!";
    my $bin = do { local $/ = undef; <$in> };   # slurp the whole file
    defined $bin or die "Can't read $file: $!";
    close($in);

    # With FB_CROAK, decode() dies on the first malformed sequence. The
    # trailing "1" makes the eval return true on success, so that the "or do"
    # branch runs only when decode() died.

    my $data;
    eval {
        $data = Encode::decode("UTF-8", $bin, Encode::FB_CROAK);
        1;   # return true
    } or do {
        # print, not printf: the file name must not be treated as a format.
        print "Bad UTF-8 in $file\n";
        $yield = 1;
        return;
    };

    if (!$crlf && index($data, "\r") != -1) {
        print "CR in $file\n";
        $yield = 1;
    }

    # Note that NUL (\x00) and DEL (\x7f) lie outside the accepted range and
    # are therefore also reported by this check.

    if ($ascii && $data =~ /[^\x01-\x7e]/) {
        print "Non-ASCII in $file\n";
        $yield = 1;
    }

    return;
}

# This is the main program

$, = "";   # Output field separator

foreach my $arg (@ARGV) {
    if ($arg eq "-ascii") {
        $ascii = 1;
    }
    elsif ($arg eq "-crlf") {
        $crlf = 1;
    }
    else {
        checktxt($arg);
    }
}

exit $yield;

# End