/usr/share/doc/libtext-csv-xs-perl/examples/csv-check is in libtext-csv-xs-perl 1.11-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | #!/usr/bin/perl
# csv-check: Check validity of CSV file and report
# (m)'13 [10 Jul 2013] Copyright H.M.Brand 2007-2014
# This code requires the defined-or feature and PerlIO
use strict;
use warnings;
use Data::Peek;
use Encode qw( decode );
our $VERSION = "1.6"; # 2013-07-10
sub usage
{
my $err = shift and select STDERR;
print <<EOU;
usage: csv-check [-s <sep>] [-q <quot>] [-e <esc>] [-u] [--pp] [file.csv]
-s <sep> use <sep> as seperator char. Auto-detect, default = ','
The string "tab" is allowed.
-e <esc> use <sep> as seperator char. Auto-detect, default = ','
The string "undef" is allowed.
-q <quot> use <quot> as quotation char. Default = '"'
The string "undef" will disable quotation.
-u check if all fields are valid unicode
--pp use Text::CSV_PP instead (cross-check)
EOU
exit $err;
} # usage
use Getopt::Long qw(:config bundling nopermute passthrough);
my $sep; # Set after reading first line in a flurry attempt to auto-detect
my $quo = '"';
my $esc = '"';
my $opt_u = 0;
my $opt_p = 0;
GetOptions (
"help|?" => sub { usage (0); },
"c|s=s" => \$sep,
"q=s" => \$quo,
"e=s" => \$esc,
"u" => \$opt_u,
"pp!" => \$opt_p,
) or usage (1);
my $csvmod = "Text::CSV_XS";
if ($opt_p) {
require Text::CSV_PP;
$csvmod = "Text::CSV_PP";
}
else {
require Text::CSV_XS;
}
$csvmod->import ();
my $fn = defined $ARGV[0] ? $ARGV[0] : "-";
my $data = do { local $/; <> } or die "No data to analyze\n";
my ($bin, $rows, $eol, %cols) = (0, 0, undef);
unless ($sep) { # No sep char passed, try to auto-detect;
$sep = $data =~ m/["\d],["\d,]/ ? "," :
$data =~ m/["\d];["\d;]/ ? ";" :
$data =~ m/["\d]\t["\d]/ ? "\t" :
# If neither, then for unquoted strings
$data =~ m/\w,[\w,]/ ? "," :
$data =~ m/\w;[\w;]/ ? ";" :
$data =~ m/\w\t[\w]/ ? "\t" : ",";
$data =~ m/([\r\n]+)\Z/ and $eol = DDisplay "$1";
}
my $csv = $csvmod->new ({
sep_char => $sep eq "tab" ? "\t" : $sep,
quote_char => $quo eq "undef" ? undef : $quo,
escape_char => $esc eq "undef" ? undef : $esc,
binary => 1,
keep_meta_info => 1,
auto_diag => 1,
});
sub done
{
(my $file = defined $ARGV ? $ARGV : "") =~ s{(\S)$}{$1 };
(my $prog = $0) =~ s{.*/}{};
print "Checked $file with $prog $VERSION using $csvmod @{[$csvmod->VERSION]}\n";
my @diag = $csv->error_diag;
if ($diag[0] == 2012 && $csv->eof) {
my @coll = sort { $a <=> $b } keys %cols;
local $" = ", ";
my $cols = @coll == 1 ? $coll[0] : "(@coll)";
defined $eol or $eol = $csv->eol || "--unknown--";
print "OK: rows: $rows, columns: $cols\n";
print " sep = <$sep>, quo = <$quo>, bin = <$bin>, eol = <$eol>\n";
if (@coll > 1) {
print "WARN: multiple column lengths:\n";
printf " %6d line%s with %4d field%s\n",
$cols{$_}, $cols{$_} == 1 ? " " : "s",
$_, $_ == 1 ? "" : "s"
for @coll;
}
exit 0;
}
if ($diag[2]) {
print "$ARGV record $diag[3] at line $./$diag[2] - $diag[0] - $diag[1]\n";
my $ep = $diag[2] - 1; # diag[2] is 1-based
my $err = $csv->error_input . " ";
substr $err, $ep + 1, 0, "*";
substr $err, $ep, 0, "*";
($err = substr $err, $ep - 5, 12) =~ s/ +$//;
print " |$err|\n";
}
else {
print "$ARGV line $. - $diag[1]\n";
}
exit $diag[0];
} # done
sub stats
{
my $r = shift;
$cols{scalar @$r}++;
grep { $_ & 0x0002 } $csv->meta_info and $bin = 1;
if ($opt_u) {
my @r = @$r;
foreach my $x (0 .. $#r) {
utf8::is_utf8 ($r[$x]) and next;
local $SIG{__WARN__} = sub {
(my $msg = shift) =~ s{ at /\S+Encode.pm.*}{};
printf STDERR "Field %3d:%3d - '%s'\t- %s",
$rows, $x, DPeek ($r[$x]), $msg;
};
my $oct = decode ("utf-8", $r[$x], Encode::FB_WARN);
}
}
} # stats
open my $fh, "<", \$data or die "$fn: $!\n";
while (my $row = $csv->getline ($fh)) {
$rows++;
stats $row;
}
done;
|