#!/usr/bin/perl -w
#########################################################################
#                                                                       #
#           Extended Generation and Checking of MD5 Checksums           #
#                   Copyright (C) 2004, John Zaitseff                   #
#                                                                       #
#########################################################################

# Author:   John Zaitseff <J.Zaitseff@zap.org.au>
# Date:     10th December, 2004
# Version:  1.0

# This script generates or checks 128-bit MD5 checksums for a series of
# files or directories.  It is essentially an extended version of the
# standard Unix md5sum(1) program: if a directory name is passed, all
# files in that directory are processed; if "--recursive" is given,
# directories below the specified directory are examined as well.
#
# Syntax:
#     xmd5sum [OPTIONS] [FILE ...]
#     xmd5sum [OPTIONS] --check [SUM]
#
# where OPTIONS may include one or more of:
#
#         --generate       - Generate MD5 sums (default).
#     -c, --check [SUM]    - Check MD5 sums against the list in SUM.
#     -h, --help           - Show a command-line summary.
#     -V, --version        - Show program version information.
#
# The following four options are only useful when generating the sums:
#     -b, --binary         - Read files in binary mode (default on DOS).
#     -t, --text           - Read files in text mode (default).
#     -r, -R, --recursive  - Process directories recursively.
#     -o, --output FILE    - Send the output to FILE after sums are generated.
#
# The following four options are only useful when checking the sums:
#         --status         - Don't print anything; status code shows success.
#     -q, --quiet          - Don't print files that checked OK (default).
#     -v, --verbose        - Print all files, whether checked OK or FAILED.
#     -w, --warn           - Warn if checksum file is improperly formatted.
#
# If no filenames are specified, standard input is used instead.
#
# WARNING: If "--output" is specified, the output file FILE is removed at
# the start of the program and only created after ALL checksums have been
# generated.

# This program, including associated files, is free software.  You may
# distribute it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either Version 2
# of the license, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


#########################################################################
# Configuration parameters and default values

use strict;                     # Enforce better programming habits
use warnings;                   # Lexical warnings (in addition to -w)
use File::Find qw(find);        # Directory tree traversal
use Digest::MD5 ();             # MD5 checksum algorithms

(our $O = $0) =~ s,^.*/,,;      # Script name (without path)
our $version = "1.0";           # Script version

# Important constants

our $stdin = "-";               # Standard input filename

# Default values

our $def_check_mode  = 0;       # Are we checking sums?
our $def_use_binmode =          # Use binary mode?
    (($^O =~ /MSWin|DOS/i) ? 1 : 0);    #   Yes under DOS/Windows
our $def_recurse     = 0;       # Recurse into subdirectories?
our $def_use_outfile = 0;       # Put output into a file?
our $def_verbose     = 0;       # Be verbose? (0 = quiet)
our $def_status_only = 0;       # Generate status only?
our $def_warn        = 0;       # Warn about improper formatting?

# Function prototypes (kept so that the subs may be called before they
# are defined without "called too early to check prototype" warnings)

sub showusage();
sub showversion();
sub showcmdlerr(@);
sub chkparam($);
sub do_check_mode();
sub do_generate_mode();
sub generate($);
sub sortdir(@);
sub print_md5line(*$$);
sub file_md5($$);


#########################################################################
# Initialise global variables

our $check_mode  = $def_check_mode;     # Are we checking sums?
our $use_binmode = $def_use_binmode;    # Use binary mode?
our $recurse     = $def_recurse;        # Recurse into subdirectories?
our $use_outfile = $def_use_outfile;    # Put output into a file?
our $verbose     = $def_verbose;        # Be verbose?
our $status_only = $def_status_only;    # Generate status only?
our $warn        = $def_warn;           # Warn about improper formatting?

our $out_filename;                      # Output filename, if $use_outfile
our $check_filename;                    # Checksum filename, if $check_mode
our $check_filename_given = 0;          # Was filename given as --check=SUM?

our $gen_warnings = 0;                  # Number of warnings during generate
our @g_filename   = ();                 # List of filenames, if $use_outfile
our @g_md5sum     = ();                 # List of corresponding MD5 sums


#########################################################################
# Process command-line arguments

# Options are consumed from the front of @ARGV until the first argument
# that does not start with "-", a literal "--", or a bare "-" (stdin).
# Where options conflict, the last one seen wins.

while (defined(my $opt = $ARGV[0])) {
    last if $opt !~ /^-/;
    shift @ARGV;
    last if $opt eq '--';

    # Split combined short-form options into single arguments
    if ($opt =~ /^-(\w{2,})/) {
        unshift @ARGV, map { "-$_" } split //, $1;
        next;
    }

    # Process command-line options
    if (($opt eq "--binary") || ($opt eq "-b")) {
        $use_binmode = 1;
    }
    elsif (($opt eq "--text") || ($opt eq "-t")) {
        $use_binmode = 0;
    }
    elsif (($opt eq "--recursive") || ($opt eq "-r") || ($opt eq "-R")) {
        $recurse = 1;
    }
    elsif ($opt eq "--no-recursive") {
        $recurse = 0;
    }
    elsif (($opt eq "--output") || ($opt eq "-o")) {
        chkparam($opt);
        $out_filename = shift @ARGV;
        $use_outfile  = 1;
    }
    elsif ($opt =~ /^--output=(.*)$/) {
        $out_filename = $1;
        $use_outfile  = 1;
    }
    elsif ($opt eq "--no-output") {
        $use_outfile = 0;
    }
    elsif (($opt eq "--check") || ($opt eq "-c")) {
        $check_mode = 1;
    }
    elsif ($opt =~ /^--check=(.*)$/) {
        $check_filename       = $1;
        $check_filename_given = 1;
        $check_mode           = 1;
    }
    elsif ($opt eq "--generate") {
        $check_mode = 0;
    }
    elsif ($opt eq "--status") {
        $status_only = 1;
        $verbose     = 0;
    }
    elsif (($opt eq "--quiet") || ($opt eq "-q")) {
        $status_only = 0;
        $verbose     = 0;
    }
    elsif (($opt eq "--verbose") || ($opt eq "-v")) {
        $status_only = 0;
        $verbose     = 1;
    }
    elsif (($opt eq "--warn") || ($opt eq "-w")) {
        $warn = 1;
    }
    elsif ($opt eq "--no-warn") {
        $warn = 0;
    }
    elsif (($opt eq "--help") || ($opt eq "-h") || ($opt eq "-?")) {
        showusage();
    }
    elsif (($opt eq "--version") || ($opt eq "-V")) {
        showversion();
    }
    elsif ($opt eq $stdin) {
        # Allow "-" for stdin: put it back and treat it as a filename
        unshift @ARGV, $opt;
        last;
    }
    else {
        showcmdlerr("Unrecognised option: $opt");
    }
}

# Further command line processing

if ($check_mode) {
    # --check mode: at most one remaining parameter (the checksum file)
    if (!$check_filename_given) {
        $check_filename = @ARGV ? shift @ARGV : $stdin;
    }
    showcmdlerr("Cannot use --output with --check") if $use_outfile;
    showcmdlerr("Too many parameters")              if @ARGV;
}
else {
    # --generate mode: default to standard input if no files were given
    unshift @ARGV, $stdin unless @ARGV;
}


#########################################################################
# Main program

if ($check_mode) {
    do_check_mode();
}
else {
    do_generate_mode();
}

exit(0);


#########################################################################
# Calculate the MD5 checksum of a single file
#
# Parameters:  FILENAME - path of the file to read
#              BINARY   - true to read the file in binary mode
# Returns:     the MD5 digest as a hexadecimal string, or undef if the
#              file could not be opened (in which case $! holds the
#              reason)

sub file_md5($$) {
    my ($filename, $binary) = @_;

    open(my $fh, "<", $filename) or return;
    binmode($fh) if $binary;
    my $csum = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);

    return $csum;
}


#########################################################################
# Compare checksums in a checksum file against the real files
#
# Reads $check_filename (or standard input if it is "-"), verifies every
# well-formed "checksum [*]filename" line, and exits with status 1 if
# any file failed its check or could not be read, 0 otherwise.  This
# function never returns.

sub do_check_mode() {
    my $file_count    = 0;      # How many files in the checksum file?
    my $checked_files = 0;      # How many files were checked for MD5?
    my $failed_files  = 0;      # How many failed the MD5 test?
    my $unread_files  = 0;      # How many could not be read at all?

    my $from_stdin = ($check_filename eq $stdin);
    my $check_fh;

    if ($from_stdin) {
        # "-" means standard input; handled explicitly so that the
        # filename never goes through a mode-interpreting two-arg open
        $check_fh = \*STDIN;
    }
    else {
        die "$O: $check_filename: Is a directory\n" if -d $check_filename;
        open($check_fh, "<", $check_filename)
            or die "$O: $check_filename: $!\n";
    }

    # Process the checksum file line by line

    while (my $line = <$check_fh>) {
        chomp $line;

        if ($line =~ /^([0-9A-Fa-f]{32})\s+(\*?)(.*)$/) {
            # Process the valid checksum line

            my $csum_read        = $1;          # MD5 checksum read from file
            my $use_binmode_read = ($2 eq "*"); # Binary only if "*" present
            my $filename         = $3;          # Filename from checksum file

            $file_count++;

            if (! -e $filename) {
                # File does not exist
                warn "$O: $filename: $!\n";
                print "$filename: FAILED open or read\n" if $verbose;
                $unread_files++;
                next;
            }
            if (-d _) {
                # Filename is a directory
                warn "$O: $filename: Is a directory\n";
                print "$filename: FAILED open or read\n" if $verbose;
                $unread_files++;
                next;
            }

            my $csum_cur = file_md5($filename, $use_binmode_read);
            if (!defined $csum_cur) {
                # File could not be opened
                warn "$O: $filename: $!\n";
                print "$filename: FAILED open or read\n" if $verbose;
                $unread_files++;
                next;
            }

            $checked_files++;

            # Compare the checksums (case-insensitively)
            if (lc($csum_read) eq lc($csum_cur)) {
                print "$filename: OK\n" if $verbose;
            }
            else {
                warn "$O: $filename: MD5 check failed\n"
                    if !$status_only && !$verbose;
                print "$filename: FAILED\n" if $verbose;
                $failed_files++;
            }
        }
        elsif ($warn) {
            # Improperly formatted checksum line
            warn "$O: "
                . ($from_stdin ? "(stdin)" : $check_filename)
                . ":$.: Improperly formatted MD5 checksum line\n";
        }
    }

    close($check_fh) unless $from_stdin;

    # Print warnings at the very end

    if ($verbose) {
        warn "$O: WARNING: $unread_files of $file_count files "
            . "could not be read\n"
            if $unread_files > 0;
        warn "$O: WARNING: $failed_files of $checked_files "
            . "computed MD5 checksums did NOT match\n"
            if $failed_files > 0;
    }

    exit((($failed_files != 0) || ($unread_files != 0)) ? 1 : 0);
}


#########################################################################
# Generate a checksum file
#
# Calculates MD5 checksums for every name left in @ARGV ("-" meaning
# standard input) and either prints them as they are produced or, with
# "--output", writes them all to $out_filename at the end.  Exits with
# status 1 if any warning was issued, 0 otherwise; never returns.

sub do_generate_mode() {

    # Check that any supplied filenames actually exist

    foreach my $name (@ARGV) {
        # Trim trailing slashes, but never reduce "/" to an empty string
        $name =~ s,(?<=[^/])/+$,,;
        if (($name ne $stdin) && (! -e $name)) {
            die "$O: $name: $!\n";
        }
    }

    # Remove any existing output file first so that it is not itself
    # included in the list of files to be checksummed

    if ($use_outfile && -f $out_filename) {
        if (!unlink($out_filename)) {
            warn "$O: WARNING: Cannot remove $out_filename: $!\n";
            $gen_warnings++;
        }
    }

    # Process each command-line argument

    foreach my $param (@ARGV) {
        if ($param eq $stdin) {
            # Use standard input: read it until EOF

            binmode(STDIN) if $use_binmode;
            my $csum = Digest::MD5->new->addfile(\*STDIN)->hexdigest;

            # Save or print the calculated checksum
            if ($use_outfile) {
                push @g_filename, $stdin;
                push @g_md5sum,   $csum;
            }
            else {
                print_md5line(\*STDOUT, $stdin, $csum);
            }
        }
        else {
            # Normal filename or directory

            find({ wanted            => \&generate,
                   preprocess        => \&sortdir,
                   dangling_symlinks => 0,
                   no_chdir          => 1 },
                 $param);
        }
    }

    # Generate the output file

    if ($use_outfile) {
        open(my $out_fh, ">", $out_filename)
            or die "$O: Cannot create $out_filename: $!\n";
        for my $i (0 .. $#g_filename) {
            print_md5line($out_fh, $g_filename[$i], $g_md5sum[$i]);
        }
        # Buffered write errors only surface when the handle is closed
        close($out_fh)
            or die "$O: Cannot write $out_filename: $!\n";
    }

    exit(($gen_warnings != 0) ? 1 : 0);
}


# Sort the list of filenames in the currently-processed directory
# (File::Find "preprocess" callback: receives and returns a name list)

sub sortdir(@) {
    return sort @_;
}


# Generate the MD5 checksum for a file
# (File::Find "wanted" callback: with no_chdir set, $_ holds the path of
# the current entry, including the directory part)

sub generate($) {
    if (-d $_) {
        # Don't recurse into subdirectories unless asked to by
        # "--recursive"; the top-level directory itself is always read
        if (!$recurse && ($_ ne $File::Find::topdir)) {
            $File::Find::prune = 1;
        }
        return;
    }

    (my $fn = $_) =~ s,^\./,,;          # Trim leading "./"

    # "_" reuses the stat information gathered by the -d test above
    if (! -e _) {
        # File does not exist
        warn "$O: $fn: $!\n";
        $gen_warnings++;
        return;
    }

    my $csum = file_md5($fn, $use_binmode);
    if (!defined $csum) {
        # File could not be opened
        warn "$O: $fn: $!\n";
        $gen_warnings++;
        return;
    }

    # Save or print the calculated checksum
    if ($use_outfile) {
        push @g_filename, $fn;
        push @g_md5sum,   $csum;
    }
    else {
        print_md5line(\*STDOUT, $fn, $csum);
    }
    return;
}


#########################################################################
# Print an MD5 line to a file
#
# Parameters:  HANDLE   - filehandle (or reference to one) to print to
#              FILENAME - name of the file that was checksummed
#              SUM      - hexadecimal MD5 checksum
#
# The line format is that of md5sum(1): the checksum, a space, then "*"
# for binary mode or a space for text mode, then the filename.

sub print_md5line(*$$) {
    my ($out, $filename, $sum) = @_;

    print {$out} $sum, " ", ($use_binmode ? "*" : " "), $filename, "\n";
    return;
}


#########################################################################
# Check that the next command-line argument is a valid parameter
#
# Parameters:  ARG - the option that requires a parameter
#
# Terminates with a usage error if the next argument is missing, empty
# or looks like another option.  A parameter of "0" is accepted.

sub chkparam($) {
    my ($arg) = @_;

    if (!defined($ARGV[0]) || ($ARGV[0] eq "") || ($ARGV[0] =~ /^-/)) {
        showcmdlerr("Missing argument for $arg");
    }
    return;
}


#########################################################################
# Show an error message relating to the command-line and terminate
#
# Parameters:  one or more message lines, each printed to standard error
#              with the script name prefixed, followed by a usage summary

sub showcmdlerr(@) {
    for my $msg (@_) {
        warn "$O: $msg\n";
    }

    die "\nUsage:\n"
        . " $O [--generate] [--binary | --text] [--recursive] "
        . "[--output FILE]\n"
        . " [FILE ...]\n"
        . " $O [--status | --quiet | --verbose] [--warn] --check [SUM]\n"
        . " $O [--help] [--version]\n";
}


#########################################################################
# Display usage information, then exit successfully

sub showusage() {
    print <<"DATAEND";
$O v$version: Extended Generation and Checking of MD5 Checksums.
Copyright (C) 2004, John Zaitseff.

The $O program generates or checks MD5 (128-bit) checksums for a
series of files or directories.  It is essentially an extended version
of the standard Unix md5sum(1) program: if a directory name is passed,
all files in that directory are processed; if \"--recursive\" is
specified, directories below the specified directory are examined as
well.

Syntax:
    $O [OPTIONS] [FILE ...]
    $O [OPTIONS] --check [SUM]

where OPTIONS may include one or more of:

        --generate       - Generate MD5 sums (default).
    -c, --check [SUM]    - Check MD5 sums against the list in SUM.
    -h, --help           - Show a command-line summary.
    -V, --version        - Show program version information.

The following four options are only useful when generating the sums:
    -b, --binary         - Read files in binary mode (default on DOS).
    -t, --text           - Read files in text mode (default).
    -r, -R, --recursive  - Process directories recursively.
    -o, --output FILE    - Send the output to FILE after sums are generated.

The following four options are only useful when checking the sums:
        --status         - Don\'t print anything; status code shows success.
    -q, --quiet          - Don\'t print files that checked OK (default).
    -v, --verbose        - Print all files, whether checked OK or FAILED.
    -w, --warn           - Warn if checksum file is improperly formatted.

If no filenames are specified, standard input is used instead.

WARNING: If \"--output\" is specified, the output file FILE is removed at
the start of the program and only created after ALL checksums have been
generated.

DATAEND

    exit(0);
}


#########################################################################
# Display program version information, then exit successfully

sub showversion() {
    print <<"DATAEND";
$O v$version: Extended Generation and Checking of MD5 Checksums.
Copyright (C) 2004, John Zaitseff.

This program, including associated files, is distributed under the GNU
General Public License.  See the file COPYING for more information.

DATAEND

    exit(0);
}

__END__


#########################################################################
# Program documentation in POD format

=head1 NAME

xmd5sum - Extended generation and checking of MD5 checksums

=head1 SYNOPSIS

xmd5sum [B<--generate>] [B<--binary> | B<--text>] [B<--recursive>]
[B<--output> I<FILE>] [I<FILE> ...]

xmd5sum [B<--status> | B<--quiet> | B<--verbose>] [B<--warn>]
B<--check> [I<SUM>]

xmd5sum [B<--help>] [B<--version>]

=head1 DESCRIPTION

B<xmd5sum> generates or checks 128-bit MD5 checksums for a series of
files or directories.
It is essentially an extended version of the standard md5sum(1)
program: if a directory name is passed, all files in that directory are
processed; if B<--recursive> is given, directories below the specified
directory are examined as well.

=head1 OPTIONS

The following options are mutually exclusive; only one of these options
should be specified for a given B<xmd5sum> invocation:

=over 4

=item B<--generate>

Generate MD5 checksums for each file I<FILE> listed following the
command-line options.  If I<FILE> is a directory, B<xmd5sum> generates
MD5 checksums for each file in that directory.  If B<--recursive> is
specified, subdirectories found in I<FILE> will also be processed in a
recursive fashion.

B<--generate> is the default mode and does not need to be specified
explicitly.

=item B<-c>, B<--check>

Read a list of filenames and previously-generated MD5 checksums from the
file I<SUM> (standard input if I<SUM> is B<-> or is not specified) and
check whether the actual files so named match the MD5 checksum data.

The input file I<SUM> is usually generated by a previous run of
B<xmd5sum>; each line in this file has one of the following two formats:

    checksum  filename
    checksum *filename

The first format indicates a text file; the second (with a literal C<*>
character) a binary file (see B<--binary> and B<--text> for more
information on the difference between the two).  The checksum is a
32-character hexadecimal string.

The output from running B<xmd5sum> with B<--check> depends on the
B<--status>, B<--quiet>, B<--verbose> and B<--warn> options.  The program
exits with non-zero status if any file does not match the corresponding
MD5 checksum data (or if there were any problems accessing the file);
otherwise, it exits successfully.

=item B<-h>, B<--help>

Print a usage message listing all of the available options, then exit
successfully.

=item B<-V>, B<--version>

Print the program's version number and copyright information, then exit
successfully.


=back

The following four options are only useful when generating checksums:

=over 4

=item B<-b>, B<--binary>

Treat all files listed on the command line as binary, for systems which
distinguish between binary and text files (that is, for systems that have
different internal and external representations of certain text
characters, such as for the end-of-line marker).  On systems where there
is no difference between binary and text mode, the only practical effect
of this option is to prepend a C<*> character to the filenames listed in
the output.

This option is the default on Microsoft Windows and MS-DOS.

=item B<-t>, B<--text>

Treat all files listed on the command line as text.  This option is the
opposite of B<--binary> and is the default on most systems.

=item B<-r>, B<-R>, B<--recursive>

Process any directories listed on the command line recursively.  In other
words, if a listed directory has subdirectories of its own, those
subdirectories (and I<their> subdirectories, and so on) will also be
processed.

=item B<-o> I<FILE>, B<--output> I<FILE>

Send the output of B<xmd5sum> to the file I<FILE> after I<all> of the MD5
checksums have been generated.

B<WARNING:> If I<FILE> exists when B<xmd5sum> is run (and is an ordinary
file), I<FILE> is removed I<before> any checksums are generated.  This
prevents the file I<FILE> from being treated as one of the files to be
checksummed.  However, it also means that if B<xmd5sum> is interrupted,
no output file will be generated at all, and the original file will have
been deleted.

=back

The following four options are only useful when validating (checking)
previously-generated MD5 checksums; the first three are mutually
exclusive:

=over 4

=item B<--status>

Do not print anything to standard output or standard error whether a file
matches its previously-generated MD5 checksum data or not.  Instead,
simply return a successful (zero) exit status if all files correctly
validated, or a non-zero exit code if at least one file failed its check.
Please note that if a file does not exist or cannot be opened for
reading, a warning is still printed to standard error.

=item B<-q>, B<--quiet>

Do not print anything to standard output for files that correctly
validated.  Files that fail the check are listed on standard error with
the message C<MD5 check failed>.

=item B<-v>, B<--verbose>

Print each filename checked, along with its checksum status, to standard
output.  If a file correctly validated against its previously-generated
MD5 checksum, C<OK> is printed; otherwise, C<FAILED> is printed.  If any
file fails the MD5 check, a summary warning of the form C<WARNING: N of M
computed MD5 checksums did NOT match> is also sent to standard error once
all files have been processed.

=item B<-w>, B<--warn>

Print a warning to standard error for any line in the checksum file
I<SUM> that is improperly formatted (does not match the format listed
under the B<--check> option).

=back

If no filenames are specified on the command line for B<--generate>,
standard input is used instead.  The filename F<-> also represents
standard input.

If conflicting options are specified on the command line, the last option
seen takes precedence.

=head1 RETURN VALUE

B<xmd5sum> returns a successful (zero) exit status if all files could be
read and/or validated.  If a file or directory could not be read, or if a
file did not match its previously-generated MD5 checksum, a non-zero exit
code is returned.

=head1 EXAMPLES

To generate the MD5 checksum for an empty file F<empty>:

    cp /dev/null empty
    xmd5sum empty

The output will be:

    d41d8cd98f00b204e9800998ecf8427e  empty

To generate MD5 checksums for all files in the current directory, with
output being put in a file F<md5sum> in the same directory:

    xmd5sum -o md5sum .

(This example is useful for generating MD5 checksum files for CD-ROMs).

To check a previously-generated F<md5sum> file against the current
versions of the files:

    xmd5sum -c md5sum

=head1 FEEDBACK

Your comments, suggestions, corrections and enhancements are always
warmly welcomed!
Please send these to:

    Postal:   John Zaitseff,
              The ZAP Group,
              Unit 6, 116 Woodburn Road,
              Berala, NSW, 2141,
              Australia

    E-mail:   J.Zaitseff@zap.org.au
    Web:      http://www.zap.org.au/software/utils/scripts/

=head1 COPYRIGHT

Copyright (C) 2004, John Zaitseff.

This program, including associated files, is free software.  You may
distribute it and/or modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either Version 2 of
the license, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

=head1 SEE ALSO

md5sum(1)

Home page for various scripts on the ZAP Group server:
http://www.zap.org.au/software/utils/scripts/

=cut