#!/usr/bin/perl -w
# (C) 2003-2007 Willem Jan Hengeveld
# Web: http://www.xs4all.nl/~itsme/
#      http://wiki.xda-developers.com/
#
# $Id: dirsummary 1502 2007-04-15 07:54:20Z itsme $
#
use strict;

# This script generates an xml summary file with md5sums of all files
# in one or more directory trees.
#
# In memory the tree is a hash of nodes keyed by file name, where
#   a file node      is { info => { md5 => HEX, size => BYTES } }
#   a directory node is { contents => { name => node, ... },
#                         info     => { md5 => HEX, size => BYTES } }
# The 'info' of a directory is filled in by summarize().

use Digest::MD5;
use Getopt::Long;
use Time::Local;
use POSIX;
use IO::File;
use Dumpvalue;

$|=1;

my $d= Dumpvalue->new;      # only used by the commented-out debug dumps in main()
my $verbose=0;
my @excludefiles;           # glob patterns given with -X, read by isInIgnoreList()
my $outputfile = "summary.xml";

# Run the command line driver only when executed as a script, so that the
# subs below can be loaded (require/do) without side effects.
exit(main()) unless caller;

# Command line driver: parses options, scans every directory named in @ARGV,
# writes the timestamped xml summary, and in verbose mode also prints a
# listing to stdout.  Returns the process exit status.
sub main {
    GetOptions(
        "verbose|v"   => \$verbose,
        "exclude|X=s" => \@excludefiles,
        "output|o=s"  => \$outputfile,
    ) or die usage();

    if (!@ARGV) {
        die "need to specify directory as argument";
    }

    my %md5;
    for my $path (@ARGV) {
        $md5{$path}{contents}= processDirectory($path);
    }

    my $md5summary= summarize(\%md5);
    my $allmd5= { root=>{ contents=>\%md5, info=>$md5summary } };

    writeXmlFile($allmd5, addTimeStampToFilename($outputfile));

    #print "------------------------------------\n";
    #$d->dumpValue($md5summary);
    #print "------------------------------------\n";
    #$d->dumpValue($md5);
    #print "------------------------------------\n";

    if ($verbose) {
        writeSummary(\%md5);
    }
    return 0;
}

# Returns the usage text.
sub usage {
    return <<__EOF__;
Usage: $0 [-X excludefiles] [-o outputfile]
__EOF__
}

################################

# Inserts a "-YYYYMMDDhhmmss" local-time stamp between the base name and the
# extension of $filename ("summary.xml" -> "summary-20070415075420.xml").
# A name without extension gets ".xml".
sub addTimeStampToFilename {
    my $filename= shift;

    # The extension may not contain a path separator; otherwise the dots in
    # "../summary" or "out.dir/summary" would be taken for the extension dot.
    my ($base, $ext)= $filename =~ m{^(.+?)(?:\.([^./\\]+))?$};
    $base= $filename if (!defined $base);
    $ext= "xml" if (!defined $ext);

    my $stamp= POSIX::strftime("%Y%m%d%H%M%S", localtime time);
    return "$base-$stamp.$ext";
}

# Recursively scans the directory $root/$path and returns a hashref
# name => node for its entries.  $path is relative to $root and is undefined
# for the top level call.  Entries matching an exclude pattern, and entries
# that are neither plain files nor directories, are left out.
sub processDirectory {
    my ($root, $path)= @_;
    my %md5;
    #print "processing $root $path\n";

    my $files= getFilelist(makeFullPath($root, $path));
    for my $file (@$files) {
        next if ($file eq "." || $file eq "..");

        my $fullpath= makeFullPath($root, $path, $file);
        # exclude patterns are matched against the path relative to $root,
        # which always starts with a '/'
        my $relative= makeFullPath($path, $file);
        next if (isInIgnoreList($relative));

        if (-f $fullpath) {
            $md5{$file}{info}= processFile($fullpath);
        }
        elsif (-d $fullpath) {
            $md5{$file}{contents}= processDirectory($root, $relative);
        }
    }
    return \%md5;
}

# Joins path components with exactly one '/' between them.  Undefined
# components are skipped; an undefined first component is treated as "",
# so makeFullPath(undef, "a") yields "/a".
sub makeFullPath {
    my ($full, @parts)= @_;
    $full ||="";
    for my $part (@parts) {
        next if (!defined $part);
        (my $clean= $part) =~ s{\A/}{};     # remove leading slash
        $full =~ s{/?\z}{/$clean};          # remove trailing slash, append path
    }
    return $full;
}

# Returns an arrayref with the names in directory $path (including "." and
# ".." as returned by readdir).  When the directory cannot be read a warning
# is printed and an empty list is returned.
sub getFilelist {
    my ($path)= @_;
    my $dh;
    if (!opendir($dh, $path)) {
        warn "$!: reading $path\n";
        return [];
    }
    my @files= readdir $dh;
    closedir $dh;
    return \@files;
}

#################################

# Returns { md5 => HEXDIGEST, size => BYTES } for one file.  When the file
# cannot be opened or read, a warning is printed and md5 is "unknown".
sub processFile {
    my $filename= shift;
    my $size= -s $filename;

    my $fh;
    if (!open($fh, "<", $filename)) {
        warn "$!: opening $filename\n";
        return {md5=>"unknown", size=>$size};
    }
    binmode($fh);

    my $md5= Digest::MD5->new();
    # addfile croaks on a read error; one unreadable file should not abort
    # the whole scan.
    my $ok= eval { $md5->addfile($fh); 1 };
    my $error= $@;
    close($fh);
    if (!$ok) {
        warn "reading $filename: $error";
        return {md5=>"unknown", size=>$size};
    }
    return {md5=>$md5->hexdigest, size=>$size};
}

# Returns 1 when $fullfilename (a '/'-prefixed path relative to the scanned
# root) matches one of the -X exclude patterns, 0 otherwise.
# Each pattern is matched as "/pattern" anywhere in the path and is not
# anchored at the end, so "-X foo" also excludes "foobar" and everything
# below a directory named "foo".
sub isInIgnoreList {
    my $fullfilename= shift;
    for my $pattern (@excludefiles) {
        if (matches($fullfilename, glob2pat("/$pattern"))) {
            return 1;
        }
    }
    return 0;
}

# Case-insensitive, unanchored regex match of $filename against $pattern.
sub matches {
    my ($filename, $pattern)= @_;
    return ($filename =~ m{$pattern}i);
}

# Translates a shell glob to a regex string:
#   '*' => '.*',  '?' => '.',  '[...]' => character class ('[!...]' negated),
# all other characters are matched literally.  A '[' without a matching ']',
# or a class perl cannot compile, is matched literally.
sub glob2pat {
    my $globstr = shift;
    my %patmap = (
        '*' => '.*',
        '?' => '.',
    );
    my $pattern= "";
    while ($globstr =~ m{\G(?:(\[(!?)(\]?[^\]]*)\])|(.))}gs) {
        my ($class, $negate, $members, $char)= ($1, $2, $3, $4);
        if (defined $char) {
            $pattern .= exists $patmap{$char} ? $patmap{$char} : quotemeta($char);
            next;
        }
        # Escape only what is special inside a class; '-' must stay as it
        # is, otherwise ranges like [0-9] stop working.
        (my $escaped= $members) =~ s{([\\\^\[\]])}{\\$1}g;
        my $regexclass= "[" . ($negate ? "^" : "") . $escaped . "]";
        if ($members eq "" || !eval { qr/$regexclass/; 1 }) {
            $regexclass= quotemeta($class);
        }
        $pattern .= $regexclass;
    }
    return $pattern;
}

# Fills in the 'info' record of every directory node below $hash, and
# returns the { md5, size } summary of $hash itself:
#
#   { name1 => { contents => {...} }, name2 => { info => {...} } }
# becomes
#   { name1 => { contents => {...}, info => summarize(contents) },
#     name2 => { info => {...} } }
#
# The md5 of a directory is the md5 over the hex digests of its entries, the
# size is the sum of the sizes of its entries.  The entries are processed in
# sorted name order so that the same tree always gives the same digest
# (perl's hash order differs between runs).
#
# Entry names are not part of the digest, and since 'info' and 'contents'
# are stored next to the names instead of in them, no file or directory
# name is treated specially.
sub summarize {
    my $hash= shift;
    my $md5= Digest::MD5->new();
    my $sizetotal= 0;
    for my $key (sort keys %{$hash}) {
        my $item= $hash->{$key};
        if (!exists $item->{info} && exists $item->{contents}) {
            $item->{info}= summarize($item->{contents});
        }
        $md5->add($item->{info}{md5});
        # the size is undefined for files that could not be stat'ed
        $sizetotal += $item->{info}{size} if (defined $item->{info}{size});
    }
    return {md5=>$md5->hexdigest, size=>$sizetotal};
}

# Prints "size md5 path" lines to stdout for the tree $hash, whose entries
# live below $path (default ""): first the directories of this level, then
# recursively the contents of those directories, then the files of this
# level.  Names are sorted case-insensitively.
sub writeSummary {
    my $hash= shift;
    my $path = shift || "";

    my @names= sort {lc($a) cmp lc($b)} keys %$hash;
    my @dirs=  grep {  exists $hash->{$_}{contents} } @names;
    my @files= grep { !exists $hash->{$_}{contents} } @names;

    # first write top level directories
    for my $key (@dirs) {
        my $info= $hash->{$key}{info};
        printf("%12.0f %s %s/%s/\n", $info->{size}, $info->{md5}, $path, $key);
    }
    # then write lower levels.
    for my $key (@dirs) {
        writeSummary($hash->{$key}{contents}, "$path/$key");
    }
    # write top level files
    for my $key (@files) {
        my $info= $hash->{$key}{info};
        printf("%12.0f %s %s/%s\n", $info->{size}, $info->{md5}, $path, $key);
    }
}

# Escapes $name for use in a double-quoted xml attribute value: the markup
# characters become entities, and control characters, the double quote and
# all bytes from 0x7f up become numeric character references.
sub xmlEscape {
    my $name= shift;
    my $encodedname= $name;
    $encodedname =~ s/&/&amp;/g;        # must be first, the others add '&'
    $encodedname =~ s/</&lt;/g;
    $encodedname =~ s/>/&gt;/g;
    $encodedname =~ s/([\x00-\x1f\x22\x7f-\xff])/sprintf("&#%d;", ord($1))/ge;
    return $encodedname;
}

# Returns the name/md5/size attribute string for one node.
sub xmlAttributes {
    my ($name, $info)= @_;
    $info ||= {};
    my $md5=  defined $info->{md5}  ? $info->{md5}  : "unknown";
    my $size= defined $info->{size} ? $info->{size} : 0;
    return sprintf('name="%s" md5="%s" size="%.0f"', xmlEscape($name), $md5, $size);
}

# write recursive structure to filehandle
#
#   $fh     object or handle with a print method
#   $md5    node to write
#   $name   name of the node
#   $level  indentation level, default 0
#
# NOTE(review): the element and attribute names (dir, file, name, md5, size)
# are a reconstruction, the markup was missing from the available copy of
# this script -- confirm against whatever reads the summary file.
sub writeAsXml {
    my ($fh, $md5, $name, $level)= @_;
    $level ||= 0;
    my $attributes= xmlAttributes($name, $md5->{info});

    # Test with 'exists': 'keys %{$md5->{contents}}' would create an empty
    # 'contents' in every file node, after which writeSummary() takes all
    # files for directories.
    if (exists $md5->{contents}) {
        my $contents= $md5->{contents};
        $fh->print(indent($level), qq(<dir $attributes>\n));
        for my $filename (sort {lc($a) cmp lc($b)} keys %$contents) {
            writeAsXml($fh, $contents->{$filename}, $filename, $level+1);
        }
        $fh->print(indent($level), qq(</dir>\n));
    }
    else {
        $fh->print(indent($level), qq(<file $attributes/>\n));
    }
}

# Returns the whitespace prefix for nesting level $indent.
sub indent {
    my $indent= shift;
    return " " x $indent if ($indent>0);
    return "";
}

# create complete xml file.
# Dies when $filename cannot be created or written.
#
# NOTE(review): the xml declaration and the 'dirsummary' document element
# are a reconstruction, see writeAsXml().
sub writeXmlFile {
    my ($allmd5, $filename)= @_;
    my $fh= IO::File->new();
    $fh->open($filename, "w")
        or die "$!: creating $filename\n";

    $fh->print(qq(<?xml version="1.0"?>\n));
    $fh->print(qq(<dirsummary>\n));
    writeAsXml($fh, $allmd5->{root}, "root", 1);
    $fh->print(qq(</dirsummary>\n));

    # buffered write errors only show up at close
    $fh->close()
        or die "$!: writing $filename\n";
}