#!/usr/bin/perl -w
# (C) 2003-2007 Willem Jan Hengeveld <itsme@xs4all.nl>
# Web: http://www.xs4all.nl/~itsme/
#      http://wiki.xda-developers.com/
#
# $Id$
#

use strict;

# this script generates an xml summary file with the md5sums of all files
# in a directory tree.

use Digest::MD5;
use Getopt::Long;
use Time::Local;
use POSIX;
use IO::File;

use Dumpvalue;
# Unbuffered STDOUT, so output shows up as soon as it is printed.
$|=1;

# Only needed by the commented-out dumpValue debugging calls further down.
my $d= Dumpvalue->new;

my $verbose=0;                   # -v : also print a text summary to STDOUT
my @excludefiles;                # -X : patterns of paths to leave out, may be repeated
my $outputfile = "summary.xml";  # -o : name of the xml file, a timestamp is added to it

GetOptions("verbose|v" => \$verbose,
    "exclude|X=s"=>\@excludefiles,
    "output|o=s" =>\$outputfile,
) or die usage();

# Show the usage text rather than a source line number when no directory is given.
if (!@ARGV) { die "need to specify directory as argument\n", usage(); }

# Scan every directory named on the command line; each one becomes a
# top level node with the scanned tree stored under {contents}.
my %md5;
for my $path (@ARGV) {
    $md5{$path}{contents}= processDirectory($path);
}

# Adds an {info} record (md5 + size) to every directory node and
# returns the record covering all scanned directories together.
my $md5summary= summarize(\%md5);

my $allmd5= {root=>{ contents=>\%md5, info => $md5summary} };

writeXmlFile($allmd5, addTimeStampToFilename($outputfile));
#print "------------------------------------\n";
#$d->dumpValue($md5summary);
#print "------------------------------------\n";
#$d->dumpValue($md5);
#print "------------------------------------\n";

if ($verbose) {
    writeSummary(\%md5);
}

exit(0);

# Build the usage message that is shown when the command line is wrong.
# Returns a single line of text ending in a newline; $0 is filled in
# as the name of the script.
sub usage {
    return "Usage: $0 <path> [-X excludefiles] [-o outputfile]\n";
}
################################
# Insert the current local time into a file name, just before its extension.
#   $filename - file name, optionally with leading directories
# Returns "<base>-<YYYYmmddHHMMSS>.<ext>"; when the name has no
# extension, "xml" is used.
sub addTimeStampToFilename {
    my $filename= shift;

    # The extension must not contain a path separator: otherwise a dot in
    # a directory name ("out.d/report") would be taken as the start of
    # the extension and the timestamp would end up in the directory part.
    my ($base, $ext)= $filename =~ m{^(.+?)(?:\.([^./\\]+))?$};

    # an empty name does not match at all, avoid using an undefined base.
    $base= $filename unless defined $base;
    $ext ||= "xml";

    my $stamp= POSIX::strftime("%Y%m%d%H%M%S", localtime time);
    return "$base-$stamp.$ext";
}



# Recursively collect md5 and size information for one directory.
#   $root - directory that was given on the command line
#   $path - location of the directory to scan, relative to $root;
#           undefined when $root itself is scanned
# Returns a reference to a hash keyed by entry name. Entries passing
# the -f test get an {info} record from processFile, entries passing
# the -d test get a {contents} hash from a recursive call. Entries
# rejected by isInIgnoreList, and entries that are neither, are left out.
sub processDirectory {
    my ($root, $path)= @_;
    my %entries;

    my $names= getFilelist(makeFullPath($root, $path));

    ENTRY:
    for my $name (@$names) {
        next ENTRY if $name eq "." or $name eq "..";

        # $ondisk is used to access the entry, $below is what the
        # exclude patterns are matched against and what is passed down.
        my $ondisk= makeFullPath($root, $path, $name);
        my $below= makeFullPath($path, $name);

        next ENTRY if isInIgnoreList($below);

        if (-f $ondisk) {
            $entries{$name}{info}= processFile($ondisk);
        }
        elsif (-d $ondisk) {
            $entries{$name}{contents}= processDirectory($root, $below);
        }
    }

    return \%entries;
}

# Join path components with single slashes.
#   $full  - first component; an undefined or empty value starts from ""
#   @parts - components to append; undefined ones are skipped
# One leading slash is stripped from each appended component and one
# trailing slash from the path built so far, so "a/" + "/b" gives "a/b".
# Appending to an empty start yields a path with a leading slash.
# The caller's arguments are not modified.
sub makeFullPath {
    my ($full, @parts)= @_;

    $full= "" unless $full;

    for my $part (grep { defined } @parts) {
        (my $piece= $part) =~ s{^/}{};  # drop leading slash
        $full =~ s{/?$}{/$piece};       # drop trailing slash, append piece
    }

    return $full;
}

# Read the names of all entries of a directory.
#   $path - directory to list
# Returns a reference to an array with the names exactly as readdir
# delivers them, so "." and ".." are included. When the directory can
# not be opened, a warning is printed and a reference to an empty
# array is returned.
sub getFilelist {
    my ($path)= @_;

    # Lexical handle instead of the global bareword DIR, and return right
    # after a failed opendir: reading from and closing a handle that was
    # never opened only produces further warnings.
    my $dh;
    if (!opendir($dh, $path)) {
        warn "$!: reading $path\n";
        return [];
    }

    my @files= readdir $dh;
    closedir $dh;

    return \@files;
}
#################################

# Compute the md5 checksum and the size of one file.
#   $filename - path of the file to read
# Returns a reference to a hash { md5 => <hex digest>, size => <bytes> }.
# When the file can not be opened, a warning is printed and md5 is the
# string "unknown".
sub processFile {
    my $filename= shift;

    # -s yields an empty string for a zero-length file and undef when the
    # file can not be examined. Store a real 0 in both cases, so the xml
    # output never shows size="".
    my $size= (-s $filename) || 0;

    my $fh;
    if (!open($fh, "<", $filename)) {
        warn "$!: opening $filename\n";
        return {md5=>"unknown", size=>$size};
    }
    binmode($fh);

    my $md5= Digest::MD5->new();
    $md5->addfile($fh);
    close($fh);

    return {md5=>$md5->hexdigest, size=>$size};
}

# Decide whether a path is excluded by one of the -X patterns.
#   $fullfilename - path to test; processDirectory passes the path
#                   relative to the scanned root
# Each entry of @excludefiles is prefixed with "/", translated to a
# regular expression by glob2pat and tried with matches().
# Returns 1 when at least one pattern matches, 0 otherwise.
sub isInIgnoreList {
    my ($fullfilename)= @_;

    my $hits= grep { matches($fullfilename, glob2pat("/$_")) } @excludefiles;

    return $hits ? 1 : 0;
}

# Case-insensitive, unanchored regular expression test.
#   $filename - string to test
#   $pattern  - source text of the regular expression
# Returns the result of the match operator, which is true when the
# pattern is found anywhere in $filename.
sub matches {
    my $filename= shift;
    my $pattern= shift;

    return $filename =~ /$pattern/i;
}

# Translate a shell style glob into the source of a regular expression.
#   $globstr - glob, e.g. "*.o" or "file[0-9].txt"
# "*" becomes ".*", "?" becomes ".", "[", "]" and "-" are kept so that
# character classes and ranges work; every other character is quoted
# with \Q so it only matches itself.
# Returns the pattern text. It is not anchored.
sub glob2pat {
    my $globstr = shift;
    my %patmap = (
        '.' => '\.',
        '*' => '.*',
        '?' => '.',
        '[' => '[',
        ']' => ']',
        # Without this entry \Q turns "-" into "\-", which makes [a-z]
        # match only the three characters a, - and z. Outside a class an
        # unescaped "-" is an ordinary character, so this is safe there.
        '-' => '-',
    );
    $globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge;
    return $globstr;
}

# translate a dict of 
#  { svr=>
#    { comp1=>
#      { section1=>
#        { file1=>MD5, file2=>MD5},
#        section2=>
#        { file1=>MD5, file2=>MD5}
#      },
#      comp2=>
#      { section1=>
#        { file1=>MD5, file2=>MD5},
#        section2=>
#        { file1=>MD5, file2=>MD5}
#      }
#    }
#  }
# into
#    { svr=>md5(svr.comp1, svr.comp2),
#      svr.comp1=> md5(svr.comp1.section1,svr.comp1.section2),
#      svr.comp1.section1=>md5(svr.comp1.section1.file1, svr.comp1.section1.file2),
#    ...}
#
#  or basically:
#  { key => dict, key2 => value } 
#    into
#  { key => summarize(dict), key2 => value }
#
#
#  TODO: the suffix '.summary' is sort of in-band, directories with this name
#     cause processing to fail. fix this.
sub summarize {
    # $hash maps names to nodes. A node with {contents} but without
    # {info} gets {info} = summarize({contents}), stored in place.
    # Returns { md5 => <digest over the md5 strings of all nodes>,
    #           size => <sum of the sizes of all nodes> }.
    my $hash= shift;

    # The digest depends on the order in which the md5 strings are added,
    # and keys() returns names in an order that is not stable. Use a
    # fixed, sorted order so the same tree always gives the same checksum.
    my @names= sort keys %{$hash};

    for my $name (@names) {
        my $item= $hash->{$name};
        if (!exists $item->{info} && exists $item->{contents}) {
            $item->{info}= summarize($item->{contents});
        }
    }

    my $md5= Digest::MD5->new();
    my $sizetotal= 0;
    for my $name (@names) {
        my $item= $hash->{$name};
        $md5->add($item->{info}{md5});
        $sizetotal+= $item->{info}{size} || 0;   # a missing size counts as 0
    }
    return {md5=>$md5->hexdigest, size=>$sizetotal};
}

# Print a text listing of a summarized tree to the selected output handle.
#   $hash - reference to a hash of nodes, each with an {info} record
#           and, for directories, a {contents} hash
#   $path - path prefix printed in front of every name, default ""
# Every line shows size, md5 and path. Names are handled in
# case-insensitive order: first one line per directory of this level
# (with a trailing slash), then the listing of each of those
# directories, and finally the entries without {contents}.
sub writeSummary {
    my ($hash, $path)= @_;
    $path ||= "";

    my @names= sort {lc($a) cmp lc($b)} keys %$hash;

    # split the names into directories and everything else.
    my (@dirs, @files);
    for my $name (@names) {
        my $item= $hash->{$name};
        if (exists $item->{contents}) {
            push @dirs, $name;
        }
        else {
            push @files, $name;
        }
    }

    # directories of this level
    for my $name (@dirs) {
        my $item= $hash->{$name};
        printf("%12.0f %s %s/%s/\n", $item->{info}{size}, $item->{info}{md5}, $path, $name);
    }

    # the levels below
    for my $name (@dirs) {
        my $item= $hash->{$name};
        writeSummary($item->{contents}, "$path/$name");
    }

    # files of this level
    for my $name (@files) {
        my $item= $hash->{$name};
        printf("%12.0f %s %s/%s\n", $item->{info}{size}, $item->{info}{md5}, $path, $name);
    }
}

# write recursive structure to filehandle
# Write one node of the summarized tree, and everything below it, as
# nested <item> elements.
#   $fh    - object with a print method that receives the text
#   $md5   - node: { info => { md5 => .., size => .. } } plus, for a
#            directory, contents => { name => node, .. }
#   $name  - name of the node, written to the name attribute
#   $level - nesting depth used for indentation, default 0
# A node with a non-empty {contents} becomes an element with children
# in case-insensitive name order; every other node becomes an empty
# element.
sub writeAsXml {
    my ($fh, $md5, $name, $level)= @_;
    $level ||= 0;

    # Escape markup characters first, then write double quotes, control
    # characters and everything from 0x7f up as numeric references.
    my $encodedname= $name;
    $encodedname =~ s/&/&amp;/g;
    $encodedname =~ s/</&lt;/g;
    $encodedname =~ s/>/&gt;/g;
    $encodedname =~ s/([\x00-\x1f\x22\x7f-\xff])/sprintf("&#%d;", ord($1))/ge;

    # Look at {contents} before dereferencing it: keys %{$md5->{contents}}
    # would create an empty {contents} hash on every file node, and
    # writeSummary then takes all files for directories.
    my $contents= $md5->{contents};

    if ($contents && keys %$contents) {
        $fh->print(indent($level), qq(<item name="$encodedname" md5="$md5->{info}{md5}" size="$md5->{info}{size}">\n));
        for my $filename (sort {lc($a) cmp lc($b)} keys %$contents) {
            writeAsXml($fh, $contents->{$filename}, $filename, $level+1);
        }
        $fh->print(indent($level), qq(</item>\n));
    }
    else {
        $fh->print(indent($level), qq(<item name="$encodedname" md5="$md5->{info}{md5}" size="$md5->{info}{size}"/>\n));
    }
}

# Indentation prefix: two spaces per level, empty for level 0 and below.
sub indent {
    my $indent= shift;
    return $indent > 0 ? "  " x $indent : "";
}

# create complete xml file.
# Write the whole tree to an xml file.
#   $allmd5   - reference to a hash whose {root} node holds the tree
#   $filename - name of the file to create or overwrite
# The output is an xml declaration followed by a <dirsummary> element
# containing the items written by writeAsXml.
# Dies when the file can not be opened or closed: without the report
# the run has no result, and continuing would go unnoticed.
sub writeXmlFile {
    my ($allmd5, $filename)= @_;
    my $fh= IO::File->new();
    $fh->open($filename, "w")
        or die "$!: opening $filename for writing\n";
    $fh->print(qq(<?xml version="1.0"?>\n));
    $fh->print(qq(<dirsummary>\n));
    writeAsXml($fh, $allmd5->{root}, "root", 1);
    $fh->print(qq(</dirsummary>\n));
    # buffered write errors only show up when the file is closed.
    $fh->close()
        or die "$!: closing $filename\n";
}

