#!/usr/bin/perl -w # (C) 2003-2007 Willem Jan Hengeveld # Web: http://www.xs4all.nl/~itsme/ # http://wiki.xda-developers.com/ # # $Id: findstr 1628 2007-10-17 11:56:28Z itsme $ # use strict; # this script searches all files specified for the given pattern. # both unicode and ascii strings are found. # done: add verbose switch, which lists all matching strings. # todo: add workaround for matches at 16M block boundary ( which currently are not found ) # todo: add option to list rva instead of fileoffset # todo: add option to search for x86/arm/thumb-relative references # todo: in verbose/hex mode, dump result as hex. # examples: # search current dir for ascii or unicode strings RDO1: PER1: SPK1: # findstr -v "\x00(RDO|PER|SPK)1:\x00" . # # search current dir for hex dword 0x12345678 # findstr -x 12345678 . # # search current dir for byte sequence 01 02 # findstr -b "\x01\x02" . # # BUG: somehow a string with '0' in it fails to convert. # use IO::File; use Getopt::Long; my $g_matchword= 0; my $g_matchbinary= 0; my $g_pattern_is_hex= 0; my $g_pattern_is_guid= 0; my $g_verbose= 0; my $bRecursive = 0; my $pattern; my $g_matchcase= 0; my $g_flags= "(?i)"; sub usage { return <<__EOF__ Usage: findstr [-r] [-b] [-x | -g] [-w] pattern [files] -r recurse into directories -b binary match mode -v verbose - list matched content -I match case ( default: case insensitive ) -x pattern is specified as comma separated list of 32bit dwords -g pattern is a GUID, implies binary -w only find whole words __EOF__ } GetOptions( "w" => \$g_matchword, "b" => \$g_matchbinary, "I" => \$g_matchcase, "x" => \$g_pattern_is_hex, "g" => \$g_pattern_is_guid, "v" => \$g_verbose, "r" => \$bRecursive, ) or die usage(); die usage() if (!@ARGV); $pattern= shift; if ($g_pattern_is_guid) { $pattern= parse_guid_pattern($pattern); $g_matchbinary= 1; } if ($g_pattern_is_hex) { $pattern= parse_hex_pattern($pattern); $g_matchbinary= 1; } if ($g_matchbinary || $g_matchcase) { $g_flags= ""; } if (@ARGV) { for (@ARGV) { if (-f $_) { SearchFile($_, $pattern); } elsif (-d $_) { SearchDirectory($_, $pattern); } else { for (glob($_)) { if (-f $_) { SearchFile($_, $pattern); } elsif (-d $_) { SearchDirectory($_, $pattern); } } } } } else { binmode STDIN; $/=undef; my $data= <>; SearchString("STDIN", $data, $pattern); } sub parse_guid_pattern { my @guidstrs= my @guidpatterns; for my $guidstr (split /\|/, shift) { if ($guidstr =~ /(\w{8}-\w{4}-\w{4}-\w{4}-\w{12})/) { my @w= map { length($_)<=8 ? hex($_) : (hex(substr($_,0,8)), hex(substr($_,8))) } split /-/, $1; my $guid= pack("VvvnNn", @w); my $pattern= unpack("H*", $guid); $pattern =~ s/\w\w/\\x$&/g; push @guidpatterns, $pattern; } } return join("|", @guidpatterns); } sub findhexwordsize { my %size; $size{length($_)}++ for map { if (length($_)&1) { s/^0// } $_ } split /[^0-9a-fA-F]+/, $_[0]; my @lengths= sort { $a<=>$b} keys %size; if ($lengths[-1]>8) { return 0; } return 4 if ($size{8} || $size{7} || $size{6} || $size{5}); return 2 if ($size{4} || $size{3}); return 1 if ($size{2} || $size{1}); return 1; } sub parse_hex_pattern { my $hexpattern= shift; my $wordsize= findhexwordsize($hexpattern); my @dw; for my $hexword (split /[^0-9A-Fa-f]+/, $hexpattern) { # ... NOTE: '?' wildcard currently does not work ( \W filters it out ) if ($hexword eq "?") { push @dw, ".{$wordsize}"; } elsif ($wordsize==0) { #pattern is long hex string $hexword =~ s/../\\x$&/g; push @dw, $hexword; } elsif ($hexword =~ /^[0-9a-f?]+$/i) { $hexword =~ s/^0+//; $hexword = ("0" x (2*$wordsize-length($hexword))) . $hexword; my @bytes; for my $hexbyte ($hexword =~ /(..)/g) { push @bytes, byte_to_regex($hexbyte); } push @dw, join("", reverse @bytes); } else { die "unrecognised hex pattern: either ? or 8 digits: '$hexword'\n"; } } return join("", @dw); } sub byte_to_regex { my $bytepattern= shift; if ($bytepattern eq "??") { return "."; } else { return "\\x$bytepattern"; } } sub SearchDirectory { my ($path, $pattern)= @_; #print "search $bRecursive path '$path' for '$pattern'\n"; opendir(DIR, $path) or warn "$!: reading $path\n"; my @files= readdir DIR; closedir DIR; for (@files) { next if ($_ eq "." || $_ eq ".."); my $file= makeFullPath($path, $_); if (-f $file) { SearchFile($file, $pattern); } elsif ($bRecursive && -d $file) { SearchDirectory($file, $pattern); } } } sub SearchFile { my ($path, $pattern)= @_; #print "search file '$path' for '$pattern'\n"; my $fh= IO::File->new("$path", "r") or do { warn "$path: $!\n"; return; }; binmode $fh; my $data; my $ofs= 0; while (!$fh->eof()) { $fh->read($data, 0x1000000); SearchString($path, $ofs, $data, $pattern); $ofs += length($data); } $fh->close(); } sub SearchString { my ($name, $startofs, $data, $pattern)= @_; #print "search string '$name' of ", length($data), " bytes for '$pattern'\n"; my ($pre, $post)= ("", ""); if ($g_matchword) { ($pre, $post)= ('\b', '\b'); } my %offsets; my $pos= 0; while (defined $pos && $poslength($1), d=>$1, t=>"a"}; } } else { last; } } if (!$g_matchbinary) { if ($g_matchword) { ($pre, $post)= ('(?:\W\0|^)', '(?:\W\0|$)'); } $pattern = ToUnicodePattern($pattern); $pos= 0; while (defined $pos && $poslength($1), d=>$1, t=>"u"}; } } else { last; } } } if (!$g_verbose) { if (keys %offsets) { print "$name\n\t", join(", ", map { sprintf("%08lx", $_+$startofs); } sort {$a<=>$b} keys %offsets), "\n"; } } elsif ($g_pattern_is_guid) { printf("%08lx %-20s %s\n", $_+$startofs, $name, MatchAsGuid($offsets{$_}{d}, $offsets{$_}{t})) for (sort {$a<=>$b} keys %offsets); } else { printf("%08lx %-20s %s\n", $_+$startofs, $name, MatchAsString($offsets{$_}{d}, $offsets{$_}{t})) for (sort {$a<=>$b} keys %offsets); } } sub MatchAsGuid { my ($d, $t)= @_; my @w= unpack("VvvnNn", $d); return sprintf("{%08lx-%04x-%04x-%04x-%08x%04x}", @w); } sub MatchAsString { my ($d, $t)= @_; $d= substr($d, 0, 256); if ($g_pattern_is_hex && (length($d)%4==0)) { return join(" ", map { sprintf("%08lx", $_) } unpack("V*", $d)); } elsif ($g_matchbinary) { return join(" ", map { sprintf("%02x", $_) } unpack("C*", $d)) } elsif ($t eq "a") { return "\"$d\""; } elsif ($t eq "u") { $d =~ s/(.)\x00/$1/g; return "L\"$d\""; } else { warn "unexpected match type $t\n"; } } sub makeFullPath { my ($full, @parts)= @_; $full ||=""; for my $path (@parts) { next if (!defined $path); $path =~ s{^/}{}; # remove leading slash $full =~ s{/?$}{/$path}; # remove trailing slash, append path } return $full; } sub ToUnicodePattern { my ($pat)= @_; my $upat= ""; my $pos= 0; # !!! somehow this pattern fails to match '0' while ($pat =~ / ( \[[^\]]+\] # match [...] | \([^)]+\) # match (...) | [^*+?.{()^\$[\]\\] # match normal + | | \\[^x] # match escapes | \\x[0-9a-f]{2} | \. ) ( # quantifier (?: \* | \+ | \? | {\d*,\d*} ) \?? # followed by optional minimal quant. | # match no quantifier ) /gx ) { # .. for vim: / my ($data, $quant)= ($1, $2); #printf("dat=%s q=%s\n", $data, $quant); if ($pos != pos($pat)-length($&)) { printf("WARN: missed regex chars %d-%d : %s\n", $pos, pos($pat)-length($&), substr($pat, $pos, pos($pat)-length($&)-$pos)); } $pos= pos($pat); if (my ($spec, $content)= $data =~ /^\((\?[#:=!>]|\?<[=!]|)(.*)\)$/) { $content =~ s/./$&\x00/g; $data= "($spec$content)"; } elsif ($data =~ /^\[|^\\/) { $data .= "\x00"; } else { $data =~ s/./$&\x00/g; } if ($quant eq "") { $upat .= $data; } elsif ($data =~ /^\(/) { $upat .= "$data$quant"; } else { $upat .= "(?:$data)$quant"; } } #print "ascii pattern: $pat\n"; #print "unicode pattern: $upat\n"; if ($pos!=length($pat)) { printf("WARN: missed regex chars %d-%d : %s\n", $pos, length($pat), substr($pat, $pos)); } return $upat; }