#!/usr/bin/perl -w
use strict;
# vim: sw=4 et ts=4
# how to use:
#
# google "some words, and more words"
# will google this: "some words" "and more words"
#
$|=1;
use HTTP::Request::Common qw(POST GET);
use LWP::Simple;
use LWP::UserAgent;
use Getopt::Long;
# Build the help text for this script.
# Takes no arguments and prints nothing; the text is returned as a single
# string so the caller decides what to do with it (the option parser passes
# it to die).
sub usage {
    # Single-quoted heredoc: the text has nothing to interpolate.
    my $help_text = <<'__EOF__';
Usage: google [-c] [-a] "search string1,word2" "search string2,word3"
will do 2 google queries: ( quotes are always added )
"search string1" "word2"
"search string2" "word3"
options:
-a return all result pages, instead of only the first 100 results.
-c just print the count
-v verbose: print query
__EOF__
    return $help_text;
}
# ---------------------------------------------------------------------------
# Main script: parse the options, then run every command-line argument as one
# query.
#
# The option flags and the user agent are file-level lexicals on purpose:
# QueryGoogle (further down) reads $countonly, $verbose and $ua directly.
# ---------------------------------------------------------------------------
my $allpages;     # -a: keep fetching pages until two in a row add no new url
my $countonly;    # -c: print only what ParseCount extracts from the first page
my $verbose;      # -v: QueryGoogle prints the encoded query for page 0
GetOptions(
    "a" => \$allpages,
    "c" => \$countonly,
    "v" => \$verbose,
) or die usage();

my $ua = LWP::UserAgent->new();
$ua->agent("Mozilla/4.75 [en] (X11; U; Linux 2.2.17 i686)");
binmode STDOUT, ":utf8";

for my $query (@ARGV) {
    my $pagenr = 0;
    my $empty  = 0;    # number of consecutive pages that added no new url
    my %urls;          # every url seen so far for this query

    while ($pagenr == 0 || $allpages && $empty < 2) {
        my $googlehtml;
        if ($query eq "-") {
            # "-" means: take the html from standard input instead of
            # querying. STDIN is read explicitly: the bare <> operator walks
            # @ARGV, so it would try to open the other queries as files and
            # remove them from the very list this loop is iterating over.
            # Only page 0 has content; later pages are empty so that -a
            # terminates.
            # NOTE(review): the lines still carry their own newline, so the
            # "\n" join leaves a blank line between them; kept as in the
            # original because the parser's sensitivity to that is unknown.
            $googlehtml = ($pagenr == 0) ? join("\n", <STDIN>) : "";
        }
        else {
            $googlehtml = QueryGoogle($query, $pagenr);
        }

        if ($countonly) {
            printf("%12d %s\n", ParseCount($googlehtml), $query);
            # One page is enough for the count. Without leaving the loop
            # here, "-a -c" never terminated: $empty is only advanced in the
            # result-listing path below.
            last;
        }

        # ParseGoogle returns a reference to a list of hashes, each with at
        # least a 'url' key (that is all this loop relies on).
        my $result   = ParseGoogle($googlehtml);
        my $oldcount = scalar keys %urls;
        $urls{ $_->{url} }++ for @$result;
        my $newcount = scalar keys %urls;

        if ($oldcount != $newcount) {
            # The page contributed at least one unseen url: show it and
            # reset the "nothing new" streak.
            PrintResult($result);
            $empty = 0;
        }
        else {
            $empty++;
        }
        $pagenr++;
    }

    if (!$countonly) {
        # Counts every page that was requested, including trailing pages
        # that turned out to contain nothing new.
        printf("\n\ntotal %d pages\n", $pagenr);
    }
}
exit(0);
# Remove duplicates from a list.
#
#   $list   reference to an array of strings (an undefined value is treated
#           as an empty list)
#
# Returns a reference to a new array holding each distinct value once, in
# the order of its first occurrence. The input array is not modified.
# Values are compared as strings, because they are used as hash keys.
sub uniq {
    my ($list) = @_;
    my %seen;
    # grep keeps a value only the first time it is met, which makes the
    # result order stable instead of following Perl's randomised hash order.
    return [ grep { !$seen{$_}++ } @{ $list || [] } ];
}
# Fetch one page of Google search results and return the raw response body.
#
#   $query    comma-separated search terms. Every term is wrapped in double
#             quotes, except terms starting with a "word:" prefix, which are
#             passed through unquoted.
#   $pagenr   zero-based number of the result page to fetch.
#
# Reads the file-level flags $countonly (10 results per page instead of 100)
# and $verbose (print the encoded query when fetching page 0), and sends the
# request through the file-level user agent $ua.
#
# Returns the response content. A failed request is reported on STDERR, but
# the content is still returned so callers keep working as before.
sub QueryGoogle {
    my ($query, $pagenr) = @_;

    # "a b, c" becomes: "a b" "c". split consumes every comma, so none is
    # left in the string afterwards.
    $query = join " ", map { /^\w+:/ ? $_ : "\"$_\"" } split /\s*,\s*/, $query;

    # Escape the characters that would change the structure of the url.
    # '#' is included: unescaped it starts the fragment, so the rest of the
    # query and the num/start parameters would never be sent.
    # This has to happen before spaces are turned into '+', otherwise a
    # literal '+' could not be told apart from an encoded space.
    $query =~ s/([&?%+#])/sprintf("%%%02x", ord($1))/eg;
    $query =~ s/ /+/g;

    my $num = $countonly ? 10 : 100;
    if ($pagenr == 0 && $verbose) {
        print "query: $query\n";
    }

    # The start offset follows the page size actually requested.
    my $url = sprintf(
        "http://www.google.com/search?q=%s&num=%d&hl=en&safe=off&btnG=Search&start=%d",
        $query, $num, $pagenr * $num
    );
    my $rp = $ua->request(GET($url));
    if (!$rp->is_success) {
        warn sprintf("google request for page %d failed: %s\n", $pagenr, $rp->status_line);
    }
    return $rp->content;
}
# Build the query string that belongs to combination number $qid.
#
#   $parts   reference to a list of array references; each inner array holds
#            the alternative strings for one position of the query
#   $qid     number of the wanted combination
#
# $qid is decoded as a mixed-radix number: the first part is the least
# significant digit, and the number of alternatives of a part is the radix
# of its digit. Returns the selected alternatives concatenated in part
# order.
sub GetQuery {
    my ($parts, $qid) = @_;
    my $assembled = "";
    foreach my $alternatives (@{$parts}) {
        my $radix = scalar @{$alternatives};
        # The current digit selects the alternative ...
        $assembled .= $alternatives->[ $qid % $radix ];
        # ... and is then shifted out for the next part.
        $qid = int($qid / $radix);
    }
    return $assembled;
}
# Count how many different combinations a parts list can produce.
#
#   $parts   reference to a list of array references, one per position
#
# Returns the product of the sizes of all inner arrays: 1 for an empty parts
# list, 0 as soon as one part has no alternatives.
sub getTotalChoices {
    my ($parts) = @_;
    my $product = 1;
    $product *= scalar @{$_} for @{$parts};
    return $product;
}
sub ParseGoogle {
my ($html)= @_;
my @result;
my @parts= split(/