#!/usr/bin/perl -w # (C) 2003-2007 Willem Jan Hengeveld # Web: http://www.xs4all.nl/~itsme/ # http://wiki.xda-developers.com/ # # $Id$ # # # this script checks my homepage out of cvs, reads file->topic map # and updates the topics in the phpbb database accordingly # # todo: # - parse meta-devicetype tag -> topic_wallaby, topic_himalaya # - parse order field from either metatag, or topics file. # - convert links to homepage to links to topics # use strict; use DBI; use IO::File; use Getopt::Long; use Cwd 'chdir', 'getcwd'; my $save_to_dir; GetOptions( "s=s"=> sub { $save_to_dir= getfullpath($_[1]); }, ); sub getfullpath { my ($path)= @_; return ($path =~ /^\//) ? $path : getcwd() . '/' . $path; } chdir "$ENV{HOME}/public_html/xs4all/projects/xda"; system "cvs update"; my ($datasource, $dbuser, $dbpass, $dbprefix)= GetDatabaseConfig("/home/hosting/forum.xda-developers.com/config.php"); my $db= DBI->connect($datasource, $dbuser, $dbpass) or die "dbi connect $datasource\n"; $db->{AutoCommit}= 1; my $savepost= $db->prepare("UPDATE phpbb_posts_text SET post_text=?, post_subject=? WHERE post_id=?;"); my $settopictitle= $db->prepare("UPDATE phpbb_topics SET topic_title=? WHERE topic_id=?;"); my $getpostsubject= $db->prepare("SELECT post_subject FROM phpbb_posts_text WHERE post_id=?"); my $gettopictitle= $db->prepare("SELECT topic_title FROM phpbb_topics WHERE topic_id=?"); my $filelist= ReadTopicsFile("site_to_topic_map.txt"); ReadFilesData($filelist); UpdateDatabase($filelist); $db->disconnect; exit(0); sub GetDatabaseConfig { my ($phpbbconfig)= @_; my %params; my $fh= IO::File->new($phpbbconfig, "r") or die "readdbcfg: $phpbbconfig\n"; while (<$fh>) { if (/^\s*\$(\w+)\s*=\s*'(.*?)';/) { $params{$1}= $2; } } $fh->close(); my $dbi_datasource="dbi:$params{dbms}:$params{dbname}"; return $dbi_datasource, @params{qw(dbuser dbpasswd table_prefix)}; } sub ReadTopicsFile { my ($fn)= @_; my @list; my $fh= IO::File->new($fn, "r") or die "$fn\n"; while (<$fh>) { s/\s+$//; if (m{^(\d+)\s+(\d+)\s+(\w.*?\.html)}) { my ($topicid, $postid, $htmlfile)= ($1, $2, $3); push @list, {topicid=>$topicid, postid=>$postid, htmlfile=>$htmlfile}; } } $fh->close(); return \@list; } sub ReadFilesData { my ($filelist)= @_; for my $fileinfo (@{$filelist}) { ReadFileData($fileinfo); } } sub ReadFileData { my ($fileinfo)= @_; my $html; my $fh= IO::File->new($fileinfo->{htmlfile}, "r") or die "$fileinfo->{htmlfile}\n"; $fh->read($html, -s $fh); $fh->close(); ($fileinfo->{title},$fileinfo->{body})= StripHtml($html); } sub StripHtml { my ($html)= @_; my ($title, $body); if ($html =~ m{\s*(.*?)\s*}si) { $title= $1; } if ($html =~ m{(.*?)}si) { $body= $1; } else { $body= $html; } # strip all javascript. $body =~ s{.*?}{}sgi; # strip the first

- this is usually topic like. $body =~ s{

.*?

}{}si; # these are not nescesary, only while editing using the forum editor # you cannot enter character entities. #$body =~ s{<}{<}g; #$body =~ s{>}{>}g; #$body =~ s{&}{&}g; # replace nbsp with space. - phpbb replaces it back on empty table cells. $body =~ s{ }{ }g; # fix my google links $body =~ s{ javascript:msdnlink\(["'](.*?)["']\) }{ "http://www.google.com/search?q=".urlescape($1)."+site%3Amsdn.microsoft.com+%22Microsoft+Windows+CE+3.0%22" }gixe; # strip leading and trailing blanks $body =~ s/^\s+//; $body =~ s/\s+$//; return ($title, $body); } sub urlescape { my ($str)= @_; $str =~ s{(.)}{ my $char=$1; ($char eq " ") ? "+" : ($char =~ m{\w|[+/@-]}) ? $char : sprintf("%%%02X", ord($char)) }gxe; return $str; } sub UpdateDatabase { my ($filelist)= @_; for my $fileinfo (@{$filelist}) { if ($save_to_dir) { SavePostToFile($fileinfo->{topicid}, $fileinfo->{postid}, $fileinfo->{title}, $fileinfo->{body}); } else { UpdatePost($fileinfo->{topicid}, $fileinfo->{postid}, $fileinfo->{title}, $fileinfo->{body}); } } } sub UpdatePost { my ($topicid, $postid, $title, $body)= @_; print "updating post $postid\n"; $savepost->execute($body, $title, $postid); $settopictitle->execute($title, $topicid); } # end of script # # ... test functions... # sub TestPost { my ($topicid, $postid, $title, $body)= @_; $getpostsubject->execute($postid); my $postsubject= $getpostsubject->fetchrow_array; warn "too many post subjects\n" if ($getpostsubject->fetchrow_array); #$getpostsubject->finish; $gettopictitle->execute($topicid); my $topictitle= $gettopictitle->fetchrow_array; warn "too many topic titles\n" if ($gettopictitle->fetchrow_array); #$gettopictitle->finish; print "$postsubject\t$topictitle\n"; } sub SavePostToFile { my ($topicid, $postid, $title, $body)= @_; my $fn= "$save_to_dir/post_${postid}.txt"; my $fh= IO::File->new($fn, "w") or die "write $fn\n"; $fh->print(""); $fh->print("\n"); $fh->print(" $title\n"); $fh->print("\n"); $fh->print("\n"); $fh->print("$body\n"); $fh->print("\n"); $fh->print("\n"); $fh->close(); } sub sqlquote { my ($str)= @_; $str =~ s/'/''/g; return "'$str'"; }