#!/usr/bin/perl -w
# (C) 2003-2007 Willem Jan Hengeveld <itsme@xs4all.nl>
# Web: http://www.xs4all.nl/~itsme/
#      http://wiki.xda-developers.com/
#
# $Id$
#

#
# this script checks my homepage out of cvs, reads file->topic map
# and updates the topics in the phpbb database accordingly
#
# todo:
#   - parse meta-devicetype tag -> topic_wallaby, topic_himalaya
#   - parse order field from either metatag, or topics file.
#   - convert links to homepage to links to topics
#
use strict;
use DBI;
use IO::File;
use Getopt::Long;
use Cwd 'chdir', 'getcwd';

my $save_to_dir;
GetOptions(
	"s=s"=> sub { $save_to_dir= getfullpath($_[1]); },
);
sub getfullpath {
	my ($path)= @_;

	return ($path =~ /^\//) ? $path : getcwd() . '/' . $path;
}
chdir "$ENV{HOME}/public_html/xs4all/projects/xda";
system "cvs update";

my ($datasource, $dbuser, $dbpass, $dbprefix)= GetDatabaseConfig("/home/hosting/forum.xda-developers.com/config.php");

my $db= DBI->connect($datasource, $dbuser, $dbpass) or die "dbi connect $datasource\n";

$db->{AutoCommit}= 1;

my $savepost= $db->prepare("UPDATE phpbb_posts_text SET post_text=?, post_subject=? WHERE post_id=?;");
my $settopictitle= $db->prepare("UPDATE phpbb_topics SET topic_title=? WHERE topic_id=?;");
my $getpostsubject= $db->prepare("SELECT post_subject FROM phpbb_posts_text WHERE post_id=?");
my $gettopictitle= $db->prepare("SELECT topic_title FROM phpbb_topics WHERE topic_id=?");

my $filelist= ReadTopicsFile("site_to_topic_map.txt");

ReadFilesData($filelist);

UpdateDatabase($filelist);

$db->disconnect;

exit(0);

sub GetDatabaseConfig {
	my ($phpbbconfig)= @_;
	my %params;

	my $fh= IO::File->new($phpbbconfig, "r") or die "readdbcfg: $phpbbconfig\n";

	while (<$fh>) {
		if (/^\s*\$(\w+)\s*=\s*'(.*?)';/) {
			$params{$1}= $2;
		}
	}

	$fh->close();

	my $dbi_datasource="dbi:$params{dbms}:$params{dbname}";
	return $dbi_datasource, @params{qw(dbuser dbpasswd table_prefix)};
}
sub ReadTopicsFile {
	my ($fn)= @_;

	my @list;

	my $fh= IO::File->new($fn, "r") or die "$fn\n";

	while (<$fh>) {
		s/\s+$//;
		if (m{^(\d+)\s+(\d+)\s+(\w.*?\.html)}) {
			my ($topicid, $postid, $htmlfile)= ($1, $2, $3);

			push @list, {topicid=>$topicid, postid=>$postid, htmlfile=>$htmlfile};
		}
	}
	$fh->close();

	return \@list;
}
sub ReadFilesData {
	my ($filelist)= @_;

	for my $fileinfo (@{$filelist}) {
		ReadFileData($fileinfo);
	}
}

sub ReadFileData {
	my ($fileinfo)= @_;

	my $html;

	my $fh= IO::File->new($fileinfo->{htmlfile}, "r") or die "$fileinfo->{htmlfile}\n";
	$fh->read($html, -s $fh);
	$fh->close();

	($fileinfo->{title},$fileinfo->{body})= StripHtml($html);
}

sub StripHtml {
	my ($html)= @_;

	my ($title, $body);

	if ($html =~ m{<title>\s*(.*?)\s*</title>}si) {
		$title= $1;
	}
	if ($html =~ m{<body>(.*?)</body>}si) {
		$body= $1;
	}
	else {
		$body= $html;
	}

	# strip all javascript.
	$body =~ s{<script.*?>.*?</script>}{}sgi;

	# strip the first <h1> - this is usually topic like.
	$body =~ s{<h1>.*?</h1>}{}si;


	# these are not nescesary, only while editing using the forum editor
	# you cannot enter character entities.

	#$body =~ s{&lt;}{<}g;
	#$body =~ s{&gt;}{>}g;
	#$body =~ s{&amp;}{&}g;
	
	# replace nbsp with space. - phpbb replaces it back on empty table cells.
	$body =~ s{&nbsp;}{ }g;


	# fix my google links
	$body =~ s{
			javascript:msdnlink\(["'](.*?)["']\)
		}{
	"http://www.google.com/search?q=".urlescape($1)."+site%3Amsdn.microsoft.com+%22Microsoft+Windows+CE+3.0%22" 
		}gixe;

	# strip leading and trailing blanks
	$body =~ s/^\s+//;
	$body =~ s/\s+$//;

	return ($title, $body);
}
sub urlescape {
	my ($str)= @_;
	$str =~ s{(.)}{
		my $char=$1;
		($char eq " ") ? "+"
		: ($char =~ m{\w|[+/@-]}) ? $char 
		: sprintf("%%%02X", ord($char)) 
	}gxe;

	return $str;
}
sub UpdateDatabase {
	my ($filelist)= @_;
	for my $fileinfo (@{$filelist}) {
		if ($save_to_dir) {
			SavePostToFile($fileinfo->{topicid}, $fileinfo->{postid}, $fileinfo->{title}, $fileinfo->{body});
		}
		else {
			UpdatePost($fileinfo->{topicid}, $fileinfo->{postid}, $fileinfo->{title}, $fileinfo->{body});
		}
	}
}

sub UpdatePost {
	my ($topicid, $postid, $title, $body)= @_;

	print "updating post $postid\n";
	$savepost->execute($body, $title, $postid);
	$settopictitle->execute($title, $topicid);
}

# end of script
#
# ... test functions...
#
sub TestPost {
	my ($topicid, $postid, $title, $body)= @_;

	$getpostsubject->execute($postid);
	my $postsubject= $getpostsubject->fetchrow_array;
	warn "too many post subjects\n" if ($getpostsubject->fetchrow_array);
	#$getpostsubject->finish;

	$gettopictitle->execute($topicid);
	my $topictitle= $gettopictitle->fetchrow_array;
	warn "too many topic titles\n" if ($gettopictitle->fetchrow_array);
	#$gettopictitle->finish;

	print "$postsubject\t$topictitle\n";
}
sub SavePostToFile {
	my ($topicid, $postid, $title, $body)= @_;
	my $fn= "$save_to_dir/post_${postid}.txt";
	my $fh= IO::File->new($fn, "w") or die "write $fn\n";

	$fh->print("<html>");
	$fh->print("<head>\n");
	$fh->print("    <title>$title</title>\n");
	$fh->print("</head>\n");
	$fh->print("<body>\n");
	$fh->print("$body\n");
	$fh->print("</body>\n");
	$fh->print("</html>\n");

	$fh->close();
}
sub sqlquote {
	my ($str)= @_;

	$str =~ s/'/''/g;
	return "'$str'";
}
