#!perl -w
# (C) 2003-2007 Willem Jan Hengeveld
# Web: http://www.xs4all.nl/~itsme/
#      http://wiki.xda-developers.com/
#
# $Id: tounicode.pl 1558 2007-07-02 15:58:12Z itsme $
#
# Copies a text file from utf8 (or plain extended ascii) to little-endian
# utf16, or the other way around with '-d'.  Optionally the output is padded
# with ';' filler lines until it has a fixed size.
#
# todo:
#   - when '-u' is specified, or not, make sure there is only 1 or 0 BOM chars.
use strict;
use IO::File;
use Getopt::Long;

use constant {
    FILLER_LINE_SIZE => 128,    # size in bytes of a full filler line
    MIN_FILLER_SIZE  => 4,      # smallest filler line: "\r\n" in utf16
};

my $g_fixedsize;
my $with_unicode_id = 0;
my $decode          = 0;
my $ascii           = 0;

GetOptions(
    "s=s" => sub {
        # NOTE(review): string eval of a command line argument.  It is kept
        # so that values like 0x10000 or 64*1024 keep working, but it will
        # execute arbitrary perl code - never pass untrusted input to '-s'.
        my $size = eval($_[1]);

        # GetOptions catches this die, prints the message and returns false.
        die "invalid size for -s: $_[1]\n" unless defined $size;
        $g_fixedsize = $size;
    },
    "u" => \$with_unicode_id,
    "d" => \$decode,
    "a" => \$ascii,
) or die usage();

if (@ARGV != 2) {
    die usage();
}
my ($infile, $outfile) = @ARGV;

# Encoding reads utf8 (or raw bytes with '-a') and writes utf16le with crlf
# line endings; decoding uses the same two layers the other way around.
my $suffix = $ascii ? '' : ':utf8';
my ($ilayer, $olayer) = ($suffix, ':raw:encoding(utf-16le):crlf:utf8');
if ($decode) {
    ($ilayer, $olayer) = ($olayer, $ilayer);
}

my $ifh = IO::File->new($infile, "<$ilayer") or die "$infile: $!\n";

# see http://blogs.msdn.com/brettsh/archive/2006/06/07/620986.aspx
my $ofh = IO::File->new($outfile, ">$olayer") or die "$outfile: $!\n";

$ofh->print("\x{FEFF}") if $with_unicode_id;

# my $firstline= <$ifh>;
# if ($firstline) {
#     s/^\x{FEFF}+//;
#     $ofh->print($_);
# }
while (my $line = <$ifh>) {
    $ofh->print($line);
}
$ifh->close();

binmode $ofh;    # flushes file, and switch to ':raw'

my $cursize = -s $ofh;

if ($g_fixedsize) {
    my $needed = $g_fixedsize - $cursize;

    # An exact fit needs no padding and no warning.  With less than one
    # minimal filler line of room left nothing can be added: appending a
    # filler line would only move the file further past the fixed size.
    if ($needed != 0 && $needed < MIN_FILLER_SIZE) {
        warn "WARNING: file larger than specified fixed size\n";
    }

    # The handle is ':raw' now, so the utf16le filler bytes are written as is.
    for my $size (FillerSizes($needed)) {
        $ofh->print(FillerLine($size));
    }
}

# Buffered write errors only show up when the file is closed.
$ofh->close() or die "$outfile: $!\n";

# Returns the command line help text.
sub usage {
    return <<__EOF__;
Usage: tounicode [-s FIXEDSIZE] [-u] [-a] [-d] infile outfile
    -s FIXEDSIZE : pad outfile with filler lines up to FIXEDSIZE bytes
    -u : add BOM character
    -a : process simple extended ascii, instead of utf8
    -d : decode ( utf16 unicode -> utf8 )
         otherwise encode ( utf8 -> utf16 unicode )
__EOF__
}

# Returns the list of filler line sizes ( in bytes ) that together fill
# $needed bytes.
#   - every size is even and at least MIN_FILLER_SIZE, because FillerLine
#     cannot produce anything else.
#   - an odd $needed is rounded down, a 2 byte character can not fill the
#     last byte.
#   - the list is empty when there is no room for even one filler line.
sub FillerSizes {
    my ($needed) = @_;

    $needed = int($needed);
    $needed -= $needed % 2;
    return if $needed < MIN_FILLER_SIZE;

    my $n_fullentries = int($needed / FILLER_LINE_SIZE);
    my $rest          = $needed % FILLER_LINE_SIZE;
    if ($rest > 0 && $rest < MIN_FILLER_SIZE) {
        # we would end up with a chunk too small for 1 kv.
        # -> divide this up over the last 2 keyvals.
        $n_fullentries--;
        $rest += FILLER_LINE_SIZE;
    }

    my @sizes = (FILLER_LINE_SIZE) x $n_fullentries;
    if ($rest > FILLER_LINE_SIZE) {
        # both parts must be even, otherwise FillerLine rounds each of them
        # down and the file ends up short.
        my $first = 2 * int($rest / 4);
        push @sizes, $first, $rest - $first;
    }
    elsif ($rest > 0) {
        push @sizes, $rest;
    }
    return @sizes;
}

# Returns one filler line as raw utf16le bytes: ';' characters followed by
# "\r\n".  The result is $size bytes long when $size is even and at least 4;
# an odd $size is rounded down, anything smaller still yields the 4 byte
# line end.
sub FillerLine {
    my ($size) = @_;

    my $n_chars = int($size / 2) - 2;
    $n_chars = 0 if $n_chars < 0;

    #return (";" x $n_chars) . "\n";
    return (";\x00" x $n_chars) . "\r\x00\n\x00";
}