#!perl -w
# (C) 2003-2007 Willem Jan Hengeveld
# Web: http://www.xs4all.nl/~itsme/
#      http://wiki.xda-developers.com/
#
# $Id$
#
# todo:
#  - when '-u' is specified, or not, make sure there is only 1 or 0 BOM chars.

# this script essentially does the same as
#
#    iconv -f UTF-16LE -t UTF-8
#
# Converts a text file between UTF-8 (or plain bytes with -a) and UTF-16LE,
# optionally adding/stripping a BOM, normalising line endings, and padding
# the output with ';' filler lines up to a fixed size.
use strict;
use IO::File;
use Getopt::Long qw(:config no_ignore_case);

my $g_fixedsize;        # target output size in bytes (-s); false means no padding
my $with_BOM    = 0;    # -u : make sure the output starts with a BOM
my $without_BOM = 0;    # -U : strip leading BOM(s); takes precedence over -u
my $force_lf    = 0;    # -lf
my $force_crlf  = 0;    # -crlf
my $decode      = 0;    # -d : utf16 -> utf8 instead of utf8 -> utf16
my $ascii       = 0;    # -a : no ':utf8' layer on the non-utf16 side

# Returns the usage text shown when the command line is invalid.
sub usage {
    return <<__EOF__;
Usage: tounicode [-s FIXEDSIZE] [-u|-U] [-a] [-d] [-lf|-crlf] infile outfile
    -s SIZE : pad outfile with ';' filler lines up to SIZE bytes
    -u : add BOM character
    -U : strip BOM character
    -a : process simple extended ascii, instead of utf8
    -d : decode ( utf16 unicode -> utf8 )
         otherwise encode ( utf8 -> utf16 unicode )
    -lf : changes lineendings to LF
    -crlf: changes lineendings to CRLF
__EOF__
}

GetOptions(
    "s=s" => sub {
        my ($optname, $value) = @_;
        # NOTE(review): string eval of a command line argument.  It is kept
        # so that sizes such as 0x10000 or 64*1024 keep working, but it
        # executes arbitrary perl code; do not feed it untrusted input.
        my $size = eval($value);
        # die() inside a Getopt::Long handler marks the option as in error,
        # so GetOptions returns false and the usage text is printed below.
        die "invalid size '$value'\n"
            unless defined $size && $size =~ /\A\d+\z/;
        $g_fixedsize = $size;
    },
    "u"    => \$with_BOM,
    "U"    => \$without_BOM,
    "d"    => \$decode,
    "a"    => \$ascii,
    "lf"   => \$force_lf,
    "crlf" => \$force_crlf,
) or die usage();

if (@ARGV != 2) {
    die usage();
}
my ($infile, $outfile) = @ARGV;

# Build the PerlIO layers for both handles.  The ':crlf' layer must always
# sit on the OUTPUT handle: it is what turns the "\n" produced by the loop
# below into CRLF, both when encoding and when decoding.
my $plain_layer = $ascii ? '' : ':utf8';
my $utf16_layer = ':raw:encoding(utf-16le)';
my $crlf_layer  = $force_crlf ? ':crlf' : '';
my ($ilayer, $olayer) = $decode
    ? ($utf16_layer . ':utf8', $crlf_layer . $plain_layer)
    : ($plain_layer,           $utf16_layer . $crlf_layer . ':utf8');

my $ifh = IO::File->new($infile, "<$ilayer") or die "$infile: $!\n";

# see http://blogs.msdn.com/brettsh/archive/2006/06/07/620986.aspx
my $ofh = IO::File->new($outfile, ">$olayer") or die "$outfile: $!\n";

my $linesprinted = 0;
while (my $line = <$ifh>) {
    if ($linesprinted == 0) {
        # The BOM can only appear at the very start of the first line.
        if ($without_BOM) {
            $line =~ s/^\x{FEFF}+//;
        }
        elsif ($with_BOM) {
            $line =~ s/^\x{FEFF}*/\x{FEFF}/;
        }
    }
    if ($force_lf || $force_crlf) {
        # Normalise to a bare "\n"; for -crlf the output layer then turns
        # it into CRLF (without this, "\r\n" would become "\r\r\n").
        $line =~ s/\r*\n/\n/s;
    }

    $ofh->print($line);

    $linesprinted++;
}
$ifh->close();

# An empty input never enters the loop above; still honour -u.
if ($linesprinted == 0 && $with_BOM && !$without_BOM) {
    $ofh->print("\x{FEFF}");
}

# Flush, then switch to ':raw': -s must see the real size on disk and the
# filler lines below are already raw UTF-16LE bytes.
$ofh->flush();
binmode $ofh;

if ($g_fixedsize) {
    my $cursize = -s $ofh;
    my $needed  = $g_fixedsize - $cursize;

    if ($needed < 0) {
        warn "WARNING: file larger than specified fixed size\n";
    }
    elsif ($needed > 0 && $needed < 4) {
        # The smallest filler line (a bare CRLF) already takes 4 bytes.
        warn "WARNING: $needed byte(s) short of fixed size, too few to pad\n";
    }
    elsif ($needed > 0) {
        $ofh->print(FillerLines($needed));
    }
}

$ofh->close() or die "$outfile: $!\n";

# Returns the list of filler lines that together occupy $needed bytes.
#
# Lines are 128 bytes each, followed by one shorter line for the remainder.
# A remainder of 1..3 bytes is too small for a line of its own, so in that
# case the last 128 + remainder bytes are divided over two lines instead.
# An odd $needed cannot be matched exactly (see FillerLine); the result is
# then one byte short.  Returns an empty list when $needed is below 4.
sub FillerLines {
    my ($needed) = @_;
    my @lines;

    my $n_fullentries = int($needed / 128);
    my $remainder     = $needed % 128;
    if ($remainder > 0 && $remainder < 4 && $n_fullentries > 0) {
        # we would end up with a chunk too small for 1 kv.
        # -> divide this up over the last 2 keyvals.
        $n_fullentries--;
    }
    for (1 .. $n_fullentries) {
        push @lines, FillerLine(128);
        $needed -= 128;
    }
    if ($needed > 128) {
        # Split on an even byte boundary so that both parts can be produced
        # exactly by FillerLine.
        my $first = 2 * int($needed / 4);
        push @lines, FillerLine($first);
        $needed -= $first;
    }
    # Nothing is left when $needed was a multiple of 128; emitting a line
    # here anyway would overshoot the fixed size by 4 bytes.
    push @lines, FillerLine($needed) if $needed >= 4;

    return @lines;
}

# Returns one filler line as raw UTF-16LE bytes: ';' characters followed by
# CRLF, $size bytes in total.  Every character takes 2 bytes, so an odd
# $size yields $size-1 bytes, and anything below 4 yields the 4 byte CRLF.
sub FillerLine {
    my ($size) = @_;
    my $n_semicolons = int($size / 2) - 2;    # CRLF takes 2 of the chars
    $n_semicolons = 0 if $n_semicolons < 0;
    return (";\x00" x $n_semicolons) . "\r\x00\n\x00";
}