1 #!perl -w
  2 #
  3 # eud2mbox.pl, Eudora to mbox mail converter
  4 #
  5 # Copyright (C) 1999 Jonathan J. Miner
  6 # Portions Copyright (C) 1999 Dave Lorand <davel@src.uchicago.edu>
  7 # 
  8 # This program is free software; you can redistribute it and/or
  9 # modify it under the terms of the GNU General Public License
 10 # as published by the Free Software Foundation; either version 2
 11 # of the License, or (at your option) any later version.
 12 # 
 13 # This program is distributed in the hope that it will be useful,
 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 # GNU General Public License for more details.
 17 # 
 18 # eud2mbox.pl,v 1.35 2000/08/08 22:42:05 miner Exp
 19 # Jon Miner <miner@doit.wisc.edu>
 20 # 
 21 # TODO: Use HTML::Parser?  Parse "HTML" messages in to chunks and wrap the
 22 # lines. Possibly do this with all messages? Use Text::Wrap to do the actual
 23 # wrapping.
 24 # 
 25 my $RCSiD = 'eud2mbox.pl,v 1.35 2000/08/08 22:42:05 miner Exp';
 26 
 27 use Getopt::Long;
 28 use strict;
 29 use diagnostics;
 30 require 'print_hash.pl';
 31 
 32 my $VERSION_MAJ = 1;
 33 my $VERSION_MIN = 2;
 34 my $VERSION_ALPHA = "b";
 35 my $VERSION = "$VERSION_MAJ.$VERSION_MIN".$VERSION_ALPHA;
 36 
 37 sub get_toc($);
 38 sub get_messages($);
 39 
 40 printf("Eud2mbox %s (build %s), Copyright (C) 1999 Jonathan J. Miner\n\n",
 41        $VERSION, ($RCSiD =~ /,v ([\d.]+)/));
 42 
 43 my %opts = ("toc",1);
 44 GetOptions(\%opts,"v","version","help","prefix=s","out=s","toc!","d");
 45 
 46 my $verbose = (defined($opts{'v'}) || defined($opts{'d'})) ? 1 : 0;
 47 my $debug = defined($opts{'d'}) ? 1 : 0;
 48 
 49 if ( $debug ) {
 50     print "Options: \n";
 51     print_hash(\%opts,2);
 52     print "Args: @ARGV\n";
 53     print "Debug: $debug\n";
 54     print "Verbose: $verbose\n";
 55 }
 56 
 57 CopyRight(0) if ($opts{'version'});
 58 Error(0) if ($opts{'help'});
 59 Error(-1) if (!defined($ARGV[0]));
 60 Error(-1, "Prefix as argument usage deprecated: See --prefix") 
 61     if (defined($ARGV[1]));
 62 
 63 my $mboxName = defined ($opts{'prefix'}) ? "$opts{prefix}.$ARGV[0]" : $ARGV[0];
 64 my $outMboxName = defined($opts{'prefix'}) ? "$opts{'prefix'}.$ARGV[0]" : 
 65                                              $ARGV[0];
 66 my $out = "out/$outMboxName";
 67 
 68 my @statuses = ("N","RO","RO","RO","RO","N","N","N","RO","N");
 69 
 70 if (! -f "$mboxName.mbx" and -f $mboxName) {         # a mac file
 71     system "perl -015l12pe 5 $mboxName > $mboxName.mbx";
 72 }
 73 
 74 
 75 if (defined $opts{'out'}) {
 76     if ( -d $opts{'out'} ) {
 77         $out = $opts{'out'}."/$outMboxName";
 78     } else {
 79         $out = defined($opts{'prefix'}) ? "$opts{'prefix'}.$opts{'out'}" : 
 80                                           $opts{'out'};
 81     }
 82 }
 83 
 84 open OUTMBOX, ">$out" or Error(-1, "error opening '$out' for writing: $!");
 85 
 86 my %toc;
 87 my @messages;
 88 
 89 open MBOX, "<$mboxName.mbx" or Error(-1, "error opening '$mboxName.mbx': $!");
 90 if ($opts{'toc'}) {
 91     open TOC, "<$mboxName.toc" or 
 92         Error(-1, "error opening '$mboxName.toc': $!");
 93 
 94     binmode TOC; 
 95 
 96     %toc = get_toc($mboxName);
 97     @messages = get_messages($toc{'messages'});
 98 
 99     close TOC;
100 
101 } else {
102     print "Processing without a TOC file is currently not implemented.\n";
103 }
104 
105 close MBOX;
106 
107 foreach my $message (@messages) {
108     my @body = @{$message->{'body'}};
109     my @header = @{$message->{'header'}};
110     my %toc = %{$message->{'toc'}};
111 
112     push @header, "Status: $toc{status}";
113     push @header, "X-Status: $toc{'x-status'}"
114         if (defined($toc{'x-status'}));
115 
116     foreach (@header) {
117         print OUTMBOX "$_\n";
118     }
119 
120 # Separate header and body
121     print OUTMBOX "\n";
122 
123     my $html = 0;
124     my $div = 0;
125     my $blockquote = 0;
126     
127     foreach (@body) {
128         s/^(>+)([^ >])/$1 $2/g;
129         if ($html == 0) {
130             if (/^<html>$/) {
131                 $html = 1;
132                 next;
133             } else {
134                 s/<\/?x-flowed>//gi;
135                 print OUTMBOX "$_\n";
136             }
137         } else {
138             $div = 1 if (s/^<DIV>//);
139             $div = 0 if (s#^</DIV>##);
140             if (s/^<blockquote [^>]*>// || $blockquote) {
141                 $blockquote = 1;
142                 $_ = "> $_";
143             }
144             s/<br>/\n/;
145             s/<\/html>/\n/;
146             s/&nbsp\;/ /gi;
147             s/&quot\;/"/gi;
148             s/&amp\;/&/gi;
149             $blockquote = 0 if (s#</blockquote>##);
150 
151             print OUTMBOX "$_";
152         }
153     }
154 }
155 
156 close OUTMBOX;
157 
158 sub Error {
159     my ($exit, $Error_Message) = @_;
160 
161     CopyRight();
162     
163     print "Error: $Error_Message\n\n" if (defined $Error_Message);
164 
165     (undef, my $name) = ($0 =~ /(.*[\\\/])?(.*)/);
166     my $namelen = length($name) - 2;
167     printf("Usage: %s <mailbox name> [-v] [-d] [--out|-o] [--prefix|-p] ".
168            "[--notoc]\n".
169            " "x$namelen.
170            "                         [--help|-h] [--version]\n", 
171            $name);
172 
173     print "     <mailbox name> is the name of the mailbox\n";
174     print "                    (to which .toc and/or .mbx is appended.)\n";
175     print "     [-v] Run in verbose mode.\n";
176     print "     [-d] Run in debug mode.\n";
177     print "     [-o | --out] <path/filename>\n";
178     print "                  Output filename and/or path.\n";
179     print "                   o If set to an existing directory, files will be\n";
180     print "                     written it to the directory.\n";
181     print "                   o If set to a file name (with/without full path)\n";
182     print "                     mailbox will be written to that file.\n";
183     print "     [-p | --prefix] <prefix string> \n";
184     print "                     Prefix to prepend to the output FILE name.\n";
185     print "     [--notoc] Ignores the .toc file and just runs over the mbx\n";
186     print "               Use with a corrupt or missing toc file\n";
187     print "                * For Macintosh Eudora, see \n";
188     print "                  \"Use old-style \".toc\" files\" in Misc. Settings.\n";
189     exit $exit;
190 }
191 
192 sub CopyRight {
193     my ($exit) = @_;
194     print "This program is free software and may be freely distributed and".
195           "modified \n";
196     print "under the terms set forth in the GNU General Public License.\n";
197     print "This progam is distributed with ABSOLUTELY NO WARRANTY.\n";
198     print "See gpl.txt and/or http://www.gnu.org/ for more information.\n\n";
199     if (defined $exit) {
200         print "\nUse --help for usage information.\n";
201         exit $exit;
202     }
203 }
204 
205 sub get_toc($) {
206     my $mboxName = shift;
207 
208     $toc{'version'} = read_toc("l",6);
209 
210     $toc{'mboxname'} = read_toc("A*",28,2);
211 
212     $toc{'mboxtype'} = read_toc("s",2,4);
213 
214     $toc{'messages'} = read_toc("s",2,60);
215 
216     if ($verbose) {
217         printf "Name: %s\n", $toc{'mboxname'};
218         printf "Messages: %d\n", $toc{'messages'};
219 
220         if ($debug) {
221             printf "version: 0x%lx\n", $toc{'version'};
222             printf "Type: %d\n", $toc{'mboxtype'};
223         }
224         print "------\n";
225     }
226 
227     %toc;
228 }
229 
230 # Function read_toc( $pattern, $num, $skip )
231 # Skips $skip bytes (if $skip is defined)
232 # Reads $num bytes from TOC and unpacks with $pattern
233 # Returns the value.
234 
235 sub read_toc($$) {
236 
237     my $temp;
238     my $out;
239     
240     my $pattern = shift;
241     my $num = shift;
242     my $skip = shift;
243 
244     read TOC, $temp, $skip if (defined($skip));
245     
246     read TOC, $temp, $num;
247 
248     $out = unpack($pattern, $temp);
249 
250     $out;
251 }
252 
253 sub get_messages($) {
254 
255     my $num_msg = shift;
256 
257     for (my $i = 0; $i < $num_msg; $i++) {
258         my %message = ();
259         my $status;
260         my %tempmsg = ();
261 
262         $message{'offset'} = read_toc("i",4);
263 
264         $message{'length'} = read_toc("i",4);
265 
266         $message{'date'} = read_toc("i",4);
267 
268         $status = read_toc("s",2);
269 
270         $message{'status'} = $statuses[$status];
271         $message{'x-status'} = ($status == 2) ? 'A' : undef;
272 
273         $message{'options'} = read_toc("c",1);
274 
275         seek TOC, 203, 1;
276 
277         if ($verbose) {
278             print "Message $i\n";
279             printf "Length: %d\n", $message{'length'};
280             printf "Date: %d (%s)\n",$message{'date'}, scalar localtime($message{'date'});
281             printf("Status: %d (%s) [%s]\n",$status, $message{'status'},
282                    defined($message{'x-status'}) ? $message{'x-status'} : "");
283             if ($debug) {
284                 printf "Offset: %d\n", $message{'offset'};
285                 printf "Options: %x\n", $message{'options'};
286             }
287             print "------\n";
288         }
289         $tempmsg{'toc'} = \%message;
290         push @messages, \%tempmsg;
291     }
292 
293     foreach my $entry (@messages) {
294         my $buf = "";
295         my $in = "";
296         my @lines = ();
297         seek MBOX, $entry->{'toc'}->{'offset'}, 0;
298         for (my $i = 1; $i <= $entry->{'toc'}->{'length'}; $i++) {
299             read MBOX, $in, 1;
300             if ((ord $in) == 10) {
301                 $i++;
302                 push @lines, $buf;
303                 $buf = "";
304             } else {
305                 $buf .= $in;
306             }
307         }
308         my $body = 0;
309         my @body = ();
310         my @header = ();
311         foreach (@lines) {
312             if ($body == 0) {
313                 if (/^$/) {
314                     $body = 1;
315                     next;
316                 }
317                 push @header, $_;
318             } else {
319                 push @body, $_;
320             }
321         }
322         $entry->{'body'} = \@body;
323         $entry->{'header'} = \@header;
324     }
325 
326     @messages;
327 }


syntax highlighted by Code2HTML, v. 0.8.9