1 #!perl -w
2 #
3 # eud2mbox.pl, Eudora to mbox mail converter
4 #
5 # Copyright (C) 1999 Jonathan J. Miner
6 # Portions Copyright (C) 1999 Dave Lorand <davel@src.uchicago.edu>
7 #
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License
10 # as published by the Free Software Foundation; either version 2
11 # of the License, or (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # eud2mbox.pl,v 1.35 2000/08/08 22:42:05 miner Exp
19 # Jon Miner <miner@doit.wisc.edu>
20 #
21 # TODO: Use HTML::Parser? Parse "HTML" messages in to chunks and wrap the
22 # lines. Possibly do this with all messages? Use Text::Wrap to do the actual
23 # wrapping.
24 #
25 my $RCSiD = 'eud2mbox.pl,v 1.35 2000/08/08 22:42:05 miner Exp';
26
27 use Getopt::Long;
28 use strict;
29 use diagnostics;
30 require 'print_hash.pl';
31
32 my $VERSION_MAJ = 1;
33 my $VERSION_MIN = 2;
34 my $VERSION_ALPHA = "b";
35 my $VERSION = "$VERSION_MAJ.$VERSION_MIN".$VERSION_ALPHA;
36
37 sub get_toc($);
38 sub get_messages($);
39
40 printf("Eud2mbox %s (build %s), Copyright (C) 1999 Jonathan J. Miner\n\n",
41 $VERSION, ($RCSiD =~ /,v ([\d.]+)/));
42
43 my %opts = ("toc",1);
44 GetOptions(\%opts,"v","version","help","prefix=s","out=s","toc!","d");
45
46 my $verbose = (defined($opts{'v'}) || defined($opts{'d'})) ? 1 : 0;
47 my $debug = defined($opts{'d'}) ? 1 : 0;
48
49 if ( $debug ) {
50 print "Options: \n";
51 print_hash(\%opts,2);
52 print "Args: @ARGV\n";
53 print "Debug: $debug\n";
54 print "Verbose: $verbose\n";
55 }
56
57 CopyRight(0) if ($opts{'version'});
58 Error(0) if ($opts{'help'});
59 Error(-1) if (!defined($ARGV[0]));
60 Error(-1, "Prefix as argument usage deprecated: See --prefix")
61 if (defined($ARGV[1]));
62
63 my $mboxName = defined ($opts{'prefix'}) ? "$opts{prefix}.$ARGV[0]" : $ARGV[0];
64 my $outMboxName = defined($opts{'prefix'}) ? "$opts{'prefix'}.$ARGV[0]" :
65 $ARGV[0];
66 my $out = "out/$outMboxName";
67
68 my @statuses = ("N","RO","RO","RO","RO","N","N","N","RO","N");
69
70 if (! -f "$mboxName.mbx" and -f $mboxName) { # a mac file
71 system "perl -015l12pe 5 $mboxName > $mboxName.mbx";
72 }
73
74
75 if (defined $opts{'out'}) {
76 if ( -d $opts{'out'} ) {
77 $out = $opts{'out'}."/$outMboxName";
78 } else {
79 $out = defined($opts{'prefix'}) ? "$opts{'prefix'}.$opts{'out'}" :
80 $opts{'out'};
81 }
82 }
83
84 open OUTMBOX, ">$out" or Error(-1, "error opening '$out' for writing: $!");
85
86 my %toc;
87 my @messages;
88
89 open MBOX, "<$mboxName.mbx" or Error(-1, "error opening '$mboxName.mbx': $!");
90 if ($opts{'toc'}) {
91 open TOC, "<$mboxName.toc" or
92 Error(-1, "error opening '$mboxName.toc': $!");
93
94 binmode TOC;
95
96 %toc = get_toc($mboxName);
97 @messages = get_messages($toc{'messages'});
98
99 close TOC;
100
101 } else {
102 print "Processing without a TOC file is currently not implemented.\n";
103 }
104
105 close MBOX;
106
107 foreach my $message (@messages) {
108 my @body = @{$message->{'body'}};
109 my @header = @{$message->{'header'}};
110 my %toc = %{$message->{'toc'}};
111
112 push @header, "Status: $toc{status}";
113 push @header, "X-Status: $toc{'x-status'}"
114 if (defined($toc{'x-status'}));
115
116 foreach (@header) {
117 print OUTMBOX "$_\n";
118 }
119
120 # Separate header and body
121 print OUTMBOX "\n";
122
123 my $html = 0;
124 my $div = 0;
125 my $blockquote = 0;
126
127 foreach (@body) {
128 s/^(>+)([^ >])/$1 $2/g;
129 if ($html == 0) {
130 if (/^<html>$/) {
131 $html = 1;
132 next;
133 } else {
134 s/<\/?x-flowed>//gi;
135 print OUTMBOX "$_\n";
136 }
137 } else {
138 $div = 1 if (s/^<DIV>//);
139 $div = 0 if (s#^</DIV>##);
140 if (s/^<blockquote [^>]*>// || $blockquote) {
141 $blockquote = 1;
142 $_ = "> $_";
143 }
144 s/<br>/\n/;
145 s/<\/html>/\n/;
146 s/ \;/ /gi;
147 s/"\;/"/gi;
148 s/&\;/&/gi;
149 $blockquote = 0 if (s#</blockquote>##);
150
151 print OUTMBOX "$_";
152 }
153 }
154 }
155
156 close OUTMBOX;
157
158 sub Error {
159 my ($exit, $Error_Message) = @_;
160
161 CopyRight();
162
163 print "Error: $Error_Message\n\n" if (defined $Error_Message);
164
165 (undef, my $name) = ($0 =~ /(.*[\\\/])?(.*)/);
166 my $namelen = length($name) - 2;
167 printf("Usage: %s <mailbox name> [-v] [-d] [--out|-o] [--prefix|-p] ".
168 "[--notoc]\n".
169 " "x$namelen.
170 " [--help|-h] [--version]\n",
171 $name);
172
173 print " <mailbox name> is the name of the mailbox\n";
174 print " (to which .toc and/or .mbx is appended.)\n";
175 print " [-v] Run in verbose mode.\n";
176 print " [-d] Run in debug mode.\n";
177 print " [-o | --out] <path/filename>\n";
178 print " Output filename and/or path.\n";
179 print " o If set to an existing directory, files will be\n";
180 print " written it to the directory.\n";
181 print " o If set to a file name (with/without full path)\n";
182 print " mailbox will be written to that file.\n";
183 print " [-p | --prefix] <prefix string> \n";
184 print " Prefix to prepend to the output FILE name.\n";
185 print " [--notoc] Ignores the .toc file and just runs over the mbx\n";
186 print " Use with a corrupt or missing toc file\n";
187 print " * For Macintosh Eudora, see \n";
188 print " \"Use old-style \".toc\" files\" in Misc. Settings.\n";
189 exit $exit;
190 }
191
192 sub CopyRight {
193 my ($exit) = @_;
194 print "This program is free software and may be freely distributed and".
195 "modified \n";
196 print "under the terms set forth in the GNU General Public License.\n";
197 print "This progam is distributed with ABSOLUTELY NO WARRANTY.\n";
198 print "See gpl.txt and/or http://www.gnu.org/ for more information.\n\n";
199 if (defined $exit) {
200 print "\nUse --help for usage information.\n";
201 exit $exit;
202 }
203 }
204
205 sub get_toc($) {
206 my $mboxName = shift;
207
208 $toc{'version'} = read_toc("l",6);
209
210 $toc{'mboxname'} = read_toc("A*",28,2);
211
212 $toc{'mboxtype'} = read_toc("s",2,4);
213
214 $toc{'messages'} = read_toc("s",2,60);
215
216 if ($verbose) {
217 printf "Name: %s\n", $toc{'mboxname'};
218 printf "Messages: %d\n", $toc{'messages'};
219
220 if ($debug) {
221 printf "version: 0x%lx\n", $toc{'version'};
222 printf "Type: %d\n", $toc{'mboxtype'};
223 }
224 print "------\n";
225 }
226
227 %toc;
228 }
229
230 # Function read_toc( $pattern, $num, $skip )
231 # Skips $skip bytes (if $skip is defined)
232 # Reads $num bytes from TOC and unpacks with $pattern
233 # Returns the value.
234
235 sub read_toc($$) {
236
237 my $temp;
238 my $out;
239
240 my $pattern = shift;
241 my $num = shift;
242 my $skip = shift;
243
244 read TOC, $temp, $skip if (defined($skip));
245
246 read TOC, $temp, $num;
247
248 $out = unpack($pattern, $temp);
249
250 $out;
251 }
252
253 sub get_messages($) {
254
255 my $num_msg = shift;
256
257 for (my $i = 0; $i < $num_msg; $i++) {
258 my %message = ();
259 my $status;
260 my %tempmsg = ();
261
262 $message{'offset'} = read_toc("i",4);
263
264 $message{'length'} = read_toc("i",4);
265
266 $message{'date'} = read_toc("i",4);
267
268 $status = read_toc("s",2);
269
270 $message{'status'} = $statuses[$status];
271 $message{'x-status'} = ($status == 2) ? 'A' : undef;
272
273 $message{'options'} = read_toc("c",1);
274
275 seek TOC, 203, 1;
276
277 if ($verbose) {
278 print "Message $i\n";
279 printf "Length: %d\n", $message{'length'};
280 printf "Date: %d (%s)\n",$message{'date'}, scalar localtime($message{'date'});
281 printf("Status: %d (%s) [%s]\n",$status, $message{'status'},
282 defined($message{'x-status'}) ? $message{'x-status'} : "");
283 if ($debug) {
284 printf "Offset: %d\n", $message{'offset'};
285 printf "Options: %x\n", $message{'options'};
286 }
287 print "------\n";
288 }
289 $tempmsg{'toc'} = \%message;
290 push @messages, \%tempmsg;
291 }
292
293 foreach my $entry (@messages) {
294 my $buf = "";
295 my $in = "";
296 my @lines = ();
297 seek MBOX, $entry->{'toc'}->{'offset'}, 0;
298 for (my $i = 1; $i <= $entry->{'toc'}->{'length'}; $i++) {
299 read MBOX, $in, 1;
300 if ((ord $in) == 10) {
301 $i++;
302 push @lines, $buf;
303 $buf = "";
304 } else {
305 $buf .= $in;
306 }
307 }
308 my $body = 0;
309 my @body = ();
310 my @header = ();
311 foreach (@lines) {
312 if ($body == 0) {
313 if (/^$/) {
314 $body = 1;
315 next;
316 }
317 push @header, $_;
318 } else {
319 push @body, $_;
320 }
321 }
322 $entry->{'body'} = \@body;
323 $entry->{'header'} = \@header;
324 }
325
326 @messages;
327 }
syntax highlighted by Code2HTML, v. 0.8.9