1 package net.spamcomplaint.mail;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.security.MessageDigest;
7 import java.security.NoSuchAlgorithmException;
8 import java.text.ParseException;
9 import java.text.SimpleDateFormat;
10 import java.util.Date;
11 import java.util.Enumeration;
12 import java.util.Iterator;
13 import java.util.LinkedHashSet;
14 import java.util.Set;
15
16 import javax.mail.Header;
17 import javax.mail.MessagingException;
18 import javax.mail.internet.MimeMessage;
19
20 import net.spamcomplaint.util.NetworkUtil;
21 import net.spamcomplaint.util.StringUtils;
22
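/**
 * Helpers for analysing MIME mail messages: fingerprinting selected headers,
 * parsing "Received" lines (relay host and sender IP address) and extracting
 * the message date.
 *
 * @author jcalfee
 * @see <a href="http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html">RFC 1341, section 7.2 (Multipart)</a>
 */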
28 public class MimeMessageParser {
29
30 public static SimpleDateFormat dateFormat_1 = new SimpleDateFormat("EEE, d MMM yy HH:mm:ss Z");
31 public static SimpleDateFormat dateFormat_2 = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z");
32 public static SimpleDateFormat dateFormat_3 = new SimpleDateFormat("d MMM yyyy HH:mm:ss Z");
33 static int minReceivedHdrWidth = "Received: 0.0.0.0".length();
34
35 private MessageDigest md = null;
36 static final String VALID_DOMAIN_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789-.";
37
38 private static final void log(String s) {
39 System.out.println("MimeMessageParser: " + s);
40 }
41
42 /***
43 * Get The Hash value as a Hex-String. Calling this method resets the object's state to initial state.
44 * @return String representing the Hashvalue in hexadecimal format.
45 */
46 public String digout() {
47 byte[] digest= md.digest();
48 if (digest != null)
49 return StringUtils.hexEncode(digest);
50 else
51 return null;
52 }
53
54 final static String[] daysInDate = { "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
55
56 public static MimeMessage getMimeMessage(String msgText) throws MessagingException {
57 return new MimeMessage(null,
58 new ByteArrayInputStream(msgText.getBytes())
59 );
60 }
61
62
63
64
65
66 /***
67 * @param mimeMsg
68 * @return hex SHA1 of key fields in the header { received, from, to }
69 * @throws MessagingException
70 * @throws NoSuchAlgorithmException if SHA1 is not available
71 */
72 public String getUnique_SHA1(Enumeration mimeHeaders)
73 throws NoSuchAlgorithmException
74 {
75 if(md == null)
76 md = MessageDigest.getInstance("SHA1");
77
78 md.reset();
79 while(mimeHeaders.hasMoreElements()) {
80 Header h = (Header) mimeHeaders.nextElement();
81 String name = h.getName().trim().toLowerCase();
82 String value = h.getValue();
83 if(name.equals("received") || name.equals("from") || name.equals("to"))
84 md.update(value.getBytes());
85 }
86 return digout();
87 }
88
89 public static String byHost(String receivedLine) {
90 int byIndex = receivedLine.indexOf("by ");
91 if(byIndex == -1)
92 return null;
93
94 byIndex += "by ".length();
95 int recLen = receivedLine.length();
96 int i;
97 for (i = byIndex; i < recLen; i++) {
98 char ch = Character.toLowerCase(receivedLine.charAt(i));
99 if(VALID_DOMAIN_CHARS.indexOf(ch) == -1)
100 break;
101 }
102
103 if(i == byIndex)
104 return null;
105
106 return receivedLine.substring(byIndex, i);
107 }
108
109 private static Set parseReceivedIpLn(String ln) {
110 Set ips = new LinkedHashSet();
111
112 String ip = "";
113 ln += " ";
114 for (int i = 0; i < ln.length(); i++) {
115 char ch = ln.charAt(i);
116 if(Character.isDigit(ch) || ch == '.')
117 ip += ch;
118 else {
119 if(NetworkUtil.isIp(ip) && ! NetworkUtil.isRemoteIp(ip))
120 log("!!! SKIPPING LOCAL IP : " + ip);
121
122 if(NetworkUtil.isIp(ip) && ! ips.contains(ip) &&
123 NetworkUtil.isRemoteIp(ip)
124 )
125 ips.add(ip);
126
127 ip = "";
128 }
129 }
130 return ips;
131 }
132
133 /***
134 * If the sender used an IP address in their non-validated
135 * HELO command, skip it since it could be fake.
136 * @param ln
137 * @return
138 */
139 private static Set parseReceivedIpLn_skip_HELO(String ln) {
140 int heloIndex = ln.toLowerCase().indexOf("helo");
141 Set ipList;
142 if(heloIndex == -1)
143 return parseReceivedIpLn(ln);
144 else {
145 ipList = parseReceivedIpLn(ln);
146 Iterator it = ipList.iterator();
147 while(it.hasNext()) {
148 String ip = (String)it.next();
149
150 int ipIndex = ln.substring(heloIndex + 5).indexOf(ip);
151 if(ipIndex > -1 && ipIndex < 3) {
152 ipIndex += heloIndex + 5;
153
154 ln = ln.substring(0, ipIndex) +
155 ln.substring(ipIndex + ip.length(), ln.length());
156
157 log("Removed HELO IP " + ln);
158 if(ln.indexOf(ip) == -1)
159
160
161 ipList.remove(ip);
162
163
164 break;
165 }
166 }
167 }
168 return ipList;
169 }
170
171 static String getSendersIP(String receivedLine) {
172 Set ips = parseReceivedIpLn_skip_HELO(receivedLine);
173 Iterator it = ips.iterator();
174 while(it.hasNext()) {
175 String ip = (String)it.next();
176 if(receivedLine.indexOf("[" + ip + "]") != -1)
177 return ip;
178 }
179 it = ips.iterator();
180 while(it.hasNext()) {
181 String ip = (String)it.next();
182 if(receivedLine.indexOf("(" + ip + ")") != -1)
183 return ip;
184 }
185 if(ips.size() == 1)
186 return (String)ips.toArray()[0];
187
188 return null;
189 }
190
191 public static String getSpamComplaintIP(String[] received, int lastTrustedIp) {
192 String spamIp = null;
193 for (int i = 0; i < received.length; i++) {
194 String host = byHost(received[i]);
195 if(host == null)
196 continue;
197
198 if(NetworkUtil.isIp(host)) {
199 if(!NetworkUtil.isRemoteIp(host))
200
201 continue;
202 }
203 String ip = getSendersIP(received[i]);
204 if(ip != null) {
205 spamIp = ip;
206
207
208
209 if( i >= lastTrustedIp)
210 break;
211 }
212 }
213 return spamIp;
214 }
215
216 /***
217 * @param msgText
218 * @param ra
219 * @return Sender IP address, this could be <b>null</b> (undetermined)
220 * @throws MessagingException
221 * @throws TrustedNodeException
222 */
223 public static String getSpamComplaintIP(String msgText, ReceivedAnalysis ra)
224 throws MessagingException, TrustedNodeException
225 {
226 MimeMessage msg = getMimeMessage(msgText);
227 String[] received = msg.getHeader("received");
228 return getSpamComplaintIP(received, ra.lastCommonByHost(received));
229 }
230
231 public static String contentToString(Object content) throws IOException {
232 if(content instanceof String)
233 return (String) content;
234
235 if(content instanceof InputStream) {
236 InputStream in = (InputStream)content;
237 byte[] b = new byte[in.available()];
238 in.read(b);
239 return new String(b);
240 }
241 log("unknown content object: " + content.getClass().getName());
242 return content.toString();
243 }
244
245 /***
246 * Attempts to parse a date using first the standard MIME date format, and fails over
247 * to alternative formats.
248 *
249 * @param dateStr
250 * @return Date represented by dateStr
251 * @throws ParseException
252 */
253 public static Date parseDate(String dateStr) throws ParseException {
254 try {
255 return dateFormat_1.parse(dateStr);
256 } catch(ParseException e) {
257 try {
258 return dateFormat_2.parse(dateStr);
259 } catch(ParseException ex) {
260 return dateFormat_3.parse(dateStr);
261 }
262 }
263 }
264
265 /***
266 * @param text with a date string at the end of the line
267 * @return Fri, 23 Mar 2007 10:45:44 -0500 or <b>null</b>
268 */
269 public static String getDateString(String text) {
270 int startIndex = -1;
271 for (int i = 0; i < daysInDate.length; i++) {
272 startIndex = text.lastIndexOf(daysInDate[i]);
273 if(startIndex != -1)
274 break;
275 }
276 if(startIndex == -1) {
277
278 return null;
279 }
280
281 return text.substring(startIndex);
282
283 }
284
285 /***
286 * @param MimeMessage, date will be extracted from the headers
287 * @return "Date" property from header or from received line from
288 * furthest mail servers to the closest mail server. <b>Null</b> is returned
289 * if a date could not be found or parsed anywhere.
290 *
291 * @throws MessagingException
292 */
293 public static Date getDate(MimeMessage msg) {
294 try {
295 return _getDate(msg);
296 } catch(Exception e) {
297 log("Error parsing date");
298 try {
299 log(getAllHeaders(msg.getAllHeaders()).toString());
300 } catch(MessagingException ex) {
301
302 }
303 }
304 return new Date();
305 }
306
307 public static Date _getDate(MimeMessage msg) throws MessagingException {
308 Date today = new Date();
309
310 String[] received = msg.getHeader("received");
311 if(received == null)
312 log("Missing 'Received' header properties!");
313 else
314
315 for (int i = 0; i < received.length; i++) {
316 log(received[i]);
317 String dateStr = null;
318 try {
319 dateStr = getDateString(received[i]);
320 if(dateStr != null) {
321 Date msgDate = parseDate(dateStr);
322 if(msgDate.after(today))
323 log("'Received' date is after today, skipping: " + dateStr);
324 else
325 return msgDate;
326 }
327 } catch(ParseException ex) {
328 log("Unparsable 'Received' header property value:\n" + received[i]);
329 }
330 }
331
332
333 String[] dateHeader = msg.getHeader("date");
334 if(dateHeader != null) {
335 String dateStr = dateHeader[0];
336 try {
337 Date msgDate = parseDate(dateStr);
338 if(msgDate.after(today))
339 log("'Date' header is after today, skipping: " + dateStr);
340 else
341 return msgDate;
342
343 } catch(ParseException ex) {
344 log("Unparsable 'Date' header property value: " + dateHeader[0]);
345 }
346 }
347 log("Using current time. \n" +
348 "Could not parse a date/time from following 'recieved' or from 'date' properties... \n" +
349 getAllHeaders(msg.getAllHeaders()));
350
351 return new Date();
352 }
353
354
355
356
357
358
359
360
361
362
363
364 public static StringBuffer getAllHeaders(Enumeration msgEnum) {
365 StringBuffer headers = new StringBuffer();
366 while(msgEnum.hasMoreElements()) {
367 Header h = (Header) msgEnum.nextElement();
368 String name = h.getName();
369 String value = h.getValue();
370 headers.append(name + " : " + value + '\n');
371 }
372 return headers;
373 }
374
375 /***
376 * @param one like these:
377 * <br/>a453.domain.example.com, 61.236.8.142, 127.0.0.1
378 *
379 * @return one of the following for a given parameter:
380 * <br/>example.com, 61.236.8.142, internal_ip
381 */
382 public static String primaryDomain(String domain) {
383 if(domain == null)
384 return null;
385
386 if(NetworkUtil.isIp(domain))
387 return NetworkUtil.isRemoteIp(domain) ? domain : "internal_ip";
388
389 String[] domains = domain.split("//.");
390 int domainsLen = domains.length;
391 if(domainsLen == 1)
392 return domains[0];
393
394 return domains[domainsLen - 2] + '.' + domains[domainsLen - 1];
395 }
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439 }