View Javadoc

1   package net.spamcomplaint.mail;
2   
3   import java.io.ByteArrayInputStream;
4   import java.io.IOException;
5   import java.io.InputStream;
6   import java.security.MessageDigest;
7   import java.security.NoSuchAlgorithmException;
8   import java.text.ParseException;
9   import java.text.SimpleDateFormat;
10  import java.util.Date;
11  import java.util.Enumeration;
12  import java.util.Iterator;
13  import java.util.LinkedHashSet;
14  import java.util.Set;
15  
16  import javax.mail.Header;
17  import javax.mail.MessagingException;
18  import javax.mail.internet.MimeMessage;
19  
20  import net.spamcomplaint.util.NetworkUtil;
21  import net.spamcomplaint.util.StringUtils;
22  
23  /***
24   * 
25   * @author jcalfee
26   * @see http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
27   */
28  public class MimeMessageParser {
29      
30      public static SimpleDateFormat dateFormat_1 = new SimpleDateFormat("EEE, d MMM yy HH:mm:ss Z");
31      public static SimpleDateFormat dateFormat_2 = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z");
32      public static SimpleDateFormat dateFormat_3 = new SimpleDateFormat("d MMM yyyy HH:mm:ss Z");
33      static int minReceivedHdrWidth = "Received: 0.0.0.0".length();
34      
35      private MessageDigest md = null;
36      static final String VALID_DOMAIN_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789-.";
37      
38      private static final void log(String s) {
39          System.out.println("MimeMessageParser: " + s);
40      }
41      
42      /***
43       * Get The Hash value as a Hex-String. Calling this method resets the object's state to initial state.
44       * @return String representing the Hashvalue in hexadecimal format.
45       */
46      public String digout() {
47          byte[] digest= md.digest();
48          if (digest != null)
49              return StringUtils.hexEncode(digest);
50          else
51              return null;
52      }
53      
54      final static String[] daysInDate = { "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
55      
56      public static MimeMessage getMimeMessage(String msgText) throws MessagingException {
57          return new MimeMessage(null, 
58              new ByteArrayInputStream(msgText.getBytes())
59          );
60      }
61      
62      /*private static final void log(String s) {
63          System.out.println("MimeMessageParser: " + s);
64      }*/
65      
66      /***
67       * @param mimeMsg
68       * @return hex SHA1 of key fields in the header { received, from, to }
69       * @throws MessagingException
70       * @throws NoSuchAlgorithmException if SHA1 is not available
71       */
72      public String getUnique_SHA1(Enumeration mimeHeaders) 
73          throws NoSuchAlgorithmException
74      {
75          if(md == null)
76              md = MessageDigest.getInstance("SHA1");
77          
78          md.reset();
79          while(mimeHeaders.hasMoreElements()) {
80              Header h = (Header) mimeHeaders.nextElement();
81              String name = h.getName().trim().toLowerCase();
82              String value = h.getValue();
83              if(name.equals("received") || name.equals("from") || name.equals("to")) 
84                  md.update(value.getBytes());
85          }
86          return digout();
87      }
88      
89      public static String byHost(String receivedLine) {
90          int byIndex = receivedLine.indexOf("by ");
91          if(byIndex == -1)
92              return null;
93          
94          byIndex += "by ".length();
95          int recLen = receivedLine.length();
96          int i;
97          for (i = byIndex; i < recLen; i++) {
98              char ch = Character.toLowerCase(receivedLine.charAt(i));
99              if(VALID_DOMAIN_CHARS.indexOf(ch) == -1)
100                 break;
101         }
102         
103         if(i == byIndex)
104             return null;
105         
106         return receivedLine.substring(byIndex, i);
107     }
108 
109     private static Set parseReceivedIpLn(String ln) {
110         Set ips = new LinkedHashSet();
111         //log(ln);
112         String ip = "";
113         ln += " ";//Incase IP is the last thing on the line 
114         for (int i = 0; i < ln.length(); i++) {
115             char ch = ln.charAt(i);
116             if(Character.isDigit(ch) || ch == '.') 
117                 ip += ch;
118             else {
119                 if(NetworkUtil.isIp(ip) && ! NetworkUtil.isRemoteIp(ip))
120                     log("!!! SKIPPING LOCAL IP : " + ip);
121                 
122                 if(NetworkUtil.isIp(ip) && ! ips.contains(ip) && 
123                     NetworkUtil.isRemoteIp(ip)
124                 ) 
125                     ips.add(ip);
126                 
127                 ip = "";
128             }
129         }
130         return ips;
131     }
132 
133     /*** 
134      * If the sender used an IP address in their non-validated 
135      * HELO command, skip it since it could be fake. 
136      * @param ln
137      * @return
138      */
139     private static Set parseReceivedIpLn_skip_HELO(String ln) {
140         int heloIndex = ln.toLowerCase().indexOf("helo");
141         Set ipList;
142         if(heloIndex == -1) 
143             return parseReceivedIpLn(ln);
144         else {
145             ipList = parseReceivedIpLn(ln);
146             Iterator it = ipList.iterator();
147             while(it.hasNext()) {
148                 String ip = (String)it.next();
149                 
150                 int ipIndex = ln.substring(heloIndex + 5).indexOf(ip);
151                 if(ipIndex > -1 && ipIndex < 3) { //IP within 3 char after HELO
152                     ipIndex += heloIndex + 5;
153                     //remove the potentially fake IP
154                     ln = ln.substring(0, ipIndex) + 
155                         ln.substring(ipIndex + ip.length(), ln.length());
156                     
157                     log("Removed HELO IP " + ln);
158                     if(ln.indexOf(ip) == -1)
159                         //If the helo is a legit IP, the IP will be in the line twice
160                         //only remove if helo IP is the only occurence. 
161                         ipList.remove(ip);
162                     
163                     
164                     break;
165                 }
166             }
167         }
168         return ipList;
169     }
170 
171     static String getSendersIP(String receivedLine) {
172         Set ips = parseReceivedIpLn_skip_HELO(receivedLine);
173         Iterator it = ips.iterator();
174         while(it.hasNext()) {
175             String ip = (String)it.next();
176             if(receivedLine.indexOf("[" + ip + "]") != -1)
177                 return ip;
178         }
179         it = ips.iterator();
180         while(it.hasNext()) {
181             String ip = (String)it.next();
182             if(receivedLine.indexOf("(" + ip + ")") != -1)
183                 return ip;
184         }
185         if(ips.size() == 1)
186             return (String)ips.toArray()[0];
187         
188         return null;
189     }
190     
191     public static String getSpamComplaintIP(String[] received, int lastTrustedIp) {
192         String spamIp = null;
193         for (int i = 0; i < received.length; i++) {
194             String host = byHost(received[i]);
195             if(host == null)//qmail will put a line in missing "by mailhost.tld"
196                 continue;
197             
198             if(NetworkUtil.isIp(host)) {
199                 if(!NetworkUtil.isRemoteIp(host))
200                     //still in own network
201                     continue;
202             }
203             String ip = getSendersIP(received[i]);
204             if(ip != null) {
205                 spamIp = ip;
206                 //Go beyond the last common (trusted) host when an IP has not
207                 //been found yet.  This can happend if the ISP changes a mail 
208                 //server host name.
209                 if( i >= lastTrustedIp)
210                     break;
211             }
212         }
213         return spamIp;
214     }
215 
216     /***
217      * @param msgText
218      * @param ra
219      * @return Sender IP address, this could be <b>null</b> (undetermined)
220      * @throws MessagingException
221      * @throws TrustedNodeException
222      */
223     public static String getSpamComplaintIP(String msgText, ReceivedAnalysis ra)
224         throws MessagingException, TrustedNodeException
225     {
226         MimeMessage msg = getMimeMessage(msgText);
227         String[] received = msg.getHeader("received");
228         return getSpamComplaintIP(received, ra.lastCommonByHost(received));
229     }
230 
231     public static String contentToString(Object content) throws IOException {
232         if(content instanceof String)
233             return (String) content;
234         
235         if(content instanceof InputStream) {
236             InputStream in = (InputStream)content;
237             byte[] b = new byte[in.available()];
238             in.read(b);
239             return new String(b);
240         }
241         log("unknown content object: " + content.getClass().getName());
242         return content.toString();
243     }
244 
245     /***
246      * Attempts to parse a date using first the standard MIME date format, and fails over
247      * to alternative formats.
248      * 
249      * @param dateStr
250      * @return Date represented by dateStr
251      * @throws ParseException
252      */
253     public static Date parseDate(String dateStr) throws ParseException {
254         try {
255             return dateFormat_1.parse(dateStr);
256         } catch(ParseException e) {
257             try {
258                 return dateFormat_2.parse(dateStr);
259             } catch(ParseException ex) {
260                 return dateFormat_3.parse(dateStr);
261             }
262         }
263     }
264 
265     /***
266      * @param text with a date string at the end of the line
267      * @return Fri, 23 Mar 2007 10:45:44 -0500 or <b>null</b>
268      */
269     public static String getDateString(String text) {
270         int startIndex = -1;
271         for (int i = 0; i < daysInDate.length; i++) {
272             startIndex = text.lastIndexOf(daysInDate[i]);
273             if(startIndex != -1)
274                 break;
275         }
276         if(startIndex == -1) {
277             //log("Date string does not contain the day abbreviation: " + text);
278             return null;
279         }
280         
281         return text.substring(startIndex);
282         
283     }
284 
285     /***
286      * @param MimeMessage, date will be extracted from the headers
287      * @return "Date" property from header or from received line from 
288      *  furthest mail servers to the closest mail server.  <b>Null</b> is returned
289      *  if a date could not be found or parsed anywhere.
290      *      
291      * @throws MessagingException
292      */
293     public static Date getDate(MimeMessage msg) {
294         try {
295             return _getDate(msg);
296         } catch(Exception e) {
297             log("Error parsing date");
298             try {
299                 log(getAllHeaders(msg.getAllHeaders()).toString());
300             } catch(MessagingException ex) {
301                 
302             }
303         }
304         return new Date();
305     }
306     
307     public static Date _getDate(MimeMessage msg) throws MessagingException {
308         Date today = new Date();
309 
310         String[] received = msg.getHeader("received");
311         if(received == null) 
312             log("Missing 'Received' header properties!");
313         else
314             //this is the best date to go on .. especially the 1st header line
315             for (int i = 0; i < received.length; i++) {
316                 log(received[i]);
317                 String dateStr = null;
318                 try {
319                     dateStr = getDateString(received[i]);
320                     if(dateStr != null) {
321                         Date msgDate = parseDate(dateStr);
322                         if(msgDate.after(today))
323                             log("'Received' date is after today, skipping: " + dateStr);
324                         else
325                             return msgDate;
326                     }
327                 } catch(ParseException ex) {
328                     log("Unparsable 'Received' header property value:\n" + received[i]);
329                 } 
330             }
331         
332         //this date can really off 
333         String[] dateHeader = msg.getHeader("date");
334         if(dateHeader != null) {
335             String dateStr = dateHeader[0];
336             try {
337                 Date msgDate = parseDate(dateStr);
338                 if(msgDate.after(today))
339                     log("'Date' header is after today, skipping: " + dateStr);
340                 else 
341                     return msgDate;
342                 
343             } catch(ParseException ex) {
344                 log("Unparsable 'Date' header property value: " + dateHeader[0]);
345             }
346         }
347         log("Using current time.  \n" + 
348             "Could not parse a date/time from following 'recieved' or from 'date' properties...  \n" +
349             getAllHeaders(msg.getAllHeaders()));
350         
351         return new Date();
352     }
353 
354     /* not used, works fine
355      * public static String headerToStr(Part part, String key) throws MessagingException {
356         StringBuffer sb = new StringBuffer(); 
357         String[] val = part.getHeader(key);
358         for (int i = 0; i < val.length; i++)
359             sb.append(key + ": " + val[0] + "\n");
360         
361         return sb.toString();
362     }*/
363 
364     public static StringBuffer getAllHeaders(Enumeration msgEnum) {
365         StringBuffer headers = new StringBuffer();
366         while(msgEnum.hasMoreElements()) {
367             Header h = (Header) msgEnum.nextElement();
368             String name = h.getName();
369             String value = h.getValue();
370             headers.append(name + " : " + value + '\n');
371         }
372         return headers;
373     }
374     
375     /***
376      * @param one like these: 
377      *   <br/>a453.domain.example.com, 61.236.8.142, 127.0.0.1
378      * 
379      * @return one of the following for a given parameter: 
380      *   <br/>example.com, 61.236.8.142, internal_ip
381      */
382     public static String primaryDomain(String domain) {
383         if(domain == null)
384             return null;
385         
386         if(NetworkUtil.isIp(domain))
387             return NetworkUtil.isRemoteIp(domain) ? domain : "internal_ip";
388         
389         String[] domains = domain.split("//.");
390         int domainsLen = domains.length;
391         if(domainsLen == 1)
392             return domains[0];
393         
394         return domains[domainsLen - 2] + '.' + domains[domainsLen - 1];
395     }
396     
397     /* @todo ? use UTF-8 to decode message and display special characters.
398      * this may shorten the message length so that must be considered
399      * in the protocol.
400      * 
401 /*String test () {
402         BufferedReader dataInput;
403         try {
404             InputStreamReader inr = new InputStreamReader(in,"UTF-8");
405             dataInput = new BufferedReader(inr);
406         } catch (UnsupportedEncodingException e) {
407             e.printStackTrace();
408             dataInput = new BufferedReader(new InputStreamReader(in));
409         }
410         
411         StringBuffer out = new StringBuffer();
412         String line;
413         while((line = dataInput.readLine()) != null) {
414             out.append(line);
415             out.append(System.getProperty("line.separator")); // readLine discards line separator
416         }
417         return out.toString();
418         
419         StringBuffer charset = new StringBuffer();
420         int charSetIndex = message.indexOf("charset=");
421         if(charSetIndex != -1) {
422             for (int i = charSetIndex + "charset=".length(); i < message.length(); i++) {
423                 char ch = message.charAt(i);
424                 if(ch == '\n' || ch == ' ')
425                     break;
426                 
427                 if(ch == '\"')
428                     continue;
429                 
430                 charset.append(ch);
431             }
432             log("charset = " + charset);
433         }
434         if(charset.length() == 0)
435             charset = new StringBuffer("UTF-8");
436     }*/
437  
438 
439 }