#!/usr/bin/env python # # smtpParser.py # # Copyright 2009 Serge Gorbunov # # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA. # Scapy collection of classes is required to this program. # Download: http://www.secdev.org/projects/scapy/ import sys, getopt, urlparse, string import re, os, tempfile import base64 from scapy.all import * from streamExtractor import * from smtp import * # Global Variables # ---------------------------------------------------------------------- # outputFolderName is the name of the folder followed by the index # of the SMTP stream. Analysis of every separate SMTP stream are # stored in separated directories for clarity. outputDirName = "stream" outputSummaryFile = "streamSummary.txt" defaultSMTPport = 587 defaultExtractImages = False defaultExtractAttachments = False class smtpParser(): # Fuction: Extracts SMTP from the TCP stream and dumps it into summary file # Attatchments are also decoded and saved in the output # directory. # # Input: stream - TCP data stream # dirName - Directory where summary and attatchments will be saved # fileName - Name of the file where email infor will be stored. # # Output: Mail object containing basic information about the mail extracted def extractSMTPinfo( self, stream ): # Since a request from the server might not necessary # follow by the reply from the client in the pcap, we simply record # the ACK number from the request for the given field and later # when a packet with corresponding Seq is met, we extract its # payload and store in the appropriate field. usernameSeq = None passwordSeq = None dataSeq = None dataAck = None attachmentData = False dataArray = '' attachmentName = '' mail = SMTPmail( stream[0][Ether][IP].src, stream[0][Ether][IP].dst, stream[0][Ether][IP][TCP].sport, stream[0][Ether][IP][TCP].dport ) # Iterate through the stream and looking for basic email headers # for packet in stream: for packetIndex in range (0, len(stream)): packet = stream[packetIndex] attatchments = [] try: seq = packet[Ether][IP][TCP].seq ack = packet[Ether][IP][TCP].ack arr = packet[Ether][IP][TCP][Raw].load.split(' ') for index in range (0, len(arr)): # Look for the introduction field if arr[index] == 'EHLO' or arr[index] == 'ECHO': client = arr[index+1].rstrip("\r\n") mail.client = client # Username request elif arr[index] == '334' and arr[index+1] == 'VXNlcm5hbWU6\r\n': usernameSeq = packet[Ether][IP][TCP].ack # Password request elif arr[index] == '334' and arr[index+1] == 'UGFzc3dvcmQ6\r\n': passwordSeq = packet[Ether][IP][TCP].ack elif arr[index] == 'MAIL' and arr[index+1] == 'FROM:': mail.mailfrom = arr[index+2].rsplit("\r\n")[0] elif arr[index] == 'RCPT' and arr[index+1] == 'TO:': mail.mailto = arr[index+2].rsplit("\r\n")[0] # Once the elif arr[index] == '354': dataSeq = packet[Ether][IP][TCP].ack if ( usernameSeq != None and usernameSeq == seq ): mail.username = self.getPayloadAndDecode( packet ) usernameSeq = None elif ( passwordSeq != None and passwordSeq == seq ): mail.password = self.getPayloadAndDecode( packet ) passwordSeq = None elif ( dataSeq != None and dataSeq == seq): dataAck = packet[Ether][IP][TCP].ack # If the ack of the packet mathes to the ack number for the date response # Then we attatch the data to the dataArray if ( dataAck != None and dataAck == ack ): mail.dataArray = "%s%s" % ( mail.dataArray, packet[Ether][IP][TCP][Raw].load ) if re.search(".filename", packet[Ether][IP][TCP][Raw].load): # Add the attachment name to the dic of attachments m = re.search( r'filename=(.*)', packet[Ether][IP][TCP][Raw].load) attachmentName = m.group().split('"')[1] data = arr[len(arr)-1].split("\r\n\r\n")[1] # Add a new attachment with its beginning of the data stream mail.attachments[attachmentName] = data attachmentData = True # If attachment data is coming then add it to the specified attachment data stream elif (attachmentData == True): if (re.search("\r\n\r\n", packet[Ether][IP][TCP][Raw].load )): attachmentData = False data = arr[len(arr)-1].split("\r\n\r\n")[0] mail.attachments[attachmentName] += data else: mail.attachments[attachmentName] += packet[Ether][IP][TCP][Raw].load except: pass return mail # Function: Extracts the data payload from the packet # # Input: packet # # Output: SMTP payload def getPayloadAndDecode( self, packet ): data = packet[Ether][IP][TCP][Raw].load.rsplit("\r\n")[0] return self.decodeString( data ) # Function: Decodes a string from base 64 # # Input: Base 64 encoded string # # Ouput: Plain string def decodeString( self, str ): return base64.b64decode( str ) # Function: Prints mail information and dump the attachments from email object # # Input: mail - Mail object # dir - Output directory for the stream # outputFile - Output filename for the actual email # attSwitch - Extract attachments from the email (True/False) # imagesSwitch - Extract images from any docx files (True/False) # # Output: True/False def dumpSMTPinfo( self, mail, dir, outputFile, attSwitch, imagesSwitch ): if os.path.exists( dir ): print "%s directory already exists. " % dir print "Please specify a different directory for data streams" return False else: os.mkdir( dir ) outputFile = dir + "/" + outputFile if os.path.exists( outputFile ): print "%s file already exists" % outputFile return False f = open( outputFile, 'w') f.write( "###___General_Mail_Info___###\n\n" ) f.write( "Source IP: %s\n" % mail.src ) f.write( "Destanation IP: %s\n" % mail.dst ) f.write( "Client ID: %s\n\n" % mail.client ) f.write( "Source port: %s\n" % mail.sport ) f.write( "Destanation Post: %s\n\n" % mail.dport ) f.write( "Username: %s\n" % mail.username ) f.write( "Password: %s\n\n" % mail.password ) f.write( "Mail From: %s\n" % mail.mailfrom ) f.write( "Mail To: %s\n" % mail.mailto ) f.write( "\n" ) # The attachments in mail objects are stored in the dictionary # with attachment name -> binary data decoded from base 64 # Iterate through every attachment and store it with the # corresponding name if ( True == attSwitch ): for att, data in mail.attachments.items(): attachment = open( dir + "/" + att, "w" ) attachment.write( self.decodeString( data ) ) attachment.close() f.write( "Attachment checksum found in the mail:\n" ) cmd = 'md5sum ' + dir + '/' + att run = os.popen( cmd ) checksum = run.read().split(' ')[0] f.write( "%s %s\n" % ( att, checksum ) ) if ( True == imagesSwitch ): media = self.extractImagesFromDOCX( dir + "/" + att, dir ) if ( False != media ): f.write( "\tMedia checksums found in the attachment:\n" ) for k, v in media.iteritems(): f.write ( "\t%s %s\n" % ( k, v ) ) f.write( "\n" ) f.write( "###___Mail_DATA___###\n\n" ) f.write( mail.dataArray ) f.write( "\n" ) f.close() return True # Function: Extracts any images from docx files stored inside # and copies them into the destanation directory. # # Input: filename - DOCX filepath # dstDir - Destanation directory for the images # # Output: A dictionary of MD5 checksums for every image found # False otherwise def extractImagesFromDOCX( self, filename, dstDir ): if ( not os.path.exists( filename ) ): return False media = {} tempDir = tempfile.mkdtemp() # Extract docx into a temp directory cmd = 'unzip ' + filename + ' -d ' + tempDir + '>> /dev/null' run = os.system( cmd ) cmd = tempDir + '/word/media' mediaList = os.listdir( cmd ) # Iterate through the media content and collect checksums for f in mediaList: filepath = tempDir + '/word/media/' + f cmd = 'cp ' + filepath + ' ' + dstDir os.system( cmd ) cmd = 'md5sum ' + filepath run = os.popen( cmd ) media[f] = run.read().split(' ')[0] cmd = 'rm -rf ' + tempDir os.system( cmd ) return media def usage(basename): print "\nSMTP Parser" print "Usage:" print "\t %s -f [options] " % basename print "" print "\t -f (--file) Mandatory option followed by the pcap input file name" print "" print "[Options]" print "\t -h (--help) Print the help page" print "\t -d (--destanation) Destination path for output streams" print "\t -p (--port) SMTP destination port number (default is 587)" print "\t -a (--attachments) Extract attachments from the emails" print "\t -i (--images) Extract images from any docx files" print "" print "Examples" print "\t %s -f evidence.pcap" %basename print "\t %s -f evidence.pcap -p 25" %basename print "\t %s -f evidence.pcap -d /home/user/" %basename print "\t %s -f evidence.pcap -a -i" %basename def main(argv): dataStreams = [] destanationPath = None inFile = None smtpPort = defaultSMTPport extractImagesSwitch = defaultExtractImages extractAttSwitch = defaultExtractAttachments # Check options used to run the program try: opts, args = getopt.getopt(sys.argv[1:], "hf:d:p:ai", ["help", "file=", "destanation=", "port=", "attachments", "images"]) except getopt.GetoptError, err: # print help information and exit print str(err) usage(sys.argv[0]) sys.exit(2) for opt, val in opts: if opt in ("-h", "--help"): usage(sys.argv[0]) sys.exit() elif opt in ("-f", "--file"): inFile = val elif opt in ("-d", "--destanation"): destanationPath = val elif opt in ("-p", "--port"): smtpPort = val elif opt in ("-a", "--attachments"): extractAttSwitch = True elif opt in ("-i", "--images"): extractImagesSwitch = True else: assert False, "Unrecognized Option" # Check if -f option was not used, then print usage and exit if (inFile == None): print "ERROR: No input file specified" usage(sys.argv[0]) sys.exit() # Check if valid destanation path was specified if ( None != destanationPath ): if ( not os.path.exists( destanationPath ) or not os.path.isdir( destanationPath ) ): print "ERROR: Invalid destanation path specified" sys.exit() # Extract data streams se = streamExtractor() dataStreams = se.extractStreams(inFile, smtpPort) counter = 0 if ( 0 == len( dataStreams ) ): print "No streams were extracted for destanation port %s" % smtpPort else: print "%d streams extracted from the file on port %d" % ( len( dataStreams ), smtpPort ) # Dump every stream into separate dir/summaryFile # Extract attachments and images from DOCX files if specified sp = smtpParser() for dataStream in dataStreams: if ( None != destanationPath): dir = destanationPath + '/' + outputDirName + str(counter) else: dir = outputDirName + str(counter) # Extractor extract the content of the mail into an object # Dumper stores the content/attachments/images on the hard-drive mail = sp.extractSMTPinfo( dataStream ) print 'Storing stream #%s: srcIP = %s; dstIP = %s into %s' \ % ( counter, mail.src, mail.dst, os.path.abspath(dir) ) if ( True == sp.dumpSMTPinfo( mail, dir, outputSummaryFile, extractAttSwitch, extractImagesSwitch ) ): print "Successfully stored stream #%s" % (counter) print "" else: print "Error storing stream #%s" % (counter) print "" counter += 1 if __name__ == '__main__': main(sys.argv[0:])