This is a simple script that recurses through the
.eml
messages in a directory and pulls out all of the base64
encoded attachments. For those of you who are wondering what
base64
is, it's an encoding that uses only sixty-four different characters to transmit information. The email system uses it to send documents around so that the protocol doesn't have to be reconfigured to account for content that isn't text.
Code
#!/usr/bin/env python
import email.parser
import os
import sys
import base64
# Directory that is searched (recursively) for .eml messages.
rootdir = "/path/to/.eml/messages/"

# Collect every path before extracting anything, so files written during
# extraction can never be picked up by the directory walk itself.
fileList = []
for root, subFolders, files in os.walk(rootdir):
    for name in files:
        fileList.append(os.path.join(root, name))

for path in fileList:
    if not path.endswith(".eml"):
        continue

    # Parse the whole message; the context manager closes the file promptly
    # instead of leaking one handle per message.
    parser = email.parser.FeedParser()
    with open(path) as handle:
        parser.feed(handle.read())
    message = parser.close()

    for part in message.walk():
        fn = part.get_filename()
        if fn is None:
            # Parts without a filename (bodies, containers) are not attachments.
            continue

        # The filename comes from the (untrusted) message: keep only its last
        # component so an attachment cannot be written outside the current
        # directory via "../" or an absolute path.
        safe_name = os.path.basename(fn)
        if not safe_name:
            continue

        # decode=True honours the part's Content-Transfer-Encoding (base64,
        # quoted-printable, ...) rather than assuming base64 for everything,
        # and hands back the raw payload when the encoded data is malformed.
        data = part.get_payload(decode=True)
        if data is None:
            # Multipart containers have no single payload to write.
            continue

        with open(safe_name, 'wb') as out:
            out.write(data)
Extensions
- This script isn't very efficient, because it does the decoding in Python.
- It would be nice to pull arguments from the command line using
sys.argv
Update 2013-09-04: Python 3 version
#!/usr/bin/env python3
import email.parser
import os
import sys
import base64
import binascii
import sys
def _save_attachment(part, filename, outdir):
    """Write the decoded payload of *part* to disk under a sanitised name.

    Raises:
        ValueError: if *filename* has no usable final component or the part
            has no single decodable payload (e.g. a multipart container).
        OSError: if the output file cannot be written.
    """
    # The filename comes from the (untrusted) message: keep only its last
    # component so "../x" or an absolute path cannot escape the target dir.
    name = os.path.basename(filename)
    if name in ("", ".", ".."):
        raise ValueError("unusable attachment filename: {!r}".format(filename))

    # Decode *before* opening the output file so a failure never leaves an
    # empty file behind. decode=True honours Content-Transfer-Encoding.
    data = part.get_payload(decode=True)
    if data is None:
        raise ValueError("no decodable payload for {!r}".format(filename))

    target = name if outdir is None else os.path.join(outdir, name)
    with open(target, "wb") as out:
        out.write(data)


def extract(rootdir, outdir=None):
    """Extract every named attachment from the ``.eml`` files under *rootdir*.

    The directory tree is walked recursively. Each file whose path ends in
    ``.eml`` is parsed, and every message part that carries a filename is
    decoded and written to disk. Progress and per-attachment failures are
    reported with ``print``; a failing attachment does not stop the run.

    Args:
        rootdir: Directory to search for ``.eml`` files.
        outdir: Directory that receives the attachments. ``None`` (the
            default) writes to the current working directory. The directory
            is created if it does not exist.
    """
    if outdir is not None:
        os.makedirs(outdir, exist_ok=True)

    # Snapshot the paths first so files written during extraction are never
    # picked up by the walk itself.
    paths = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(rootdir)
        for name in names
    ]

    for path in paths:
        if not path.endswith(".eml"):
            continue

        parser = email.parser.BytesFeedParser()
        with open(path, "rb") as handle:
            parser.feed(handle.read())
        message = parser.close()
        print("Checking {}".format(path))

        for part in message.walk():
            fn = part.get_filename()
            if fn is None:
                # Parts without a filename are bodies/containers, not attachments.
                continue
            try:
                _save_attachment(part, fn, outdir)
            except Exception:
                # Deliberately broad: this is a best-effort batch job and one
                # bad attachment must not abort the remaining messages.
                print("Error extracting item from {}".format(path))
if __name__ == "__main__":
    # Script entry point: expects the directory holding the .eml files as the
    # only command-line argument.
    if len(sys.argv) == 1:
        print("usage: {} path/to/.eml/files".format(sys.argv[0]))
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is missing under `python -S` or in frozen apps.
        sys.exit(1)
    extract(sys.argv[1])
No comments:
Post a Comment