python imaplib to get gmail inbox subjects titles and sender name
I'm using Python's imaplib to connect to my Gmail account. I want to retrieve the top 15 messages (unread or read, it doesn't matter) and display just the subjects and sender names (or addresses), but I don't know how to display the contents of the inbox.
Here is my code so far (successful connection)
import imaplib
# Open an SSL-wrapped IMAP session to Gmail and authenticate.
mail = imaplib.IMAP4_SSL(host='imap.gmail.com')
mail.login(user='mygmail@gmail.com', password='somecrazypassword')
# Ask the server for the available mailboxes (the reply is not used here),
# then make the inbox the current mailbox.
mail.list()
mail.select(mailbox='inbox')
#need to add some stuff in here
mail.logout()
I believe this should be simple enough; I'm just not familiar enough with the commands of the imaplib library. Any help would be much appreciated...
UPDATE thanks to Julian I can iterate through each message and retrieve the entire contents with:
# Look up every message in the selected mailbox; data[0] is split on
# whitespace to obtain the individual message numbers.
typ, data = mail.search(None, 'ALL')
message_numbers = data[0].split()
for num in message_numbers:
    # Fetch the complete message and print it under its number.
    typ, data = mail.fetch(num, '(RFC822)')
    raw_message = data[0][1]
    print('Message %s\n%s\n' % (num, raw_message))
mail.close()
but I want just the subject and the sender. Is there an imaplib command for these items, or will I have to parse the entire contents of data[0][1] for the text: Subject, and Sender?
UPDATE OK, got the subject and sender part working, but the iteration (1, 15) apparently runs in ascending order, showing me the oldest messages first. How can I change this? I tried doing this:
# Attempted loop over the last 15 positions of data[0]; note that the body
# prints the whole `data` object on every pass and never uses `i`.
upper_bound = len(data[0])
for i in range(upper_bound - 15, upper_bound):
    print(data)
but that just gives me None
for all 15 iterations... Any ideas? I've also tried mail.sort('REVERSE DATE', 'UTF-8', 'ALL')
but Gmail doesn't support the .sort() function
UPDATE Figured out a way to do it:
#....^other code is the same as above except need to import email module
mail.select('inbox')
typ, data = mail.search(None, 'ALL')
# data[0] is split on whitespace into the individual message ids; the last
# entry is treated as the most recent message.
id_list = data[0].split()
# Walk over at most the 15 newest ids, newest first. Slicing (instead of
# counting down from the latest id) also works when the mailbox holds fewer
# than 15 messages or none at all, where counting down would request
# message 0 or negative numbers.
for message_id in reversed(id_list[-15:]):
    typ, data = mail.fetch(message_id, '(RFC822)')
    for response_part in data:
        if not isinstance(response_part, tuple):
            continue
        raw_message = response_part[1]
        # Python 3 delivers bytes here; message_from_string needs text.
        if not isinstance(raw_message, str):
            raw_message = raw_message.decode('utf-8', 'replace')
        msg = email.message_from_string(raw_message)
        # Either header may be absent; fall back to an empty string so the
        # string operations below cannot fail on None.
        varSubject = msg['subject'] or ''
        varFrom = msg['from'] or ''
        # remove the brackets around the sender email address
        varFrom = varFrom.replace('<', '').replace('>', '')
        # add ellipsis (...) if subject length is greater than 35 characters
        if len(varSubject) > 35:
            varSubject = varSubject[0:32] + '...'
        # The address is the last whitespace-separated token of the From
        # header once the brackets are gone.
        from_tokens = varFrom.split()
        sender = from_tokens[-1] if from_tokens else ''
        print('[' + sender + '] ' + varSubject)
This gives me the 15 most recent message subjects and sender addresses in descending order, as requested! Thanks to all who helped!
# Open the inbox read-only and print three headers for messages 1..29.
c.select('INBOX', readonly=True)
wanted_headers = ['subject', 'to', 'from']
for i in range(1, 30):
    typ, msg_data = c.fetch(str(i), '(RFC822)')
    for response_part in msg_data:
        # Items that are not tuples are skipped; the message text is taken
        # from the second element of each tuple.
        if not isinstance(response_part, tuple):
            continue
        msg = email.message_from_string(response_part[1])
        for header in wanted_headers:
            print('%-8s: %s' % (header.upper(), msg[header]))
This should give you an idea of how to retrieve the subject and the sender.
This was my solution to get the useful bits of information from emails:
import datetime
import email
import email.header
import email.utils
import imaplib
import mailbox
EMAIL_ACCOUNT = "your@gmail.com"
PASSWORD = "your password"


def _header_text(message, name):
    """Return header *name* of *message* decoded to text ('' when absent)."""
    raw_value = message[name]
    if raw_value is None:
        return ""
    return str(email.header.make_header(email.header.decode_header(raw_value)))


mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')

result, data = mail.uid('search', None, "UNSEEN")  # (ALL/UNSEEN)
# Split the search reply once; x numbers the output files from 0.
for x, latest_email_uid in enumerate(data[0].split()):
    result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
    # result, email_data = conn.store(num,'-FLAGS','\\Seen')
    # this might work to set flag to seen, if it doesn't already
    raw_email = email_data[0][1]
    # Parse the bytes directly so messages that are not valid UTF-8 do not
    # fail before the parser has seen their declared charsets.
    email_message = email.message_from_bytes(raw_email)

    # Header Details
    # Reset per message so a missing or unparsable Date header can neither
    # raise NameError nor reuse the previous message's date.
    local_message_date = ""
    date_header = email_message['Date']
    date_tuple = email.utils.parsedate_tz(str(date_header)) if date_header else None
    if date_tuple:
        local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
        local_message_date = local_date.strftime("%a, %d %b %Y %H:%M:%S")
    email_from = _header_text(email_message, 'From')
    email_to = _header_text(email_message, 'To')
    subject = _header_text(email_message, 'Subject')

    # Body details
    file_name = "email_" + str(x) + ".txt"
    for part in email_message.walk():
        if part.get_content_type() != "text/plain":
            continue
        body = part.get_payload(decode=True)
        if body is None:
            continue
        # Honour the part's declared charset; fall back to UTF-8 when it is
        # missing or unknown to Python.
        charset = part.get_content_charset() or 'utf-8'
        try:
            body_text = body.decode(charset, errors='replace')
        except LookupError:
            body_text = body.decode('utf-8', errors='replace')
        # Every text/plain part of one message is written to the same file
        # name, so a later part replaces an earlier one.
        with open(file_name, 'w', encoding='utf-8') as output_file:
            output_file.write("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s" %(email_from, email_to,local_message_date, subject, body_text))
For those looking for how to check mail and parse the headers, this is what I used:
def parse_header(str_after, checkli_name, mailbox) :
    """Print the headers of every message in *mailbox* sent yesterday.

    Uses the module-level IMAP connection ``m``. ``str_after`` and
    ``checkli_name`` are accepted but not used by this function.
    """
    import email

    #typ, data = m.search(None,'SENTON', str_after)
    print(mailbox)
    m.SELECT(mailbox)
    # Yesterday's date in the day-month-year form used in the search below.
    # Alternatives: today's date via datetime.date.today(), or a fixed
    # string such as "23-Jul-2012".
    yesterday = datetime.date.today() - datetime.timedelta(1)
    date = yesterday.strftime("%d-%b-%Y")
    print(date)
    result, data = m.uid('search', None, '(SENTON %s)' % date)
    print(data)
    for latest_email_uid in data[0].split():
        print(latest_email_uid)
        result, data = m.uid('fetch', latest_email_uid, '(RFC822)')
        email_message = email.message_from_string(data[0][1])
        print(email_message['To'])
        print(email_message['Subject'])
        print(email.utils.parseaddr(email_message['From']))
        print(email_message.items())  # print all headers
I was looking for a ready-made, simple script to list the latest inbox messages via IMAP without sorting through all messages. The information here is useful, though it is DIY and misses some aspects. First, IMAP4.select
returns the message count. Second, subject header decoding isn't straightforward.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import imaplib
import email
from email.header import decode_header
import HTMLParser
# Shared HTMLParser instance; only its unescape() method is used, to turn
# XML/HTML entities in header values back into plain characters.
_parser = HTMLParser.HTMLParser()
def decodeHeader(value):
    """Decode one raw header value and unescape XML/HTML entities in it.

    Returns None when the header is absent (``value is None``) instead of
    failing on ``None.startswith``. Only the first fragment returned by
    ``decode_header`` is used, so a header made of several encoded chunks
    is truncated to its first chunk.
    """
    if value is None:
        return None
    # An encoded word wrapped in double quotes is not recognised by
    # decode_header, so the quotes are stripped first.
    if value.startswith('"=?'):
        value = value.replace('"', '')
    value, encoding = decode_header(value)[0]
    if encoding:
        value = value.decode(encoding)
    return _parser.unescape(value)
def listLastInbox(top = 4):
    """Yield subject/from/date dicts for the newest *top* inbox messages.

    Messages are yielded newest first. The message count is read from the
    reply to SELECT, so no search over the whole mailbox is needed. The
    connection is logged out when the generator finishes, is closed early,
    or an error occurs.
    """
    mailbox = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        mailbox.login('mygmail@gmail.com', 'somecrazypassword')
        selected = mailbox.select('INBOX')
        assert selected[0] == 'OK'
        messageCount = int(selected[1][0])
        # Clamp at 0 so a mailbox with fewer than `top` messages does not
        # make the loop request message number 0 or below.
        stop = max(messageCount - top, 0)
        for i in range(messageCount, stop, -1):
            response = mailbox.fetch(str(i), '(RFC822)')[1]
            for part in response:
                if isinstance(part, tuple):
                    message = email.message_from_string(part[1])
                    yield {h: decodeHeader(message[h]) for h in ('subject', 'from', 'date')}
    finally:
        # Runs on normal exhaustion, on generator close and on errors.
        mailbox.logout()
if __name__ == '__main__':
    # Print each message's headers under a 40-dash separator line.
    separator = '-' * 40
    for message in listLastInbox():
        print(separator)
        for h, v in message.items():
            print(u'{0:8s}: {1}'.format(h.upper(), v))
BODY
gets almost everything and marks the message as read.
BODY[<parts>]
gets just those parts.
BODY.PEEK[<parts>]
gets the same parts, but doesn't mark the message read.
<parts>
can be HEADER
or TEXT
or HEADER.FIELDS (<list of fields>)
or
HEADER.FIELDS.NOT (<list of fields>)
This is what I use: typ, data = connection.fetch(message_num_s, b'(BODY.PEEK[HEADER.FIELDS (SUBJECT FROM)])')
`
def safe_encode(seq):
    """Yield every element of *seq* as bytes.

    *seq* may be a list or tuple of values, or a single value (which is
    treated as a one-element sequence). ``str`` is encoded with the default
    codec, ``int``/``float`` go through ``str()`` first, and ``bytes`` pass
    through unchanged.

    Raises:
        ValueError: if an element is of any other type.
    """
    # isinstance, not `seq not in (list, tuple)`: the latter compares seq
    # with the type objects themselves, so real lists and tuples were
    # wrapped once more and then rejected below.
    if not isinstance(seq, (list, tuple)):
        seq = [seq]
    for i in seq:
        if isinstance(i, bytes):
            yield i
        elif isinstance(i, str):
            yield i.encode()
        elif isinstance(i, (int, float)):
            yield str(i).encode()
        else:
            raise ValueError('cannot encode %r as bytes' % (i,))
def fetch_fields(connection, message_num, field_s):
    """Fetch just the fields we care about. Parse them into a dict

    Args:
        connection: object whose ``fetch(message_num, parts)`` returns a
            ``(typ, data)`` pair, as ``imaplib`` connections do.
        message_num: message number (int, str or bytes).
        field_s: header names, either one space-separated string/bytes
            value or a list/tuple of names.

    Returns:
        ``(typ, items)`` where *items* maps each capitalized header name
        (bytes) to its raw value (bytes). When *typ* is not ``'OK'`` the
        untouched ``(typ, data)`` reply is returned instead.
    """
    if isinstance(field_s, (list, tuple)):
        # Encode the names one at a time so each is handled as a single value.
        field_s = b' '.join(
            encoded for name in field_s for encoded in safe_encode(name))
    else:
        field_s = tuple(safe_encode(field_s))[0]
    message_num = tuple(safe_encode(message_num))[0]
    # BODY.PEEK reads the headers without marking the message as read.
    typ, data = connection.fetch(
        message_num, b'(BODY.PEEK[HEADER.FIELDS (%s)])' % (field_s.upper()))
    if typ != 'OK':
        return typ, data  # change this to an exception if you'd rather
    items = {}
    # Nothing to parse when the reply carries no (envelope, payload) tuple.
    if not data or not isinstance(data[0], tuple):
        return typ, items
    lastkey = None
    for line in data[0][1].splitlines():
        if not line.strip():
            continue  # blank separator line
        # A line starting with whitespace continues the previous header,
        # even if the continued text contains a ':'. A line without any
        # ':' is treated the same way.
        is_continuation = line[:1] in (b' ', b'\t') or b':' not in line
        if is_continuation:
            if lastkey is not None:
                items[lastkey] += line
            continue
        lastkey, value = line.strip().split(b':', 1)
        # not all servers capitalize the same, and some just leave it
        # as however it arrived from some other mail server.
        lastkey = lastkey.capitalize()
        items[lastkey] = value
    return typ, items
`
You drop it into your code example by replacing the call to 'mail.fetch()' with fetch_fields(mail, i, 'SUBJECT FROM')
or fetch_fields(mail, i, ('SUBJECT', 'FROM'))
Adding to all the above answers.
import imaplib
import base64
import os
import email
if __name__ == '__main__':
    email_user = "email@domain.com"
    email_pass = "********"
    mail = imaplib.IMAP4_SSL("hostname", 993)
    try:
        mail.login(email_user, email_pass)
        # Select once, read-only, so listing messages cannot change flags.
        mail.select('INBOX', readonly=True)
        status, data = mail.search(None, 'ALL')
        for message_id in data[0].split():
            # Only the two headers that are printed are requested, not the
            # whole message.
            typ, msg_data = mail.fetch(
                message_id, '(BODY.PEEK[HEADER.FIELDS (FROM SUBJECT)])')
            for response_part in msg_data:
                if not isinstance(response_part, tuple):
                    continue
                msg = email.message_from_bytes(response_part[1])
                # A header may be missing (None); print it as empty text
                # instead of failing on None + str.
                sender = '' if msg['from'] is None else str(msg['from'])
                subject = '' if msg['subject'] is None else str(msg['subject'])
                print(sender + "\t" + subject)
    finally:
        # Always end the session, even when a command above fails.
        mail.logout()
This will give you the email's from and subject name.
精彩评论