Python Scrapy Framework Posting Wrong Images - Why/How Can I fix this?
I am working with the Scrapy framework for Python to scrape several entries including text and images from one site and post them to another, one by one. It all works well, except that the images are posting with the wrong corresponding text. I can't, for the life of me, figure out what to do differently.
Here is the code. If anyone could help me figure this out, I would greatly appreciate it:
from flexmls.items import FlexmlsItem
# Spider that logs in to epropertysites.com and re-posts listings read from
# a CSV file (see after_login / post_ad / post_images below).
class Epropertysites(BaseSpider):
# Spider identifier (presumably the name Scrapy uses to select it -- confirm).
name = 'epropertysites'
# Crawl entry point; parse() submits the login form found on this page.
start_urls = ['http://www.epropertysites.com/']
# Site root, prefixed to the relative form action extracted in post_images().
URL = 'http://www.epropertysites.com'
def parse(self, response):
    """Submit the login form found in *response*.

    The credentials come from the ``EPROP_USER`` and ``EPROP_PASSW``
    settings (empty unicode strings when unset). The result of the form
    submission is handed to ``after_login``.
    """
    credentials = {
        'i_login': settings.get('EPROP_USER', u''),
        'i_password': settings.get('EPROP_PASSW', u''),
    }
    return FormRequest.from_response(
        response,
        formdata=credentials,
        callback=self.after_login,
    )
def after_login(self, response):
    """Check the login result and queue one "add property" request per CSV row.

    If the response body contains 'is incorrect', the failure is printed
    and logged, the method waits for Enter on the console, and nothing is
    scheduled. Otherwise each row of ``results/flexmls.csv`` is attached
    (as ``meta['item']``) to a fresh request for the "add property" page,
    whose response is handled by ``post_ad``.

    NOTE(review): every row is queued up front, so the multi-step posts for
    different listings can interleave on the single login session. If the
    site remembers the "current" property per session, that would attach
    images to the wrong listing -- confirm, and consider limiting
    concurrency or chaining the rows one after another.
    """
    if 'is incorrect' in response.body:
        print('Failed to login with\r\n press enter')
        self.log('Login failes', log.ERROR)
        # Blocks until the operator presses Enter.
        raw_input()
        return

    csv_path = os.path.join("results", 'flexmls.csv')
    # The context manager closes the CSV once all rows have been consumed,
    # instead of leaving the handle to the garbage collector.
    with open(csv_path) as csv_file:
        for row in csv.DictReader(csv_file):
            yield Request('http://www.epropertysites.com/myprop_add.htm',
                          meta={'item': row},
                          dont_filter=True,  # same URL is requested once per row
                          callback=self.post_ad)
def post_ad(self, response):
    """Build and submit the property-details form for one CSV row.

    The row is read from ``response.request.meta['item']``. Empty price
    falls back to u'0' and empty beds/baths to u'1'; the layout and colour
    codes are random two-digit strings ('02'-'05' and '01'-'12'). The
    submitted form data is carried forward in ``meta['form']`` together
    with the row, and the response is handled by ``post_images``.
    """
    item = response.request.meta['item']

    # Progress output is best-effort only: an address that cannot be
    # encoded, or an unwritable console, must not stop the post. Anything
    # else (including KeyboardInterrupt) is allowed to propagate.
    try:
        print('posting %s' % item['address'].encode())
    except (UnicodeError, IOError):
        pass

    address = item['address']
    formdata = {
        'i_address': address,
        'i_city': item['city'],
        'i_price': item['price'] or u'0',
        'i_state': item['state'].strip(),
        'i_zip': item['zip'].strip(),
        'i_county': item['county'],
        'i_mls': item['id'].strip(),
        'i_type': '1',
        'i_br': item['beds'] or u'1',
        'i_ba': item['baths'] or u'1',
        'i_sqft': item['sqft'],
        'i_year_blt': item['year_built'],
        'i_tagline': address,
        # The form is sent with CRLF line endings in the description.
        'i_desc': item['description'].replace("\n", '\r\n'),
        # Site key: the address with spaces turned into dashes and dots removed.
        'i_site_key': address.replace(u" ", u'-').replace(u".", u'').strip(),
        'i_domain': 'ePropertySites.com',
        'i_layout': '%.2d' % random.randint(2, 5),
        'i_color02': '%.2d' % random.randint(1, 12),
    }
    return FormRequest('http://www.epropertysites.com/myprop_add.htm?&f=3',
                       formdata=formdata,
                       meta={'item': item, 'form': formdata},
                       callback=self.post_images)
def encode_multipart_formdata(self, fields, files):
    """Encode form fields and image files as a multipart/form-data body.

    Args:
        fields: iterable of ``(name, value)`` pairs for plain form fields.
        files: iterable of ``(filename, value)`` pairs; the file name is
            used both as the part's field name and as its ``filename``,
            and every part is labelled ``image/jpeg``.

    Returns:
        A ``(content_type, body)`` tuple: the Content-Type header value
        (including the boundary) and the CRLF-joined request body.
    """
    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
    CRLF = '\r\n'
    # Parts are collected line by line and joined with CRLF at the end.
    lines = []
    for (key, value) in fields:
        lines.append('--' + BOUNDARY)
        lines.append('Content-Disposition: form-data; name="%s"' % key)
        lines.append('')  # blank line separates part headers from content
        lines.append(value)
    for (filename, value) in files:
        lines.append('--' + BOUNDARY)
        lines.append('Content-Disposition: form-data; name="%s"; filename="%s"'
                     % (filename, filename))
        lines.append('Content-Type: image/jpeg')
        lines.append('')
        lines.append(value)
    # Closing delimiter, followed by a trailing CRLF.
    lines.append('--' + BOUNDARY + '--')
    lines.append('')
    body = CRLF.join(lines)
    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
    return content_type, body
def post_images(self, response):
    """Upload the listing's images as a hand-built multipart POST.

    Returns ``None`` (nothing further is scheduled) when the site reports
    that the website key is already in use. Otherwise the image paths
    stored in the row's ``images`` column are read from the
    ``IMAGES_STORE`` directory, encoded with
    ``encode_multipart_formdata`` and posted to the action URL of the form
    found in *response*; the reply is handled by ``get_change_page``.
    """
    if 'That Website Key is already being used' in response.body:
        return

    page = HtmlXPathSelector(response)
    item = response.request.meta['item']
    # SECURITY NOTE(review): eval() executes whatever expression the CSV
    # cell contains. If the CSV can come from an untrusted source, parse
    # the list with ast.literal_eval instead.
    images = eval(item['images'])
    # NOTE(review): the third field name looks like a pasted raw header;
    # ('mode', 'send') may have been intended -- confirm against the
    # site's real upload form before changing it.
    fields = [('i_caption_1', ''), ('v_max', '1'),
              ('Content-Disposition: form-data; name="mode"', 'send')]

    images_dir = settings.get("IMAGES_STORE")
    files = []
    for image in images:
        # Images are binary: read them in 'rb' mode so no newline
        # translation can alter the bytes, and close each handle promptly.
        with open(os.path.join(images_dir, image), 'rb') as image_file:
            files.append((os.path.basename(image), image_file.read()))

    content_type, body = self.encode_multipart_formdata(fields, files)
    return FormRequest(self.URL + page.select("//form/@action").extract()[0],
                       body=body,
                       method='POST',
                       headers={'Content-Type': content_type,
                                'content-length': len(body)},
                       meta={"item": item, 'form': response.request.meta['form']},
                       callback=self.get_change_page)
def get_change_page(self, response):
    """Request the "modify property" page for the listing just created.

    The listing key is taken from the first ``&key=<digits>&`` match in
    the form action(s) of *response*. The row, the submitted form data
    and the key are passed along in ``meta``; the reply is handled by
    ``post_rest_info``.
    """
    selector = HtmlXPathSelector(response)
    key_matches = selector.select("//form/@action").re(r"&key=(\d+)&")
    ad_id = key_matches[0].strip()

    modify_url = "http://www.epropertysites.com/myproperties.htm?&f=mod&key=%s" % ad_id
    carried = {
        'item': response.request.meta['item'],
        'form': response.request.meta['form'],
        'id': ad_id,
    }
    return Request(modify_url, meta=carried, callback=self.post_rest_info)
精彩评论