return <options> as list from <select> box with python (mechanize/twill)
If I were to get something like this with showforms(), how would I get the Values out of the SOME_CODE input box?
Form name=ttform (#2)
## ## __Name__________________ __Type___ __ID________ __Value__________________
1 NUMBER select (None) ['0'] of ['0', '10', '2', '3', '4', ...
2 SOMEYEAR select (None) ['201009'] of ['201009', '201007']
3 SOME_CODE select (None) ['AR%'] of ['AR%', 'AR01', 'AR02', ' ...
4 OTHR_CODE select (None) ['%'] of ['%', 'AAEC', 'ACIS', 'AEE' ...
Thanks!!
This does what you want. Tested on a website I found with the type of select control you have above:
>>> import twill.commands
>>> import BeautifulSoup
>>> import re
>>>
>>> a=twill.commands
>>> a.config("readonly_controls_writeable", 1)
>>> b = a.get_browser()
>>> b.set_agent_string("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14")
>>> b.clear_cookies()
>>> url="http://www.thesitewizard.com/archive/navigation.shtml"
>>> b.go(url)
==> at http://www.thesitewizard.com/archive/navigation.shtml
>>> form=b.get_form("1")
>>> b.showforms()
Form #1
## ## __Name__________________ __Type___ __ID________ __Value__________________
1 newurl select dummymenu [''] of ['', '#', '#', '', '#']
Form #2
## ## __Name__________________ __Type___ __ID________ __Value__________________
1 cmd hidden (None) _s-xclick
2 1 submit image (None)
3 encrypted hidden (None) -----BEGIN PKCS7-----MIIHwQYJKoZIhvc ...
Form #3
## ## __Name__________________ __Type___ __ID________ __Value__________________
1 None textarea pagelinkcode <a href="http://www.thesitewizard.co ...
Form #4
## ## __Name__________________ __Type___ __ID________ __Value__________________
1 q text searchterms
2 1 None submit (None) Search
Form #5
## ## __Name__________________ __Type___ __ID________ __Value__________________
1 cmd hidden (None) _s-xclick
2 1 submit image (None)
3 encrypted hidden (None) -----BEGIN PKCS7-----MIIHwQYJKoZIhvc ...
>>> valOpts=[]
>>> for c in form.controls:
...     if c.name=="newurl":
...         if 'items' in c.__dict__:
...             print "control %s has items field length %s" % (c, len(c.items))
...             if len(c.items)>0:
...                 for itm in range(len(c.items)):
...                     valOpts.append(c.items[itm].attrs['value'])
...
control <SelectControl(newurl=[*, #, #, (), #])> has items field length 5
>>> print valOpts
['', '#', '#', '', '#']
>>>
When debugging forms, I use the following:
def getFormControlByLabel(self, form, label):
    """Return the first control in *form* whose first item is labelled *label*.

    The form is scanned twice, in ``form.controls`` order:

    1. Controls that carry an ``items`` entry in their instance ``__dict__``
       are inspected, with verbose debug logging of each step.
    2. Every control is tried again through plain attribute access, which
       also covers controls that expose ``items`` some other way (for
       example as a class attribute or property).

    Only the *first* item of each control is compared against *label*.

    Args:
        form: object exposing an iterable ``controls`` attribute
            (presumably a mechanize/twill form -- confirm against caller).
        label: value compared with ``control.items[0].attrs['label']``.

    Returns:
        The matching control, or ``None`` when no control matches.
    """
    # Pass 1: controls that store ``items`` directly on the instance.
    for control in form.controls:
        self.log.debug("checking control %s dict = %s" % (control,control.__dict__))
        if 'items' not in control.__dict__:
            continue
        self.log.debug("control %s has items field len %d" % (control, len(control.__dict__['items'])))
        if len(control.items) == 0:
            continue
        first_attrs = control.items[0].attrs
        if 'label' in first_attrs:
            self.log.debug("control %s has label %s" % (control, first_attrs['label']))
            if first_attrs['label'] == label:
                self.log.debug("control %s has label %s" % (control,label))
                return control

    # Pass 2: best-effort lookup via attribute access. Controls that have no
    # items, no first item, or no 'label' attribute are skipped; any other
    # exception is a real error and is allowed to propagate instead of being
    # silently swallowed.
    for control in form.controls:
        try:
            first_label = control.items[0].attrs['label']
        except (AttributeError, IndexError, KeyError, TypeError):
            continue
        if first_label == label:
            self.log.debug("control %s has matching label %s" % (control,label))
            return control
        self.log.debug("control %s has label %s" % (control, first_label))

    return None
精彩评论