Get URL when handling urllib2.URLError
This pertains to urllib2 specifically, but custom exception handling more generally. How do I pass additional information to a calling function in another module via a raised exception? I'm assuming I would re-raise using a custom exception class, but I'm not sure of the technical details.
Rather than pollute the sample code with what I've tried and failed, I'll simply present it as a mostly blank slate. My end goal is for the last line in the sample to work.
#mymod.py
import urllib2
def openurl():
    """Request the sample URL and let any error from urlopen propagate.

    Nothing is returned; the function exists only to trigger the request.
    """
    urllib2.urlopen(urllib2.Request("http://duznotexist.com/"))
#main.py
import urllib2
import mymod
try:
    mymod.openurl()
except urllib2.URLError as e:
    # Desired end state of the question: report both the URL that failed
    # and the reason. Nothing in this snippet sets e.url, so it has to be
    # supplied by whatever raised the exception.
    details = (e.url, e.reason)
    print("Website (%s) could not be reached due to %s" % details)
You can add information to and then re-raise the exception.
#mymod.py
import urllib2
def openurl(url="http://duznotexist.com/"):
    """Open *url* and return the response, tagging failures with the URL.

    Args:
        url: Address to request. Defaults to the sample address used in
            this example, so existing zero-argument calls keep working.

    Returns:
        The response object returned by ``urllib2.urlopen``.

    Raises:
        urllib2.URLError: Re-raised from ``urlopen`` with an extra ``url``
            attribute naming the requested address. ``reason`` is left
            exactly as ``urlopen`` set it.
    """
    req = urllib2.Request(url)
    try:
        return urllib2.urlopen(req)
    except urllib2.URLError as e:
        # Record which request failed. An exception that already carries a
        # URL keeps it rather than having it overwritten.
        if getattr(e, "url", None) is None:
            e.url = req.get_full_url()
        # Bare raise re-raises the active exception with its original
        # traceback; e.reason is deliberately not replaced by a fixed
        # string, so the caller sees the real cause of the failure.
        raise
#main.py
import urllib2
import mymod
try:
    mymod.openurl()
except urllib2.URLError as e:
    # Both attributes are read straight off the caught exception; the url
    # attribute is the one mymod.openurl() attaches before re-raising.
    details = (e.url, e.reason)
    print("Website (%s) could not be reached due to %s" % details)
I don't think re-raising the exception is an appropriate way to solve this problem.
As @Jonathan Vanasco said,
if you're opening a.com , and it 301 redirects to b.com , urlopen will automatically follow that because an HTTPError with a redirect was raised. if b.com causes the URLError , the code above marks a.com as not existing
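My solution is to override the redirect_request method
of urllib2.HTTPRedirectHandler, so that the caller's request object is updated in place whenever a redirect is followed: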
import urllib2
class NewHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that rewrites the caller's Request in place.

    Instead of building a new Request for the redirect target, the
    original object is re-initialised with the new URL. A caller that
    kept a reference to the request can therefore ask it, after a
    failure, which URL was actually being fetched.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return *req*, re-pointed at *newurl*, or refuse the redirect.

        Args:
            req: The request that received the redirect response. It is
                modified in place when the redirect is followed.
            fp: Response body object, passed through to HTTPError.
            code: HTTP status code of the redirect response.
            msg: HTTP reason phrase of the redirect response.
            headers: Headers of the redirect response.
            newurl: Redirect target.

        Returns:
            The same *req* object, now describing a request for *newurl*.

        Raises:
            urllib2.HTTPError: If this code/method combination is not
                one that is followed.
        """
        method = req.get_method()
        followable = (
            (code in (301, 302, 303, 307) and method in ("GET", "HEAD"))
            or (code in (301, 302, 303) and method == "POST")
        )
        if not followable:
            # Qualified name: only the urllib2 module itself is imported,
            # so a bare HTTPError would be an undefined name here.
            raise urllib2.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

        # Spaces are not valid in a URL; escape them as %20.
        newurl = newurl.replace(' ', '%20')
        # Drop the headers that describe a request body; the
        # re-initialised request below is created without data.
        newheaders = dict(
            (k, v) for k, v in req.headers.items()
            if k.lower() not in ("content-length", "content-type")
        )
        # Reuse the req object rather than creating a new one: after a
        # redirect the caller's reference names the URL being fetched.
        req.__init__(newurl,
                     headers=newheaders,
                     origin_req_host=req.origin_req_host,
                     unverifiable=True)
        return req
# Route every urllib2.urlopen() call through the in-place redirect handler.
opener = urllib2.build_opener(NewHTTPRedirectHandler)
urllib2.install_opener(opener)

# Note: req is rewritten in place whenever a redirect is followed, so after
# the call it names the last URL that was attempted.
# To exercise a redirect locally, request 'http://127.0.0.1:5000' instead.
req = urllib2.Request('http://www.google.com/')
try:
    response = urllib2.urlopen(req)
except urllib2.URLError:
    print('error')
    print(req.get_full_url())
else:
    print('normal')
    print(response.geturl())
Let's try redirecting to an unreachable URL, using a small Flask app as the redirecting server:
import os
from flask import Flask,redirect
app = Flask(__name__)


@app.route('/')
def hello():
    """Answer a request for '/' with a 302 redirect to http://a.com."""
    # A plain 'hello world' body is the non-redirecting alternative.
    destination = "http://a.com"
    return redirect(destination, code=302)


if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT, defaulting to 5000.
    listen_port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=listen_port)
And the result is:
error
http://a.com/
normal
http://www.google.com/
精彩评论