Web Programming

The urllib package provides modules for a variety of web-related tasks.

urllib.request - open and read URLs. urllib.parse - parse URLs. urllib.error - exceptions raised by urllib.request. urllib.robotparser - parse robots.txt files.

Example of a Smart Redirect Handler (Python 2, using urllib2)

import urllib2

class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that records the HTTP redirect code on the response.

    After a redirect has been followed, the response object returned to the
    caller carries a ``status`` attribute holding the redirect code that
    triggered it.
    """

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect and tag the resulting response with ``code``.

        All arguments are passed through unchanged to
        ``urllib2.HTTPRedirectHandler.http_error_302``.

        Returns the base handler's result.  When that result is ``None``
        (the base handler did not follow the redirect) it is returned
        untouched.
        """
        # Explicit base-class call, kept from the original, instead of super().
        result = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        # The base handler returns None when the response has no
        # Location/URI header; setting an attribute on it would raise
        # AttributeError, so only tag real responses.
        if result is not None:
            result.status = code
        return result

    # The base class aliases these codes to *its own* http_error_302, which
    # would bypass the override above; re-alias them so 301/303/307
    # redirects are tagged with their code as well.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

request = urllib2.Request("http://localhost/index.html")
opener = urllib2.build_opener(SmartRedirectHandler())
obj = opener.open(request)
print 'I capture the http redirect code:', obj.status
print 'Its been redirected to:', obj.url

urllib - basic authentication example

#!/usr/bin/env python3.1

import urllib.request, urllib.error, urllib.parse

# Fetch a page protected by HTTP Basic authentication and print its body.
URL = 'http://localhost/basic.html'

# Register the credentials to use for the given realm and URI prefix.
ah = urllib.request.HTTPBasicAuthHandler()
ah.add_password('Realm', 'http://localhost/', 'senthil', 'senthil')
# Install an opener built around the auth handler so that the plain
# urllib.request.urlopen() call below uses it.
urllib.request.install_opener(urllib.request.build_opener(ah))
r = urllib.request.Request(URL)
obj = urllib.request.urlopen(r)
try:
    print(obj.read())
finally:
    # Close the response so the connection is released even if read() fails.
    obj.close()

urllib - digest authentication example

#!/usr/bin/env python3.1

import urllib.request, urllib.error, urllib.parse
import getpass

# Fetch a feed protected by HTTP Digest authentication and print its body.
URL = 'http://livejournal.com/users/phoe6/data/rss?auth=digest'

ah = urllib.request.HTTPDigestAuthHandler()
# Ask for the password interactively instead of hard-coding it.
password = getpass.getpass()
# NOTE(review): credentials are registered for phoe6.livejournal.com while
# URL points at livejournal.com -- presumably this relies on the server
# redirecting to the user subdomain; confirm.
ah.add_password('lj', 'http://phoe6.livejournal.com/', 'phoe6', password)
# Install an opener built around the auth handler so that the plain
# urllib.request.urlopen() call below uses it.
urllib.request.install_opener(urllib.request.build_opener(ah))
r = urllib.request.Request(URL)
obj = urllib.request.urlopen(r)
try:
    print(obj.read())
finally:
    # Close the response so the connection is released even if read() fails.
    obj.close()