WWW‎ > ‎

Download webpage

1) Download and read webpage line by line


# Read complete webpage

# Python3
import urllib.request

# Use the response as a context manager so the connection is closed as soon
# as the body has been read, even if read() or decode() raises.
with urllib.request.urlopen('http://www.example.org/') as fid:
    # read() returns bytes; decode them to obtain a text string.
    webpage = fid.read().decode('utf-8')
print(webpage)

# Python2
import contextlib
import urllib

# The object returned by Python 2's urllib.urlopen() cannot be used in a
# 'with' statement directly; contextlib.closing() guarantees that
# fid.close() is called once the page has been read.
with contextlib.closing(urllib.urlopen('http://www.example.org/')) as fid:
    webpage = fid.read()
print(webpage)

# Error
AttributeError: 'module' object has no attribute 'request' / 'urlopen'

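This error means the snippet was written for a different Python major version than the interpreter running it: urllib.request exists only in Python 3, and urllib.urlopen exists only in Python 2. Use the snippet that matches your Python version.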



# Read line by line

# print line by line
# splitlines() copes with '\n', '\r\n' and '\r' line endings and does not
# yield a trailing empty string when the page ends with a newline.
for line in webpage.splitlines():
    print(line)

# extract webpage title
import re

# Search the whole page at once so the title is found even when the tags are
# upper-case, carry attributes, or span several lines.
# DOTALL lets '.' match newlines; the non-greedy '.*?' stops at the first
# closing tag.
title_match = re.search(r'<title\b[^>]*>(.*?)</title>', webpage,
                        re.IGNORECASE | re.DOTALL)
if title_match:
    # strip() removes whitespace/newlines surrounding the title text
    pagetitle = title_match.group(1).strip()
    print(pagetitle)
# Output
Example Domain


2) Download webpage and save as local file

# Fetch the page and store it on disk as 'webpage.html'
import urllib.request

urllib.request.urlretrieve(
    url="http://www.example.org/",
    filename="webpage.html",
)

# read local file
# 'with' closes the file once the loop is done; the explicit encoding avoids
# depending on the platform's default text encoding.
with open('webpage.html', encoding='utf-8') as localfile:
    for line in localfile:
        # strip() drops the trailing newline and any surrounding whitespace
        print(line.strip())