Download webpage
1) Download and read webpage line by line
Read complete webpage
# Python3
import urllib.request

# The with-statement closes the HTTP response as soon as the body has been
# read, instead of leaving the connection open until garbage collection.
with urllib.request.urlopen('http://www.example.org/') as fid:
    # read() returns bytes; decode them to get a text string
    webpage = fid.read().decode('utf-8')
print(webpage)
# Python2
import urllib

fid = urllib.urlopen('http://www.example.org/')
try:
    # In Python 2, read() already returns a str, so no decode step is needed
    webpage = fid.read()
finally:
    # The Python 2 urlopen() result cannot be used in a with-statement,
    # so close the connection explicitly even if read() fails.
    fid.close()
print(webpage)
# Error
AttributeError: 'module' object has no attribute 'request' / 'urlopen'
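→ The code and the interpreter belong to different Python major versions: 'request' is missing when Python 3 code runs under Python 2, and 'urlopen' is missing when Python 2 code runs under Python 3.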
Read line by line
# print line by line
# `webpage` is the text string produced by the download step.
for line in webpage.split('\n'):
    # split('\n') already removed the newline characters, so print() adds
    # exactly one line break per line
    print(line)
# extract webpage title
# `webpage` is the text string produced by the download step.
# Start with None so print() below cannot raise a NameError when no line
# contains a <title> tag.
pagetitle = None
for line in webpage.split('\n'):
    if '<title>' in line:
        # keep the text between the opening and the closing tag
        pagetitle = line.split('<title>')[1].split('</title>')[0]
        # the first <title> is the page title; stop scanning
        break
print(pagetitle)
'Example Domain'
2) Download webpage and save as local file
# save as local file 'webpage.html'
import urllib.request
urllib.request.urlretrieve('http://www.example.org/', 'webpage.html')

# read local file
# The with-statement closes the file when the loop is done. The explicit
# encoding matches the UTF-8 decoding used when reading the page directly,
# instead of depending on the platform's default encoding.
with open('webpage.html', encoding='utf-8') as local_file:
    for line in local_file:
        # strip() removes the trailing newline and surrounding whitespace
        print(line.strip())