1) Download and read webpage line by line
# Read complete webpage
# Python3
import urllib.request
# Fetch the whole page. The context manager closes the HTTP response
# even if read() or decode() raises.
with urllib.request.urlopen('http://www.example.org/') as fid:
    # The body arrives as bytes; it is decoded as UTF-8 here
    # (the page's real charset is assumed, not checked).
    webpage = fid.read().decode('utf-8')
print(webpage)
# Python2
import urllib
# Python 2 variant: fetch the whole page through urllib.urlopen.
fid = urllib.urlopen('http://www.example.org/')
try:
    # No decode step here, unlike the Python 3 variant.
    webpage = fid.read()
finally:
    # Close the connection explicitly so it is released even when read() fails.
    fid.close()
print(webpage)
# Error
AttributeError: 'module' object has no attribute 'request' / 'urlopen'
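→ Cause: the code and the interpreter are for different Python major versions. `urllib.request` exists only in Python 3 and `urllib.urlopen` only in Python 2, so use the snippet that matches the Python version you are running.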
# Read line by line
# print line by line
# splitlines() also copes with '\r\n' line endings and does not yield a
# trailing empty entry when the page ends with a newline.
for line in webpage.splitlines():
    print(line)
# extract webpage title
# pagetitle stays None when no line of the page contains '<title>'.
pagetitle = None
for line in webpage.splitlines():
    if '<title>' in line:
        # Keep the text between '<title>' and '</title>' on this line.
        pagetitle = line.split('<title>')[1].split('</title>')[0]
        break  # the first match is enough; skip the rest of the page
print(pagetitle)  # output for http://www.example.org/: Example Domain
2) Download webpage and save as local file
# save as local file 'webpage.html'
import urllib.request

# Download the page straight into the local file 'webpage.html'.
urllib.request.urlretrieve("http://www.example.org/", "webpage.html")

# read local file
# The with-block closes the file handle once all lines are printed.
# The encoding matches the UTF-8 decoding used when reading the page directly.
with open('webpage.html', encoding='utf-8') as fid:
    for line in fid:
        # strip() removes the trailing newline and surrounding whitespace
        print(line.strip())
|