Download our e-book of Introduction To Python
Nithya Rekha
4 years ago
pip install beautifulsoup4 #For Windows
$ apt-get install python3-bs4 # For Linux-based OS
$ apt-get install python-lxml
$ easy_install lxml
$ pip install lxml
$ apt-get install python-html5lib
$ easy_install html5lib
$ pip install html5lib
html_doc = """
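<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>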
"""
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_doc, 'html.parser')
print(soup.prettify())
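# <html>
#  <head>
#   <title>
#    The Dormouse's story
#   </title>
#  </head>
#  <body>
#   <p class="title">
#    <b>
#     The Dormouse's story
#    </b>
#   </p>
#   <p class="story">
#    Once upon a time there were three little sisters; and their names were
#    <a class="sister" href="http://example.com/elsie" id="link1">
#     Elsie
#    </a>
#    ,
#    <a class="sister" href="http://example.com/lacie" id="link2">
#     Lacie
#    </a>
#    and
#    <a class="sister" href="http://example.com/tillie" id="link3">
#     Tillie
#    </a>
#    ; and they lived at the bottom of a well.
#   </p>
#   <p class="story">
#    ...
#   </p>
#  </body>
# </html>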
soup.title
# <title>The Dormouse's story</title>
soup.title.name
# u'title'
soup.title.string
# u'The Dormouse's story'
soup.title.parent.name
# u'head'
soup.p
# <p class="title"><b>The Dormouse's story</b></p>
soup.p['class']
# ['title']
soup.a
# <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
soup.find_all('a')
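# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
#  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
#  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]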
soup.find(id="link3")
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
for link in soup.find_all('a'):
    print(link.get('href'))
# http://example.com/elsie
# http://example.com/lacie
# http://example.com/tillie
print(soup.get_text())
# The Dormouse's story
#
# The Dormouse's story
#
# Once upon a time there were three little sisters; and their names were
# Elsie,
# Lacie and
# Tillie;
# and they lived at the bottom of a well.