BeautifulSoup4 庫的基本使用

本文轉載自查看原文 2018-09-12 09:11 2082 python

　　喜歡我的博客可以加關注，有問題可以提問我。

　　1.基本使用（下面的 html 由於過長就不重複複製了，後面的範例都複用第一個）

# Sample markup; the later examples pass this same `html` variable to the parser.
html="""
<html>
<head><title>dsojfeoifjosieofiej</title></head>
    
    <meta http-equiv="content-type" content="text/html;charset=utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=Edge">
    <meta content="always" name="referrer">
    <meta name="theme-color" content="#2932e1">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
    <link rel="search" type="application/opensearchdescription+xml" href="/content-search.xml" title="百度搜索" />
    <link rel="icon" sizes="any" mask href="//www.baidu.com/img/baidu_85beaf5496f291521eb75ba38eacbd87.svg">
    <link rel="dns-prefetch" href="//s1.bdstatic.com"/>
    <link rel="dns-prefetch" href="//t11.baidu.com"/>
    <link rel="dns-prefetch" href="//t12.baidu.com"/>
    <link rel="dns-prefetch" href="//b1.bdstatic.com"/>
"""
from bs4 import BeautifulSoup

# Parse the sample markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Print the re-indented document first, then the text inside <title>.
pretty_markup = soup.prettify()
print(pretty_markup)
title_tag = soup.title
print(title_tag.string)

　　2.選擇元素

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Accessing a tag name as an attribute yields the first tag with that name.
print(soup.title)
print(soup.head)
print(soup.p)  # only the first <p> is printed

　　3.獲取名稱

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the tag name of the first <p> element.
first_p = soup.p
print(first_p.name)

　　4.獲取屬性

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Read the `name` attribute of the first <p> in two ways:
# through the attrs mapping, and by indexing the tag directly.
attr_map = soup.p.attrs
print(attr_map['name'])
first_p = soup.p
print(first_p['name'])

　　5.獲取內容

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the `.string` of the first <p> element.
first_p = soup.p
print(first_p.string)

　　6.嵌套選擇

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Tag attributes can be chained: take <head>, then its <title>, then the text.
head_tag = soup.head
title_tag = head_tag.title
print(title_tag.string)

　　7.子節點和子孫節點

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the child nodes of the first <p> via `.contents`.
child_nodes = soup.p.contents
print(child_nodes)

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the `.children` object itself, then each child node with its index.
print(soup.p.children)
for numbered_child in enumerate(soup.p.children):
    print(*numbered_child)  # index, child node

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the `.descendants` object itself, then every descendant node of the
# first <p> together with its index.
print(soup.p.descendants)
for i, child in enumerate(soup.p.descendants):
    print(i, child)  # descendant node

　　8.父節點和祖先節點

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the direct parent of the first <a> element.
first_link = soup.a
print(first_link.parent)

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print every ancestor of the first <a> as a list of (index, node) pairs.
numbered_ancestors = list(enumerate(soup.a.parents))
print(numbered_ancestors)

　　9.兄弟節點

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the siblings after the first <a>, then the siblings before it,
# each as a list of (index, node) pairs.
following = list(enumerate(soup.a.next_siblings))
print(following)
preceding = list(enumerate(soup.a.previous_siblings))
print(preceding)

　　10.標準選擇器

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# For every <ul> in the document, print the list of <li> tags found inside it.
for list_tag in soup.find_all('ul'):
    items = list_tag.find_all('li')
    print(items)

　　10.1加參數

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Filter by attribute values through the `attrs` dictionary:
# first by id, then by the `name` attribute.
by_id = soup.find_all(attrs={'id': 'list-1'})
print(by_id)
by_name = soup.find_all(attrs={'name': 'elements'})
print(by_name)


from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# The same kind of filtering using keyword arguments; `class_` carries a
# trailing underscore because `class` is a reserved word in Python.
by_id = soup.find_all(id='list-1')
print(by_id)
# NOTE(review): the CSS selector example in section 11 uses the class
# `element` (singular) - confirm which class name the sample HTML uses.
by_class = soup.find_all(class_='elements')
print(by_class)

　　10.2text

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# `string=` is the current name of the keyword formerly called `text=`
# (renamed in Beautiful Soup 4.4.0). It matches on text content and the
# result holds the matching strings rather than tags.
print(soup.find_all(string='Foo'))

　　10.3 find（返回單個元素，也就是第一個匹配的元素）

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# `find` returns a single result: print the first <ul>, then its type,
# then the result of looking up a <page> tag.
first_ul = soup.find('ul')
print(first_ul)
print(type(soup.find('ul')))
page_tag = soup.find('page')
print(page_tag)

　　10.4 find_parents() find_parent()（這裡和上面的類似，就不粘貼代碼了）

　　10.5 find_next_siblings() find_next_sibling()（這裡和上面的類似，就不粘貼代碼了）

　　11. CSS 選擇器

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Three CSS selectors, printed in order:
#   '.panel .panel-heading' - elements with class panel-heading inside an
#                             element with class panel
#   'ul li'                 - <li> tags inside a <ul>
#   '#list-2 .element'      - elements with class element inside the element
#                             whose id is list-2
for css in ('.panel .panel-heading', 'ul li', '#list-2 .element'):
    print(soup.select(css))

# Show the type of a single item from a select() result.
first_ul = soup.select('ul')[0]
print(type(first_ul))

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# select() can be called again on each result: for every <ul>, print the
# <li> tags it contains.
for ul in soup.select('ul'):
    print(ul.select('li'))

　　11.1 獲取屬性

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the id of every selected <ul> twice: once by indexing the tag,
# once through its attrs mapping.
for list_tag in soup.select('ul'):
    print(list_tag['id'])
    attr_map = list_tag.attrs
    print(attr_map['id'])

　　11.2 獲取內容

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# Print the text content of every <li> matched by the CSS selector.
for item in soup.select('li'):
    item_text = item.get_text()
    print(item_text)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 Python學習之beautifulsoup4庫的使用 BeautifulSoup4基本使用 BeautifulSoup4的使用方法 python3解析庫BeautifulSoup4 BeautifulSoup4庫和CSS選擇器 Python:requests庫、BeautifulSoup4庫的基本使用（實現簡單的網絡爬蟲） python---requests和beautifulsoup4模塊的使用使用pip安裝BeautifulSoup4模塊用requests庫和BeautifulSoup4庫爬取新聞列表 python怎么安裝requests、beautifulsoup4等第三方庫