在 GitLab 生成訪問令牌(private token)
只有用這裡生成的 token 才可以通過 API 訪問 GitLab;token 在 GitLab 的用戶設置菜單裡面生成。
python安裝模塊
pip install python-gitlab
gitlab API: https://docs.gitlab.com/ee/api/
python-gitlab SDK: https://python-gitlab.readthedocs.io/en/latest/api-objects.html
前者告訴你 GitLab API 有哪些能力,後者告訴你 Python 如何調用這些 API。

class GitlabAPI(object):
    """Small convenience wrapper around a python-gitlab client.

    The client is stored on ``self.gl`` and is used by the helper methods
    below to query groups and users.
    """

    # Defaults kept from the original snippet so ``GitlabAPI()`` behaves as
    # before. They are placeholders: pass real values via keyword arguments.
    DEFAULT_URL = 'https://192.168.20.111:8090/'
    DEFAULT_TOKEN = 'dsddddddd'

    def __init__(self, *args, **kwargs):
        """Create the underlying ``gitlab.Gitlab`` client.

        Positional arguments are accepted for backward compatibility and
        ignored. Recognised keyword arguments (all optional):

        :param url: base URL of the GitLab server.
        :param private_token: personal access token used for authentication.
        :param api_version: GitLab API version, ``'4'`` by default.
        :param ssl_verify: whether to verify the TLS certificate,
            ``False`` by default (as in the original snippet).
        """
        # Imported locally: the surrounding file never imports ``gitlab`` at
        # module level, so without this the constructor raises NameError.
        import gitlab

        self.gl = gitlab.Gitlab(
            kwargs.get('url', self.DEFAULT_URL),
            private_token=kwargs.get('private_token', self.DEFAULT_TOKEN),
            api_version=kwargs.get('api_version', '4'),
            ssl_verify=kwargs.get('ssl_verify', False),
        )

    def get_all_group(self):
        """Return every group visible to the token.

        :return: the result of ``self.gl.groups.list(all=True)``.
        """
        return self.gl.groups.list(all=True)

    def get_user_byname(self, username):
        """Return the first user matching ``username``.

        :param username: the GitLab username to look up.
        :return: the first entry of ``self.gl.users.list(username=username)``.
        :raises IndexError: if no user matches. The exception type is the one
            the original code raised; only the message is now descriptive.
        """
        matches = self.gl.users.list(username=username)
        if not matches:
            raise IndexError('no GitLab user found with username %r' % (username,))
        return matches[0]
使用第三方模塊獲取數據的方式受限;要實現更靈活的數據獲取,可以使用爬蟲的方式。
爬蟲實現方式
爬蟲抓取數據非常靈活 用戶能按照自己的想法獲取任何數據

import json

import requests

# Script: list projects through the GitLab REST API (v4), authenticating with
# a personal access token sent in the PRIVATE-TOKEN header.

# Seconds to wait for the server before giving up; without a timeout a
# request to an unreachable host can block forever.
REQUEST_TIMEOUT = 30

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}
# GitLab base address
git_url = 'https://192.168.20.111:8090/'
# GitLab access token
git_token = 'jEEEEV-wwwwwww'

session = requests.Session()
headers['PRIVATE-TOKEN'] = git_token
session.headers = headers

# Initial request to the GitLab front page on the same session.
git_login = session.get(git_url, headers=headers, verify=False, timeout=REQUEST_TIMEOUT)

# git_url already ends with '/', so strip it before appending the API path;
# otherwise the request goes to '...:8090//api/v4/projects'.
projects_api = git_url.rstrip('/') + '/api/v4/projects?simple=yes&per_page=20'
projects_url = projects_api + '&page=' + str(1)
projects = session.get(projects_url, verify=False, timeout=REQUEST_TIMEOUT).text
print(projects)
需要完全模擬瀏覽器的行為:少一個請求頭參數,服務端返回的結果就不同。這裡返回的是 html 字符串,
而實際上需要返回的是 json 數據。
根據瀏覽器發送請求的面板進行請求參數分析

import json

import requests

# Script: request a commit's "pipelines" page with the full set of request
# headers copied from the browser's network panel.

# GitLab base address
git_url = 'https://192.168.20.111:8090/'
# GitLab access token
git_token = 'jV-XMxxTg2bscNTvssua'

# All request headers in one literal, in the same order the original added them.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
    'PRIVATE-TOKEN': git_token,
    "accept": "application/json, text/plain, */*",
    "x-csrf-token": "WmzTz0kgZsJAcZzwB8KmXmyujWdWSdeLde7X924byzFgXlvKrFWfELry5TFb2KrlJmk3+x9vlT/CFLB+wA4VQg==",
    'cookie': "experimentation_subject_id=eyJfcmFpbHMiOn65cca4d63691482c3c8c69db589f; sidebar_collapsed=false; _gitlab_session=9b065ccd39922a9dcf61d80323ac412b; event_filter=all",
    'accept-encoding': "gzip, deflate, br",
    'accept-language': "zh-CN,zh;q=0.9,en;q=0.8",
    'cache-control': "no-cache",
    'pragma': "no-cache",
    'sec-fetch-dest': "empty",
    'sec-fetch-mode': "cors",
    'sec-fetch-site': "same-origin",
    'x-requested-with': "XMLHttpRequest",
}

session = requests.Session()
session.headers = headers

test_url = "https://192.168.20.111:8090/tf-as/appservice/-/commit/3a5724bb852396de054089ac7631308c7b9297f0/pipelines?ref=dev"
response = session.get(test_url, verify=False)
response.encoding = 'utf-8'
# Decode the raw bytes explicitly as UTF-8 and print the body.
print(str(response.content, "utf-8"))

# Look up who made a commit, and when, by scraping the commit's web page.
# Relies on `session`, a requests.Session configured earlier in the file.
# NOTE(review): the host here (192.168.30.22) differs from the one the session
# headers/cookies were captured for (192.168.20.111) — confirm this is intended.

# BeautifulSoup is not imported anywhere above this point in the file.
from bs4 import BeautifulSoup

commit_url = "https://192.168.30.22:8090/tf-as/appservice/-/commit/963ac4e759de4107cb4dae1ceebfb16d5ad2b172"
res = session.get(commit_url, verify=False)

# Parse the page HTML.
soup = BeautifulSoup(res.content.decode("utf-8"), "html.parser")
tag = soup.find(name="div", attrs={"class": "header-main-content"})

time = None
user = None
if tag is None:
    # find() returns None when the element is missing, e.g. when the server
    # answered with a different page than the commit view.
    print("commit header not found in response (HTTP %s)" % res.status_code)
else:
    time = tag.find(name="time", attrs={"class": "js-timeago"})
    user = tag.find(name="span", attrs={"class": "commit-author-name"})
    print(list(tag.children))
    # Commit timestamp.
    print(time.get("datetime") if time is not None else None)
    # Commit author.
    print(user.text if user is not None else None)
模擬用戶通過瀏覽器登錄gitlab

import requests
from bs4 import BeautifulSoup


class login(object):
    """Log in to GitLab through the web sign-in form and scrape pages.

    All requests go through one ``requests.Session`` so that cookies set
    while fetching the sign-in form are sent with the later requests.
    """

    def __init__(self):
        self.gitlab_url = 'https://192.168.40.111:8090/users/sign_in'
        self.session = requests.Session()

    def get_token(self):
        """Fetch the sign-in page and return its ``authenticity_token``.

        :return: the token string, or ``None`` if no token field is found.
        """
        # Requests sent through the same session object share state such as
        # the cookies tied to this token.
        html = self.session.get(url=self.gitlab_url, verify=False)
        soup = BeautifulSoup(html.text, 'html.parser')

        # Look the field up by name rather than by position, so the result
        # does not depend on the order of hidden inputs in the form.
        field = soup.find('input', attrs={'name': 'authenticity_token'})
        if field is None:
            # Fall back to the original positional lookup (second hidden input).
            hidden_inputs = soup.find_all('input', type="hidden")
            if len(hidden_inputs) < 2:
                return None
            field = hidden_inputs[1]
        return field.get('value')

    def post_gitlab(self, login, password):
        """Submit the sign-in form.

        :param login: GitLab user name or e-mail.
        :param password: the account password.
        :return: ``True`` if the final response status is 200, else ``False``.
            NOTE(review): a 200 may also be the sign-in form rendered again
            after a rejected password — confirm against the server.
        """
        token = self.get_token()
        if token is None:
            print("gitlab登錄失敗")
            return False

        data = {
            # A wrong authenticity_token makes the server answer 422.
            'authenticity_token': token,
            'user[login]': login,
            'user[password]': password
        }
        # Must use self.session so the token and its cookies are shared
        # across the requests.
        res = self.session.post(url=self.gitlab_url, data=data, verify=False)
        print(res.status_code)
        if res.status_code != 200:
            print("gitlab登錄失敗")
            return False
        return True

    def get_gitlabPage(self, url):
        """Fetch a commit page and print its timestamp and author.

        :param url: URL of the commit page to fetch with the session.
        :return: ``(datetime, author)`` as strings, or ``None`` if the
            expected elements are not present in the page.
        """
        res = self.session.get(url, verify=False)
        # Parse the page HTML.
        soup = BeautifulSoup(res.content.decode("utf-8"), "html.parser")
        header = soup.find(name="div", attrs={"class": "header-main-content"})
        if header is None:
            # find() returns None when the element is absent, e.g. when the
            # server returned a different page than the commit view.
            print("commit header not found in response (HTTP %s)" % res.status_code)
            return None

        time_tag = header.find(name="time", attrs={"class": "js-timeago"})
        author_tag = header.find(name="span", attrs={"class": "commit-author-name"})
        if time_tag is None or author_tag is None:
            print("commit time/author not found in response (HTTP %s)" % res.status_code)
            return None

        # Commit timestamp.
        print(time_tag.get("datetime"))
        # Commit author.
        print(author_tag.text)
        return time_tag.get("datetime"), author_tag.text


if __name__ == '__main__':
    gl = login()
    gl.post_gitlab('aaa', 'bbb')
    # Any page on the GitLab site can be fetched and parsed this way.
    gl.get_gitlabPage("https://192.168.40.111:8090/tf-as/appservice/-/commit/963ac4e759de4107cb4dae1ceebfb16d5ad2b172")
通過gitlab的api和網頁爬取的方式獲取自己想要的數據即可