GitLab生成访问令牌（private token）
只有使用这里生成的token才可以通过API访问GitLab；token在GitLab的用户设置菜单里生成。
python安装模块
pip install python-gitlab
gitlab API: https://docs.gitlab.com/ee/api/
python-gitlab SDK: https://python-gitlab.readthedocs.io/en/latest/api-objects.html
前者告诉你GitLab API有哪些能力，后者告诉你Python如何调用这些API。

class GitlabAPI(object):
    """Small wrapper around a python-gitlab client.

    The client is created once in ``__init__`` and stored on ``self.gl``;
    the other methods are thin queries against it.
    """

    def __init__(self, *args, **kwargs):
        """Create the underlying ``gitlab.Gitlab`` client.

        Positional arguments are accepted and ignored. Two optional keyword
        arguments override the built-in defaults:

        :param url: base URL of the GitLab server.
        :param private_token: access token generated in the GitLab user
            settings.
        """
        # Imported here so the class is usable on its own: the surrounding
        # notes only mention ``pip install python-gitlab`` and never import
        # the module.
        import gitlab

        self.gl = gitlab.Gitlab(
            kwargs.get('url', 'https://192.168.20.111:8090/'),
            private_token=kwargs.get('private_token', 'dsddddddd'),
            api_version='4',
            # NOTE(review): TLS certificate verification is switched off;
            # presumably the server uses a self-signed certificate - confirm.
            ssl_verify=False,
        )

    def get_all_group(self):
        """Return all groups.

        :return: the result of ``groups.list(all=True)``.
        """
        return self.gl.groups.list(all=True)

    def get_user_byname(self, username):
        """Return the first user whose username matches, or ``None``.

        :param username: username passed to the ``users.list`` filter.
        :return: the first matching user, or ``None`` when the server
            returns no match (previously this raised ``IndexError``).
        """
        matches = self.gl.users.list(username=username)
        return matches[0] if matches else None
使用第三方模块获取数据的方式受限；要更灵活地获取数据，可以改用爬虫的方式。
爬虫实现方式
爬虫抓取数据非常灵活，用户能按照自己的想法获取任何数据。

import json

import requests

# Request headers sent with every call; starts with a browser-like User-Agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}
# GitLab base URL
git_url = 'https://192.168.20.111:8090/'
# GitLab access token
git_token = 'jEEEEV-wwwwwww'

session = requests.Session()
headers['PRIVATE-TOKEN'] = git_token
# Replaces the session's header set, so these headers go out on every request.
session.headers = headers
git_login = session.get(git_url, headers=headers, verify=False)

# git_url already ends with "/", so strip it before appending the API path;
# plain concatenation produced "...:8090//api/v4/projects".
projects_api = git_url.rstrip('/') + '/api/v4/projects?simple=yes&per_page=20'
# Request the first page of results.
projects_url = projects_api + '&page=' + str(1)
projects = session.get(projects_url, verify=False).text
print(projects)
需要完全模拟浏览器的行为：少一个请求头参数，服务端返回的结果就可能不同，例如这里返回的是HTML字符串，
而实际需要的是JSON数据。
根据浏览器发送请求的面板进行请求参数分析

import json

import requests

# Request headers; starts with a browser-like User-Agent and is extended below
# with the values observed in the browser's request panel.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
}
# GitLab base URL
git_url = 'https://192.168.20.111:8090/'
# GitLab access token
git_token = 'jV-XMxxTg2bscNTvssua'

session = requests.Session()
headers['PRIVATE-TOKEN'] = git_token
headers["accept"] = "application/json, text/plain, */*"
# NOTE(review): the CSRF token and cookie below are hard-coded values copied
# from a browser session - presumably they expire and must be refreshed.
headers["x-csrf-token"] = "WmzTz0kgZsJAcZzwB8KmXmyujWdWSdeLde7X924byzFgXlvKrFWfELry5TFb2KrlJmk3+x9vlT/CFLB+wA4VQg=="
headers['cookie'] = "experimentation_subject_id=eyJfcmFpbHMiOn65cca4d63691482c3c8c69db589f; sidebar_collapsed=false; _gitlab_session=9b065ccd39922a9dcf61d80323ac412b; event_filter=all"
# "br" is deliberately not advertised: requests only decodes Brotli when an
# optional Brotli package is installed, and an undecoded body would make the
# .decode("utf-8") below fail.
headers['accept-encoding'] = "gzip, deflate"
headers['accept-language'] = "zh-CN,zh;q=0.9,en;q=0.8"
headers['cache-control'] = "no-cache"
headers['pragma'] = "no-cache"
headers['sec-fetch-dest'] = "empty"
headers['sec-fetch-mode'] = "cors"
headers['sec-fetch-site'] = "same-origin"
headers['x-requested-with'] = "XMLHttpRequest"
# Replaces the session's header set, so these headers go out on every request.
session.headers = headers

test_url = "https://192.168.20.111:8090/tf-as/appservice/-/commit/3a5724bb852396de054089ac7631308c7b9297f0/pipelines?ref=dev"
response = session.get(test_url, verify=False)
response.encoding = 'utf-8'
print(response.content.decode("utf-8"))

# Look up who made a commit by scraping the commit's web page.
# Relies on `session` and `BeautifulSoup` already being defined/imported.
commit_url = "https://192.168.30.22:8090/tf-as/appservice/-/commit/963ac4e759de4107cb4dae1ceebfb16d5ad2b172"
res = session.get(commit_url, verify=False)

# Parse the page's HTML content.
page_html = res.content.decode("utf-8")
soup = BeautifulSoup(page_html, "html.parser")
tag = soup.find("div", class_="header-main-content")

# Dump the header's direct children.
print(list(tag.children))

# Commit time.
commit_time = tag.find("time", class_="js-timeago")
print(commit_time.get("datetime"))

# Commit author.
author = tag.find("span", class_="commit-author-name")
print(author.text)
模拟用户通过浏览器登录gitlab

import requests
from bs4 import BeautifulSoup


class login(object):
    """Sign in to GitLab through the web form and scrape pages afterwards.

    All requests go through one ``requests.Session`` kept on ``self.session``.
    """

    def __init__(self):
        # URL of the sign-in page; used both to fetch the form and to post it.
        self.gitlab_url = 'https://192.168.40.111:8090/users/sign_in'
        self.session = requests.Session()

    def get_token(self):
        """Fetch the sign-in page and return the form's ``authenticity_token``.

        The field is looked up by name. If no such field exists, the second
        hidden ``<input>`` on the page is used instead.

        :return: the ``value`` attribute of the token field.
        :raises IndexError: when neither lookup finds a field.
        """
        # Requests sent through the same session object share some parameters,
        # such as the token.
        html = self.session.get(url=self.gitlab_url, verify=False)
        soup = BeautifulSoup(html.text, 'html.parser')
        field = soup.find('input', attrs={'name': 'authenticity_token'})
        if field is None:
            # Fallback: positional lookup, which depends on the page layout.
            field = soup.find_all('input', type="hidden")[1]
        return field.attrs['value']

    def post_gitlab(self, login, password):
        """Submit the sign-in form and print the resulting status code.

        :param login: value sent as ``user[login]``.
        :param password: value sent as ``user[password]``.
        """
        data = {
            # A wrong authenticity_token makes the server respond with 422.
            'authenticity_token': self.get_token(),
            'user[login]': login,
            'user[password]': password
        }
        # self.session must be used here so that the token is shared across
        # requests through the session object.
        res = self.session.post(url=self.gitlab_url, data=data, verify=False)
        print(res.status_code)
        if res.status_code != 200:
            print("gitlab登录失败")

    def get_gitlabPage(self, url):
        """Fetch a commit page and print its commit time and author.

        :param url: address of the commit page to fetch.
        :raises AttributeError: when the page lacks the expected elements,
            because the lookups below then return ``None``.
        """
        res = self.session.get(url, verify=False)
        # Parse the page's HTML content.
        soup = BeautifulSoup(res.content.decode("utf-8"), "html.parser")
        tag = soup.find(name="div", attrs={"class": "header-main-content"})
        commit_time = tag.find(name="time", attrs={"class": "js-timeago"})
        author = tag.find(name="span", attrs={"class": "commit-author-name"})
        # Commit time.
        print(commit_time.get("datetime"))
        # Commit author.
        print(author.text)


if __name__ == '__main__':
    gl = login()
    gl.post_gitlab('aaa', 'bbb')
    # Any page on GitLab can be parsed this way.
    gl.get_gitlabPage("https://192.168.40.111:8090/tf-as/appservice/-/commit/963ac4e759de4107cb4dae1ceebfb16d5ad2b172")
结合GitLab的API和网页爬取两种方式，即可获取自己想要的数据。