公司 Harbor 倉庫一年多沒清理,硬盤被堆滿了,為此寫了個批量清除的 Python 腳本。
# coding: utf-8
from operator import itemgetter
from urllib import parse
import requests
import datetime as dt
# import maya
import logging
# Write log records of level INFO and above to harbor_clean.txt.
# filemode="w" truncates the file, so every run starts with an empty log.
logging.basicConfig(filename='harbor_clean.txt', filemode="w", level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
"""
清理 Harbor 倉庫的老鏡像
"""
class HarborCleaner(object):
    """Prune old image tags from a Harbor registry through its REST API.

    All requests go to ``<scheme>://<hostname>:<port>/api`` with HTTP basic
    auth. Deleting a tag through this API is only a *soft* delete: a registry
    garbage collection must be run afterwards to actually free disk space.

    NOTE(review): the project and repository listings are fetched with a
    single request each; if the deployed Harbor version paginates these
    endpoints, only the first page is processed -- confirm.
    """

    # Meaning of the status codes expected from the tag DELETE endpoint.
    delete_status = {
        200: "Delete tag successfully.",
        400: "Invalid repo_name.",
        401: "Unauthorized.",
        403: "Forbidden.",
        404: "Repository or tag not found.",
    }

    def __init__(self, user: str, password: str, hostname: str, port: int,
                 use_https=True, verify_ssl=False):
        """Build the API URL templates and an authenticated HTTP session.

        Args:
            user: Harbor account name used for basic auth.
            password: Password of that account.
            hostname: Host name of the Harbor server.
            port: TCP port of the Harbor server.
            use_https: Use ``https`` when true, plain ``http`` otherwise.
            verify_ssl: Whether to verify the server certificate. Defaults
                to ``False`` (the historical behaviour) so that servers with
                self-signed certificates keep working; pass ``True`` when
                the certificate can be validated.
        """
        scheme = "https" if use_https else "http"
        api_base = f"{scheme}://{hostname}:{port}/api"

        self.search_api = api_base + "/search?q={key_word}"
        self.projects_api = api_base + "/projects"
        self.repository_query_api = api_base + "/repositories?project_id={project_id}"
        # repo_name usually looks like "project_name/repo_name"; because of
        # the slash it must be URL-escaped before being put into the path.
        self.repository_tags_api = api_base + "/repositories/{repo_name}/tags"
        self.repository_tag_api = self.repository_tags_api + "/{tag}"

        self.session = requests.Session()
        self.session.verify = verify_ssl
        # update() keeps the session's default headers and its
        # case-insensitive header mapping instead of replacing them.
        self.session.headers.update({"Accept": "application/json"})
        self.session.auth = (user, password)

    def _get_json(self, url: str) -> dict:
        """GET *url* and wrap the outcome.

        Returns:
            ``{"success": True, "data": <decoded JSON>}`` for a 200 response
            with a JSON body, otherwise ``{"success": False, "data": <raw
            response text>}``.
        """
        resp = self.session.get(url)
        if resp.status_code != 200:
            return {"success": False, "data": resp.text}
        try:
            return {"success": True, "data": resp.json()}
        except ValueError:
            # A 200 response whose body is not valid JSON is still a failure.
            return {"success": False, "data": resp.text}

    def get_all_projects(self):
        """Fetch the project list; see ``_get_json`` for the result shape."""
        return self._get_json(self.projects_api)

    def get_all_repos(self, project: dict):
        """Fetch the repositories of *project* (needs its ``project_id``)."""
        url = self.repository_query_api.format(project_id=project['project_id'])
        return self._get_json(url)

    def get_all_tags(self, repo: dict):
        """Fetch all tags of *repo*; the repository name is URL-escaped."""
        repo_name = parse.quote(repo['name'], safe="")
        url = self.repository_tags_api.format(repo_name=repo_name)
        return self._get_json(url)

    @staticmethod
    def _parse_created(created: str) -> dt.datetime:
        """Parse a timestamp such as ``2019-04-09T11:33:49.296960745Z``.

        ``strptime`` accepts at most six fractional digits, so the fraction
        is truncated (or zero-padded) to microseconds. A missing fraction is
        accepted as well. The result is a naive ``datetime``.

        Raises:
            ValueError: If *created* is not a string in that format.
        """
        if not isinstance(created, str):
            raise ValueError(f"invalid creation timestamp: {created!r}")
        text = created.strip()
        if text.endswith(("Z", "z")):
            text = text[:-1]
        head, _, fraction = text.partition(".")
        micros = (fraction + "000000")[:6]
        return dt.datetime.strptime(f"{head}.{micros}", r'%Y-%m-%dT%H:%M:%S.%f')

    @classmethod
    def _select_old_tags(cls, tags: list, n: int) -> list:
        """Sort *tags* in place by creation time and return the old ones.

        Every tag dict gets a ``time`` key holding its parsed ``created``
        value. The newest ``n + 1`` entries are kept; the extra one accounts
        for ``latest``, which duplicates the newest real tag.
        """
        for tag in tags:
            tag['time'] = cls._parse_created(tag['created'])
        tags.sort(key=itemgetter('time'))
        return tags[:-(n + 1)]

    def get_tags_except_lastest_n(self, repo: dict, n: int):
        """Return every tag of *repo* except the newest *n* (plus ``latest``).

        Returns an empty list when the repository has too few tags to need
        cleaning, when its tags cannot be listed, or when a creation time
        cannot be parsed (nothing is deleted if the order is unknown).
        """
        # +1 because "latest" duplicates the newest tag.
        if repo['tags_count'] <= n + 1:
            return []

        result = self.get_all_tags(repo)
        if not result['success']:
            logger.warning("faild to get all tags in repo: %s, message: %s",
                           repo['name'], result['data'])
            return []

        try:
            return self._select_old_tags(result['data'] or [], n)
        except (KeyError, ValueError) as err:
            logger.warning("skip repo: %s, cannot order its tags: %s",
                           repo['name'], err)
            return []

    def soft_delete_tag(self, repo: dict, tag: dict):
        """Soft-delete *tag* from *repo*.

        Both names are URL-escaped. A garbage collection is still required
        afterwards to reclaim the disk space.

        Returns:
            ``{"success": bool, "message": str}`` where ``message`` explains
            the HTTP status code of the DELETE request.
        """
        url = self.repository_tag_api.format(
            repo_name=parse.quote(repo['name'], safe=""),
            tag=parse.quote(tag['name'], safe=""),
        )
        resp = self.session.delete(url)
        code = resp.status_code
        return {
            "success": code == 200,
            "message": self.delete_status.get(code, f"Unexpected status code: {code}"),
        }

    def _clean_repo(self, repo: dict, n: int) -> None:
        """Soft-delete the outdated tags of one repository, logging each step."""
        logger.info("deal with repo: %s", repo['name'])
        old_tags = self.get_tags_except_lastest_n(repo, n)
        logger.info("we have %s tags to delete in repo: %s", len(old_tags), repo['name'])
        for tag in old_tags:
            logger.info("try to delete repo:%s, tag: %s, create_time: %s",
                        repo['name'], tag['name'], tag['created'])
            result = self.soft_delete_tag(repo, tag)
            if result['success']:
                logger.info("success delete it.")
            else:
                logger.warning("delete failed!, message: %s", result['message'])

    def soft_delete_all_tags_except_latest_n(self, n):
        """In every repository of every project, keep only the newest *n* tags.

        All older tags are soft-deleted. Failures to list projects or
        repositories are logged; the affected scope is skipped.
        """
        res_projects = self.get_all_projects()
        if not res_projects['success']:
            logger.warning("faild to get all projects, message: %s", res_projects['data'])
            return

        projects = res_projects['data'] or []
        logger.info("we have %s projects", len(projects))
        for project in projects:
            res_repos = self.get_all_repos(project)
            if not res_repos['success']:
                logger.warning("faild to get all repos in project: %s, message: %s",
                               project['name'], res_repos['data'])
                continue

            repos = res_repos['data'] or []
            logger.info("we have %s repos in project:%s", len(repos), project['name'])
            for repo in repos:
                self._clean_repo(repo, n)
if __name__ == "__main__":
    import os

    # 1. Soft-delete old tags through Harbor's REST API.
    # Connection settings can be overridden with environment variables so
    # that real credentials need not be written into this file; the
    # fallbacks are the values that used to be hard-coded here.
    harbor_cleaner = HarborCleaner(
        user=os.environ.get("HARBOR_USER", "admin"),
        password=os.environ.get("HARBOR_PASSWORD", "Admin123"),
        hostname=os.environ.get("HARBOR_HOST", "reg.harbor.com"),
        port=int(os.environ.get("HARBOR_PORT", "8321")),
    )
    # Keep only the newest ten tags of every image.
    harbor_cleaner.soft_delete_all_tags_except_latest_n(10)
# 2. 進行一次 GC,清除掉所有已軟刪除的 images
# 2.1 harbor 1.7 之前的版本,需要停機才能 GC
"""
cd /volume1/docker/harbor/harbor
docker-compose down # 停機
# 下面的 tag 'v2.6.2-v1.4.0' 需要換成當前使用的 registry-photon 鏡像的版本號
# --dry-run 表示嘗試進行 GC,輸出 log 與正式 gc 一致,可用於提前發現問題
docker run -it --name gc --rm --volumes-from registry vmware/registry-photon:v2.6.2-v1.4.0 garbage-collect --dry-run /etc/registry/config.yml
# 正式 gc,這個才會真正的 gc 掉已經軟刪除的鏡像
docker run -it --name gc --rm --volumes-from registry vmware/registry-photon:v2.6.2-v1.4.0 garbage-collect /etc/registry/config.yml
"""
# 2.2 harbor 1.7+ 可以通過 restful api 進行在線 GC 或定期自動 GC。