1、項目背景
經過幾年的發展,公司最早是人工發布工程代碼,之后使用了jenkins,大部分工作只要新建一次腳本、nginx配置后就能比較自動的完成,但是回退依然使用人工處理的方式。自從公司業務發展起來以后,每個月2次的活動,都要上下主機,根據工程擴容。其中主要工作有新建ecs主機,初始化主機環境,添加dns解析,添加監控,添加jenkins發布腳本配置,配置項目發布模板等,這些給運維工作帶來了許多重復而且非常容易出錯的過程,效率也非常低下。同時活動結束后還要完成下線:刪除nginx配置,刪除jenkins發布腳本配置,刪除dns解析,刪除ecs等。一次活動擴容、縮容往往要耗費2個運維1天的時間,而且頻繁的線上文本配置變更已經不止一次的出現人為事故。剛開始的時候為了降低工作疲勞度,甚至提前2天開始擴容,這樣下來對公司業務基礎設施的成本也有不小的開銷。在這個時候開始思考使用可視化、平台化的運維去解決這樣的問題,並且為后期公司技術人員的擴容做好運維支撐。(感謝公司前端大神路飛的給力支持,用react快速的搞出了一套界面)
2、平台功能
因為不是專業的開發和產品,整個平台的需求和后端設計都是由運維自己完成,大方向上有如下幾個需求:新建ecs主機;新建工程;根據工程關聯ecs主機形成發布調用;可以實時查看發布日志;批量和串行的發布支持;發布單的概念以便日后的發布權限審批。但是做着做着就發現細節的東西越來越多,比如說和前端的對接方式,代碼變更日志等,處理這些小細節實際上耗費了不少時間。整個平台使用python django web框架開發完成。
3、流程圖
ecs與工程:
工程構建與發布:
下發salt發布命令流程:
4、平台數據庫設計
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
class project_type(models.Model):
    # Lookup table of project type names, kept for the front end to display.
    # Optional string, at most 30 characters, stored as NULL when not set.
    ProjectType = models.CharField(max_length=30, default=None, null=True)
class project_info(models.Model):
    # Project information: one row per deployable project.

    # Project name. Required (null=False with default=None, so a value must
    # always be supplied on insert) and unique across the table.
    ProjectName = models.CharField(max_length=50, default=None, null=False, unique=True)
    # Name of the package produced by the Java build.
    PackageName = models.CharField(max_length=50, default=None, null=True)
    # Directory the project runs from on the target host.
    # NOTE(review): Django does not enforce max_length on TextField at the
    # database level — confirm whether a CharField was intended.
    ProjectDir = models.TextField(max_length=300, default=None, null=True)
    # Name of the nginx upstream for this project.
    UpsName = models.CharField(max_length=50, default=None, null=True)
    # Repository type: "git" or "svn" (these are the values getcode() branches on).
    Repertory = models.CharField(max_length=10, default=None, null=True)
    # Repository address (URL handed to the git/svn client).
    DeployAddress = models.TextField(max_length=300, default=None, null=True)
    # Branch to build.
    Branch = models.CharField(max_length=100, default=None, null=True)
    # Project type.
    ProjectType = models.CharField(max_length=30, default=None, null=True)
    # Creation time, stored as an integer — presumably epoch seconds; confirm.
    CreationTime = models.IntegerField(default=None, null=True)
    # TCP port, stored as a string.
    Port = models.CharField(max_length=10, default=None, null=True)
    # Extra arguments for the Java (Maven) packaging command.
    MavenArgs = models.CharField(max_length=100, default=None, null=True)
    # Status of the most recent build.
    LastBuildStatus = models.CharField(max_length=30, default=None, null=True)
    # Time of the most recent build. getcode() passes this value to
    # time.localtime(), i.e. it is treated as epoch seconds; NULL before the
    # first build.
    LastBuildTime = models.IntegerField(default=None, null=True)
class server_project_r(models.Model):
    # Relation table between projects and ECS hosts: a row means the ECS
    # instance belongs to the project.

    # Project name (required, indexed).
    ProjectName = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # ECS instance id (required, indexed). The original comment calls it
    # "unique", but only an index is declared here, not a unique constraint.
    InstanceId = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # Creation time, stored as an integer — presumably epoch seconds; confirm.
    CreationTime = models.IntegerField(default=None, null=True)
class server_info(models.Model):
    # ECS host information table.

    # ECS instance id (required, unique).
    InstanceId = models.CharField(max_length=50, default=None, null=False, unique=True)
    # ECS instance name (indexed).
    InstanceName = models.CharField(max_length=50, default=None, null=True, db_index=True)
    # Zone the ECS instance belongs to, e.g. Hangzhou B.
    ZoneId = models.CharField(max_length=50, default=None, null=True)
    # IP address on the VPC network.
    PrivateIp = models.CharField(max_length=50, default=None, null=True)
    # Number of CPUs, stored as a string.
    Cpu = models.CharField(max_length=50, default=None, null=True)
    # Memory size, stored as a string.
    Memory = models.CharField(max_length=50, default=None, null=True)
    # Operating system type: linux or windows.
    OsType = models.CharField(max_length=50, default=None, null=True)
    # Payment type: prepaid or postpaid.
    PayType = models.CharField(max_length=50, default=None, null=True)
    # Instance status, e.g. running or stopped.
    Status = models.CharField(max_length=30, default=None, null=True)
    # Creation time, stored as an integer — presumably epoch seconds; confirm.
    CreationTime = models.IntegerField(default=None, null=True)
class build_history(models.Model):
    # Project build history table: one row per build.

    # Build id (required, unique).
    BuildId = models.CharField(max_length=50, default=None, null=False, unique=True)
    # Name of the project that was built (required, indexed).
    ProjectName = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # Build status.
    Status = models.CharField(max_length=30, default=None, null=True)
    # Path of the build log file (the path, not the log contents).
    BuildLog = models.TextField(max_length=1000, default=None, null=True)
    # Creation date, stored as an integer — presumably epoch seconds; confirm.
    CreationTime = models.IntegerField(default=None, null=True)
    # Free-form remark.
    Note = models.TextField(max_length=1000, default=None, null=True)
class deploy_build_r(models.Model):
    # Relation table linking a build to a deployment (release) of it.

    # Build id (required, indexed). The original comment calls it "unique",
    # but no unique constraint is declared on this table.
    BuildId = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # Deployment id (required, indexed). Also described as "unique" in the
    # original comment without a unique constraint.
    DeployId = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # Project name (required, indexed).
    ProjectName = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # Creation date, stored as an integer — presumably epoch seconds; confirm.
    CreationTime = models.IntegerField(default=None, null=True)
    # Deployment status.
    Status = models.CharField(max_length=30, default=None, null=True)
class deploy_server(models.Model):
    # Per-host deployment table: one row per ECS host taking part in a
    # deployment. SaltDepoly() looks rows up by (DeployId, PrivateIp) and
    # startApp() writes "Success"/"Failed" into Status.

    # Deployment id (required, indexed).
    DeployId = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # ECS instance id (required, indexed).
    InstanceId = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # Private IP of the host on the VPC network.
    PrivateIp = models.CharField(max_length=50, default=None, null=True)
    # Project name (required, indexed). NOTE(review): the original comment on
    # this field read "ecs id", which looks like a copy-paste slip.
    ProjectName = models.CharField(max_length=50, default=None, null=False, db_index=True)
    # Creation date as an integer; startApp() stores epoch seconds here.
    CreationTime = models.IntegerField(default=None, null=True)
    # Free-form remark.
    Note = models.TextField(max_length=1000, default=None, null=True)
    # Deployment status of this host ("InProcessing", "Success", "Failed"
    # are the values written elsewhere in this file).
    Status = models.CharField(max_length=30, default=None, null=True)
class ecs_type(models.Model):
    # ECS type table, provided for the front end.

    # Display name of the ECS type (required).
    TypeName = models.CharField(max_length=30, default=None, null=False)
    # Identifier of the ECS type (required).
    TypeId = models.CharField(max_length=50, default=None, null=False)
class vswitch(models.Model):
    # Table of Aliyun vswitch (network) ids.

    # Display name of the vswitch (required).
    VswitchName = models.CharField(max_length=30, default=None, null=False)
    # Aliyun id of the vswitch (required).
    VswitchId = models.CharField(max_length=50, default=None, null=False)
|
5、核心業務代碼實現
其中有用到的python庫如下:svn,gitpython,json,django,salt,aliyunsdkcore,aliyunsdkecs,shutil,threadpool,time,datetime,subprocess,uuid,base64,jinja2,sqlalchemy等
下載git或者svn倉庫代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
def getcode(Repertory=None, DeployAddress=None, buildProjectDir=None, Branch=None, BuildLog=None, ProjectName=None):
    """Fetch the project's source code and append the change log to the build log.

    Args:
        Repertory: repository type, "git" or "svn".
        DeployAddress: repository URL.
        buildProjectDir: local directory to clone / check out into.
        Branch: branch to clone (git only).
        BuildLog: path of the build log file; change history is appended to it.
        ProjectName: project whose ``LastBuildTime`` bounds the change log.

    Returns:
        True when the code was fetched, False when the fetch failed or the
        repository type is not supported.
    """
    log_header = "代碼倉庫日志如下,如果第一次構建或者無變更記錄則為空:\n\n"
    stamp_format = "%Y-%m-%d %H:%M:%S"

    project = project_info.objects.filter(ProjectName=ProjectName).get()
    LastBuildTime = project.LastBuildTime
    # The original test `LastBuildTime or LastBuildTime != "null"` was always
    # true, so a first build (LastBuildTime is None) reached
    # utcfromtimestamp(None) on the svn path and crashed with TypeError.
    has_previous_build = LastBuildTime not in (None, "null")

    if Repertory == "git":
        try:
            Repo.clone_from(DeployAddress, buildProjectDir, branch=Branch)
        except Exception as e:
            print(e)
            return False
        with open(BuildLog, "a") as codelog:
            codelog.write(log_header)
            if has_previous_build:
                since = time.strftime(stamp_format, time.localtime(LastBuildTime))
                history = Git(buildProjectDir).log("--since=" + since)
                codelog.write(history.encode("utf8"))
        return True

    if Repertory == "svn":
        try:
            svncmd = svn.remote.RemoteClient(DeployAddress, username="xxxxx", password="xxxxx")
            # The checkout is the step that actually talks to the server, so
            # it must sit inside the try for failures to be reported as False.
            svncmd.checkout(buildProjectDir)
        except Exception as e:
            print(e)
            return False
        localsvn = svn.local.LocalClient(buildProjectDir, username="xxxxx", password="xxxxx")
        with open(BuildLog, "a") as codelog:
            codelog.write(log_header)
            if has_previous_build:
                # NOTE(review): the lower bound is UTC while the upper bound is
                # local time, exactly as in the original — confirm which one
                # the svn client expects.
                entries = localsvn.log_default(
                    timestamp_from_dt=datetime.datetime.utcfromtimestamp(LastBuildTime),
                    timestamp_to_dt=datetime.datetime.now())
                for entry in entries:
                    # Author or message may be missing on some commits.
                    author = (entry.author or "").encode("utf8")
                    message = (entry.msg or "").encode("utf8")
                    codelog.write(author + " " + message + " "
                                  + entry.date.strftime(stamp_format) + "\n")
        return True

    # Unknown repository type: report failure explicitly instead of falling
    # through with None, which callers testing `is False` read as success.
    print("unsupported repository type: %r" % (Repertory,))
    return False
|
maven打包實現:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
def mavenProject(ProjectName=None, BuildId=None, BuildLog=None, Repertory=None, DeployAddress=None,
                 Branch=None, ProjectType=None, MavenArgs=None, PackageName=None):
    """Check out a project, build it with Maven and record the result.

    The build directory ``/data/src/<ProjectName>`` is recreated from scratch,
    the code is fetched with ``getcode``, Maven's combined stdout/stderr is
    streamed into ``BuildLog``, and the outcome ("Success" / "Failed") is
    stored through ``mavenWriteDb``. On success the artifact is archived with
    ``backupMaven``.

    Args:
        ProjectName: project name; also the name of the build directory.
        BuildId: id of this build, passed on to the bookkeeping helpers.
        BuildLog: path of the log file the build output is appended to.
        Repertory, DeployAddress, Branch: repository type, URL and branch,
            forwarded to ``getcode``.
        ProjectType, PackageName: forwarded to ``backupMaven``.
        MavenArgs: extra Maven command-line arguments as one string
            (None is treated as no extra arguments).

    Returns:
        None.
    """
    import shlex

    basedir = "/data/src/"
    buildProjectDir = basedir + ProjectName
    # Build an argument vector instead of a shell string: MavenArgs comes from
    # stored project settings and must not be interpreted by a shell.
    cmd = [
        "/opt/apache-maven/bin/mvn", "-B", "-f", "pom.xml",
        "-s", "/opt/apache-maven/conf/settings.xml",
        "-gs", "/opt/apache-maven/conf/settings.xml",
    ] + shlex.split(MavenArgs or "")
    print(" ".join(cmd))

    # Always start from an empty build directory.
    if os.path.exists(buildProjectDir):
        shutil.rmtree(buildProjectDir)
    os.makedirs(buildProjectDir)

    # Download the repository code.
    coderesult = getcode(Repertory=Repertory, DeployAddress=DeployAddress, buildProjectDir=buildProjectDir,
                         Branch=Branch, BuildLog=BuildLog, ProjectName=ProjectName)
    if coderesult is not True:
        mavenWriteDb(ProjectName=ProjectName, BuildId=BuildId, Result="Failed")
        # Without this return the original went on to run Maven in an empty
        # directory and wrote the build status a second time.
        return

    # Build the project, streaming its output into the log file as it appears.
    with open(BuildLog, "a") as loggin:
        loggin.write("\n打包日志日志如下:\n\n")
        loggin.flush()
        try:
            # cwd= replaces the original "cd <dir>;" prefix, which would have
            # run Maven in the wrong directory if the cd failed.
            buildcommand = subprocess.Popen(cmd, cwd=buildProjectDir,
                                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except OSError as e:
            # Maven binary missing or not executable: still record the failure.
            loggin.write(str(e) + "\n")
            mavenWriteDb(ProjectName=ProjectName, BuildId=BuildId, Result="Failed")
            return
        # Read until EOF so that output still buffered when the process exits
        # is not lost (the old poll()/readline() loop dropped the tail).
        for line in iter(buildcommand.stdout.readline, b""):
            loggin.write(line)
            loggin.flush()  # keep the log readable in real time
        buildcommand.stdout.close()
        buildcommand.wait()

    if buildcommand.returncode == 0:
        backupMaven(ProjectType=ProjectType, PackageName=PackageName, buildProjectDir=buildProjectDir,
                    BuildId=BuildId)
        mavenWriteDb(ProjectName=ProjectName, BuildId=BuildId, Result="Success")
    else:
        mavenWriteDb(ProjectName=ProjectName, BuildId=BuildId, Result="Failed")
|
saltstack調用方式:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
def SaltDepoly(PrivateIp=None, DeployId=None, ProjectName=None, ProjectType=None):
    """Ask salt to deploy one project onto one host.

    The host's ``deploy_server`` row is first marked "InProcessing"; the salt
    execution module matching ``ProjectType`` is then invoked on the minion
    selected by ``PrivateIp``.

    Args:
        PrivateIp: private IP of the target host (salt ``ipcidr`` target).
        DeployId: id of the deployment; passed to the salt module.
        ProjectName: project to deploy; passed to the salt module.
        ProjectType: "tomcat" or "dubbo".

    Returns:
        True when the salt call was dispatched without raising, False when the
        project type is unsupported or the call raised. A True result does not
        say the deployment itself succeeded — nothing in the returned salt
        result is inspected here.
    """
    deploy_functions = {
        "tomcat": 'projectdeploy.deploy',
        "dubbo": 'dubbodeploy.deploy',
    }

    record = deploy_server.objects.get(DeployId=DeployId, PrivateIp=PrivateIp)
    record.Status = "InProcessing"
    record.save()

    def mark_failed():
        # Do not leave the row stuck at "InProcessing" when nothing was
        # dispatched; update only the Status column.
        deploy_server.objects.filter(DeployId=DeployId, PrivateIp=PrivateIp).update(Status="Failed")

    fun = deploy_functions.get(ProjectType)
    if fun is None:
        # Previously this case fell through to `print result` with `result`
        # unbound and was masked by the broad except below.
        print("unsupported project type: %r" % (ProjectType,))
        mark_failed()
        return False

    config_file_path = "/etc/salt/master"
    projectargs = [ProjectName, DeployId]
    try:
        print(projectargs)
        client = salt.client.LocalClient(config_file_path)
        result = client.cmd(tgt=PrivateIp, tgt_type="ipcidr", fun=fun, arg=projectargs,
                            timeout=600)
        print(result)
        return True
    except Exception as e:
        print(e)
        mark_failed()
        return False
|
發布項目控制實現部分:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# Release control flow. This is an excerpt from a larger function (it uses
# `return`); IpList, DeployId, ProjectName and ProjectType come from the
# enclosing scope, which is not shown here.
if len(IpList) > 3:
    # Parallel release: with more than 3 hosts, deploy in two rounds.
    print "主機大於3台,開始第一輪發布"
    pool = threadpool.ThreadPool(len(IpList))
    args_list = list()
    # Round one covers the first half of the host list.
    for i in range(0, len(IpList) / 2):
        # presumably a (positional-args, kwargs) pair as threadpool.makeRequests
        # expects — TODO confirm
        args_tup = ([IpList[i], DeployId, ProjectName, ProjectType], None)
        args_list.append(args_tup)
    request = threadpool.makeRequests(SaltDepoly, args_list)
    [pool.putRequest(req) for req in request]
    pool.wait()
    # Check whether any host of this DeployId failed; if so report it and stop.
    # NOTE(review): GodeployCheck is called once per branch, i.e. twice on the
    # success path.
    if GodeployCheck(DeployId=DeployId) != True:
        GoDeployStatus(DeployId=DeployId, Action="close")
        print "發布過程中有部分主機失敗,退出發布主進程"
        return
    elif GodeployCheck(DeployId=DeployId) == True:
        GoDeployStatus(DeployId=DeployId, Action="close")
        print "開始第二輪發布"
        pool = threadpool.ThreadPool(len(IpList))
        args_list = list()
        # Round two covers the remaining second half of the host list.
        for i in range(len(IpList) / 2, len(IpList)):
            args_tup = ([IpList[i], DeployId, ProjectName, ProjectType], None)
            args_list.append(args_tup)
        request = threadpool.makeRequests(SaltDepoly, args_list)
        [pool.putRequest(req) for req in request]
        pool.wait()
        # Check whether any host of this DeployId failed; if so report it and stop.
        if GodeployCheck(DeployId=DeployId) != True:
            GoDeployStatus(DeployId=DeployId, Action="close")
            print "發布過程中有部分主機失敗,退出發布主進程"
            return
        elif GodeployCheck(DeployId=DeployId) == True:
            GoDeployStatus(DeployId=DeployId, Action="close")
            print "全部發布成功"
else:
    # Serial release: 3 hosts or fewer are deployed one at a time, with a
    # failure check after every host.
    for i in range(0, len(IpList)):
        ip = IpList[i]
        print ip
        SaltDepoly(ip, DeployId, ProjectName, ProjectType)
        if GodeployCheck(DeployId=DeployId) != True:
            GoDeployStatus(DeployId=DeployId, Action="close")
            print "發布過程中有部分主機失敗,退出發布主進程"
            return
        elif GodeployCheck(DeployId=DeployId) == True:
            GoDeployStatus(DeployId=DeployId, Action="close")
            print "一台發布成功"
|
jwt檢測,裝飾器實現:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
def CheckToken(func):
    """View decorator that rejects requests without a valid JWT cookie.

    The wrapped view's first positional argument must be the request. The
    "token" cookie is decoded with the shared secret and must contain
    ``userInfo.id``; otherwise a JSON body with ``isSuccess: 0`` is returned
    and the view is not called.

    Args:
        func: the view function to protect.

    Returns:
        The wrapping function, which returns either the view's own response or
        a JSON rejection ``HttpResponse``.
    """
    @wraps(func)
    def Check(*keys, **kw):
        request = keys[0]

        # `in` replaces dict.has_key(), which is deprecated in Python 2 and
        # removed in Python 3.
        if "token" not in request.COOKIES:
            print("沒有token,禁止訪問")
            result = json.dumps({"isSuccess": 0, "message": ""})
            return HttpResponse(result, content_type="application/json")

        secret_key = "xxxxxx"
        tokenstring = request.COOKIES.get("token", "")
        try:
            # Only the presence of userInfo.id is verified; the value itself
            # is not used. NOTE(review): no `algorithms=` allow-list is passed
            # to jwt.decode — newer PyJWT releases require one; confirm the
            # signing algorithm and pin it.
            jwt.decode(tokenstring, key=secret_key)["userInfo"]["id"]
        except Exception as e:
            print(e)
            print("Token不正確,禁止訪問")
            result = json.dumps({"isSuccess": 0, "message": "接口不支持單獨調用"})
            return HttpResponse(result, content_type="application/json")

        return func(*keys, **kw)

    return Check
# Using the decorator on a Django view
@CheckToken
def webEcsList(request):
    """ECS listing endpoint.

    Reads the ``PageSize``, ``PageNumber`` and ``aliyun`` query parameters
    (each None when absent) and returns whatever ``getDescribeInstances``
    produces as an ``application/json`` response.

    Only GET is supported. Other methods now get a 405 response; previously
    the view fell through and returned None, which Django reports as a
    server error.
    """
    if request.method != "GET":
        response = HttpResponse(status=405)
        response["Allow"] = "GET"
        return response

    PageSize = request.GET.get("PageSize", None)
    PageNumber = request.GET.get("PageNumber", None)
    aliyun = request.GET.get("aliyun", None)
    result = getDescribeInstances(PageNumber=PageNumber, PageSize=PageSize, aliyun=aliyun)
    return HttpResponse(result, content_type="application/json")
|
6、salt擴展模塊
用過salt的人應該知道,salt在執行起來比ansible要快,而且配置方法比chef要舒服很多。但是有個比較致命的缺點,就是sdk調用的時候只要內部的python代碼執行不出錯,salt就無法明確的告訴你本次調用是否真的達到你想要的結果了,所以我采取了一個思路:用salt模塊去檢測每台ecs的發布結果,然后落庫。
其實整個平台的核心不止是http的調用和顯示,salt這邊也非常非常的重要和復雜。其中要先寫最基礎的發布bash腳本,給salt調用,然后判斷project類型,如果是tomcat項目還要用jinja2渲染配置。最終判斷結果落庫,下面是一些核心的代碼。其中落庫使用了sqlalchemy這個非常著名的python orm。
渲染配置:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
def rendering_argv(self):
    """Create the project's Tomcat directory from the template, once.

    When ``self.ProjectDir`` already exists nothing is done. Otherwise
    ``/data/tomcat_template`` is copied to ``self.ProjectDir`` and two files
    are rendered with jinja2:

    * ``conf/server.xml`` from ``server_template.xml`` with http, shutdown,
      ajp and redirect ports set to ``Port``, ``Port+1``, ``Port+2`` and
      ``Port+3``;
    * ``bin/catalina.sh`` from ``catalina_template.sh`` with the jmx port set
      to ``Port+4``.

    Raises:
        ValueError: ``self.Port`` is not an integer (raised before anything
            is copied).
        subprocess.CalledProcessError: copying the template failed.
    """
    import subprocess

    if os.path.exists(self.ProjectDir):
        return

    # Validate the port before copying: a failure after the copy would leave
    # an unrendered directory behind that later calls would skip.
    port = int(self.Port)

    # Argument-vector call instead of os.system("cp ... %s"): no shell
    # interpretation of the path, and a failed copy is no longer ignored.
    subprocess.check_call(["cp", "-rp", "/data/tomcat_template", self.ProjectDir])

    def render_to(template_dir, template_name, target, **context):
        # Render one template from the pristine template tree into the project.
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
        content = env.get_template(template_name).render(**context)
        with open(target, "w") as f:
            f.write(content)

    render_to("/data/tomcat_template/conf", "server_template.xml",
              self.ProjectDir + "/conf/server.xml",
              http_port=port, shutdown_port=port + 1, ajp_port=port + 2,
              redirect_port=port + 3)
    render_to("/data/tomcat_template/bin", "catalina_template.sh",
              self.ProjectDir + "/bin/catalina.sh",
              jmx_port=port + 4)
|
啟動、停止業務程序:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
def stopApp(self, ProjectDir=None):
    """Stop the application and clear its deployed webapps.

    Runs ``<self.ProjectDir>/bin/stop.sh`` with ``sh``. If the script exits
    non-zero the process exits with status 1 and nothing is deleted;
    otherwise every non-hidden entry under ``<self.ProjectDir>/webapps`` is
    removed.

    Args:
        ProjectDir: unused; kept for interface compatibility
            (``self.ProjectDir`` is what is actually read).

    Raises:
        SystemExit: with code 1 when the stop script fails.
    """
    import glob
    import shutil
    import subprocess

    stoppath = self.ProjectDir + "/bin/stop.sh"
    if subprocess.call(["sh", stoppath]) != 0:
        raise SystemExit(1)

    # The original ran "cd <dir>;rm -rf webapps/*": if the cd failed, rm would
    # delete webapps/* relative to whatever the current directory was. Delete
    # through an explicit path instead. Like the shell glob, "*" skips
    # dot-files, and like "rm -f" removal errors are not fatal.
    for entry in glob.glob(os.path.join(self.ProjectDir, "webapps", "*")):
        if os.path.isdir(entry) and not os.path.islink(entry):
            shutil.rmtree(entry, ignore_errors=True)
        else:
            try:
                os.remove(entry)
            except OSError:
                pass
def startApp(self, ProjectDir=None):
    """Fetch the package, start the application and record the outcome.

    Calls ``self.getPackage()``, runs ``<self.ProjectDir>/bin/start.sh`` with
    ``self.myaddr`` as its argument, then sets ``Status`` ("Success" or
    "Failed") and ``CreationTime`` (current epoch seconds) on the
    ``deploy_server`` rows matching ``self.DeployId`` and ``self.myaddr``.

    Args:
        ProjectDir: unused; kept for interface compatibility
            (``self.ProjectDir`` is what is actually read).

    Raises:
        SystemExit: with code 1 when the start script exits non-zero (after
            the failure has been committed to the database).
    """
    import subprocess

    self.getPackage()
    startpath = self.ProjectDir + "/bin/start.sh"
    # Argument vector instead of a concatenated shell string.
    succeeded = subprocess.call(["sh", startpath, self.myaddr]) == 0

    if not succeeded:
        print("發布失敗")

    rows = self.db.sessiondb.query(deploy_server).filter(
        and_(deploy_server.DeployId == self.DeployId,
             deploy_server.PrivateIp == self.myaddr))
    # One UPDATE sets both columns; the original issued two.
    rows.update({
        deploy_server.Status: "Success" if succeeded else "Failed",
        deploy_server.CreationTime: int(time.time()),
    })
    self.db.sessiondb.commit()

    if not succeeded:
        raise SystemExit(1)
    print("發布成功")
|
7、效果圖
工程界面
ecs界面
工程詳情
構建界面
發布界面
8、結束語
這套平台上線后,日常要耗費半天甚至一天時間的擴容和縮容,基本上15分鍾就能搞定。當然其中還是有很多不足和需要改進的地方,后續還會慢慢優化。