需求:
公司做了几个 nodejs 项目,牵扯到 License 问题。希望做一个工具,把项目中引用到的所有 dependency 的 license 文件内容整合到一个 TXT 文档里。
for example:
根目录下的 package.json 中dependencies{ "protobufjs": "6.10.1"}
则需要查找protobufjs 中的package.json 中的
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@types/long": "^4.0.1",
"@types/node": "^13.7.0",
"long": "^4.0.0"
}
再去查找 @protobufjs/aspromise 下的 package.json,依此类推,直到找不到 dependencies 为止,
然后把这些dependencies对应的license内容整理到TXT文件中去。
** 不可重复
解题思路:
我比较愚钝,这个功能做了好几天,问题的难点在于如何查找所有引用到的依赖库。
思考一:递归思想
1. 先解析根目录下的package.json 文件,查询到用的dependencies
2. 用 os.walk(path) 遍历根目录,找到与查询到的 dependencies 同名的文件夹 A
3.在文件夹A的路径下,判断license文件是否存在,若存在则读取license文件到TXT中,如果不存在,在TXT中写入dependency的名字,注明license 未查询到。
4.在文件夹A的路径下,读取package.json 文件,读取解析dependencies(循环过程1,2,3)
在文件夹少的情况下,可行,过大的时候,× 不可行。
思考二:
1. 遍历文件夹下的所有package.json 文件,获取所有package.json 文件的路径,作为一个list存储下来 pkgDirList(如果出现重复的需要对其进行解析,争取排除重复项)
2. 以pkgDirList为输入,计算每一个dependency出现的次数,和dependency对应的路径,比如dictCountDir["axios"]=1,dictNameDir["axios"] ="C:xxx\\mxxx\node_modules\axios"
3. 以根目录中的 package.json 文件中的 dependencies 为入口,然后循环遍历 pkgDirList,在 pkgDirList 中找到对应的文件路径,
4. 读取 package.json 文件,解析后再去 pkgDirList 匹配查找对应路径,以此类推,直到查完所有的 package.json 文件中的 dependency,返回一个包含所有 dependencies 的 list:depList
5. 将获取到的 depList 中的元素作为 dictNameDir 的 key,查找对应的 license 路径,
5.1 如果 license 文件存在,根据 license 路径读取文件,写入 TXT 文档。
5.2 如果 license 文件不存在,再去判断 license 内容是否存在于 readme.md 文件中,
5.2.1 如果存在,加入提醒信息,请手动 copy,
5.2.2 如果不存在,判断是不是 MIT license,如果是 MIT license,导入 MIT license 模板。
5.2.3 如果都不是,备注找不到。
源码:

import os
import sys
import json
import time

# Module-level state. The Result* paths and lincenseTempPath are assigned by
# inite(); nodeModulesDir is assigned by getRootPackageJson() when the root
# directory contains a node_modules folder.
ResultDefaultPath = None
ResultLincensePath = None
ResultInfoPath = None
lincenseTempPath = None
nodeModulesDir = None

# Upper-cased base names (text before the first ".") accepted as license files.
_LICENSE_BASENAMES = ("LICENSE", "LICENCE", "LICENSE-MIT", "LICENCE-MIT")
# Lower-cased README file names searched for embedded MIT license text.
_README_NAMES = ("readme.md", "readme.txt")
# Sentence that appears in the body of the MIT license.
_MIT_MARKER = "copies or substantial portions of the Software."


def _loadPackageJson(filename):
    """Return the parsed JSON object stored in *filename*, or None.

    None is returned when the file does not exist, cannot be read, is not
    valid JSON, or its top-level value is not an object. Read/parse errors
    are printed, a missing file is silent.
    """
    if not os.path.isfile(filename):
        return None
    try:
        with open(filename, "r", encoding="utf8") as handle:
            data = json.load(handle)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(e)
        return None
    return data if isinstance(data, dict) else None


def readPackageJson(filename):
    """Return the "dependencies" of a package.json file as one string.

    The string is the text of the dependencies dict without the outer
    braces, for example:
        "'axios': '^0.19.2', 'myjs-common': '^1.0.6', 'oneport': '^1.0.2'"

    An empty string is returned when the file is missing or unreadable, or
    when it has no (or empty) dependencies, so callers can always pass the
    result to ParsingPackageJson().
    """
    packageJson = _loadPackageJson(filename)
    if not packageJson:
        return ""
    dependencies = packageJson.get("dependencies")
    if not isinstance(dependencies, dict) or not dependencies:
        return ""
    return str(dependencies)[1:-1]


def GetPackageJsonInfo(filePath, keyName):
    """Return str(value) of *keyName* in <filePath>/package.json.

    For example, with '"license": "MIT"' in package.json:
        keyName == "license"  -> "MIT"
        keyName == "homepage" -> "https://github.com/indutny/node-spdy"

    An empty string is returned when the file or the key is missing.
    """
    packageJson = _loadPackageJson(os.path.join(filePath, "package.json"))
    if packageJson and keyName in packageJson:
        return str(packageJson[keyName])
    return ""


def readReadmefile(filePath):
    """Return True if a README in directory *filePath* contains MIT text.

    Every file directly under *filePath* whose name is readme.md or
    readme.txt (in any letter case) is checked for a sentence of the MIT
    license. False is returned when no such file contains it or the
    directory cannot be listed.
    """
    try:
        entries = os.listdir(filePath)
    except OSError:
        return False
    for entry in entries:
        if entry.lower() not in _README_NAMES:
            continue
        candidate = os.path.join(filePath, entry)
        if not os.path.isfile(candidate):
            continue
        # errors="replace": a README in another encoding must not abort the run.
        with open(candidate, "r", encoding="utf8", errors="replace") as readMe:
            if _MIT_MARKER in readMe.read():
                return True
    return False


def ParsingPackageJson(depStr):
    """Return the sorted, de-duplicated dependency names found in *depStr*.

    *depStr* is the string produced by readPackageJson(), e.g.
        "'axios': '^0.19.2', 'myjs-common': '^1.0.6', 'oneport': '^1.0.2'"
    which yields ["axios", "myjs-common", "oneport"].

    Only the text before the first ":" of each comma-separated entry is the
    name, so version specifiers that contain ":" themselves (for example
    "git+https://...", "file:../x", "npm:pkg@1") do not cause the dependency
    to be dropped.
    """
    names = set()
    for entry in (depStr or "").split(","):
        head, separator, _ = entry.partition(":")
        if not separator:
            continue
        name = head.strip().replace("'", "").replace('"', "")
        if name:
            names.add(name)
    return sorted(names)


def getLicenseFilePath(filepath):
    """Return the path of the license file directly under *filepath*.

    A file qualifies when the upper-cased text before its first "." is one
    of LICENSE, LICENCE, LICENSE-MIT or LICENCE-MIT and its extension is
    neither .js nor .html. Sub-directories are not searched. None is
    returned when nothing qualifies or the directory cannot be listed.
    """
    try:
        names = sorted(os.listdir(filepath))
    except OSError:
        return None
    for filename in names:
        parts = filename.upper().split(".")
        if parts[0] not in _LICENSE_BASENAMES:
            continue
        if len(parts) >= 2 and parts[-1] in ("JS", "HTML"):
            continue
        candidate = os.path.join(filepath, filename)
        if os.path.isfile(candidate):
            return candidate
    return None


def getRootPackageJson(rootPath):
    """Return the dependencies string of <rootPath>/package.json.

    Also records <rootPath>/node_modules in the global nodeModulesDir when
    that directory exists. Prints whether package.json was found and returns
    an empty string when it was not.
    """
    global nodeModulesDir
    candidateModules = os.path.join(rootPath, "node_modules")
    if os.path.isdir(candidateModules):
        nodeModulesDir = candidateModules

    packageJsonPath = os.path.join(rootPath, "package.json")
    if os.path.isfile(packageJsonPath):
        print("get pacakageJson")
        return readPackageJson(packageJsonPath)
    print("No pacakageJson")
    return ""


def getDependencyName(DepDir):
    """Return the npm package name that corresponds to directory *DepDir*.

    The name is the last path component, prefixed with its scope when the
    parent directory is a scope folder (starts with "@"):
        ...\\node_modules\\axios          -> "axios"
        ...\\node_modules\\@types\\node    -> "@types/node"
    Both "\\" and "/" are accepted as separators.
    """
    parts = [part for part in DepDir.replace("\\", "/").split("/") if part]
    if not parts:
        return ""
    name = parts[-1]
    if len(parts) >= 2 and parts[-2].startswith("@") and not name.startswith("@"):
        return parts[-2] + "/" + name
    return name


def getAllPackageJsonDir(Dir):
    """Return the directories under *Dir* that contain a package.json file.

    The same package can be installed in several places (nested
    node_modules). A further directory of an already recorded package name
    is kept only when its dependency list differs from every recorded
    directory of that name; identical copies are skipped. Each directory
    appears at most once in the result.
    """
    packageJsonDir = []
    dirsByName = {}     # package name -> directories recorded for that name
    depsByDir = {}      # directory -> dependency names (read lazily, once)

    def dependenciesOf(pkgDir):
        if pkgDir not in depsByDir:
            pjsonPath = os.path.join(pkgDir, "package.json")
            depsByDir[pkgDir] = ParsingPackageJson(readPackageJson(pjsonPath))
        return depsByDir[pkgDir]

    for dirpath, _dirnames, filenames in os.walk(Dir):
        if "package.json" not in filenames:
            continue
        recorded = dirsByName.setdefault(getDependencyName(dirpath), [])
        if recorded:
            current = dependenciesOf(dirpath)
            if any(dependenciesOf(known) == current for known in recorded):
                continue
        recorded.append(dirpath)
        packageJsonDir.append(dirpath)

    print("all package.json file dir count:", len(packageJsonDir))
    return packageJsonDir


def getPkgDirDict(pacakageDir, rootDir):
    """Index package directories by package name.

    Returns (dictNameDir, dictCountDir):
        dictNameDir[name]  -> first directory found for that package
        dictCountDir[name] -> number of directories with that package name
    e.g. dictCountDir["axios"] == 1 and
         dictNameDir["axios"] == r"C:...\\node_modules\\axios".
    *rootDir* itself is skipped.
    """
    dictNameDir = {}
    dictCountDir = {}
    for pkgDir in pacakageDir:
        if pkgDir == rootDir:
            continue
        pkgDirName = getDependencyName(pkgDir)
        dictNameDir.setdefault(pkgDirName, pkgDir)
        dictCountDir[pkgDirName] = dictCountDir.get(pkgDirName, 0) + 1
    return dictNameDir, dictCountDir


def getSubDepList(pkgName, deplist, pkgPath):
    """Append the dependencies declared in *pkgPath* to *deplist*.

    pkgName: name of the package that owns the package.json file
    deplist: list of dependency names, extended in place without duplicates
    pkgPath: path of the package.json file
    Writes one line to the info report when the file declares dependencies.
    Returns *deplist*.
    """
    pkgJson = readPackageJson(pkgPath)
    if pkgJson:
        for subDep in ParsingPackageJson(pkgJson):
            if subDep not in deplist:
                deplist.append(subDep)
        info = "pkgName:" + pkgName + " pkgPath:" + pkgPath + "\r pkgJson:" + str(pkgJson) + "\r\n"
        saveInfoToTxt(ResultInfoPath, info)
    return deplist


def getDepList(rootDepStr, pacakageDir, dirDict):
    """Return every direct and transitive dependency of the root project.

    rootDepStr:  dependencies string of the root package.json
    pacakageDir: directories returned by getAllPackageJsonDir()
    dirDict:     result of getPkgDirDict(); kept for interface
                 compatibility, the lookup index is built from pacakageDir
    The list starts with the root dependencies and grows as the
    package.json of each listed dependency is read.
    """
    deplist = []
    if rootDepStr:
        # Build the name -> directories index once instead of rescanning
        # every directory for every dependency.
        dirsByName = {}
        for pkgDir in pacakageDir:
            dirsByName.setdefault(getDependencyName(pkgDir), []).append(pkgDir)

        deplist = ParsingPackageJson(rootDepStr)
        index = 0
        # deplist grows inside the loop; the index walk visits new entries too.
        while index < len(deplist):
            dep = deplist[index]
            for pkgDir in dirsByName.get(dep, ()):
                getSubDepList(dep, deplist, os.path.join(pkgDir, "package.json"))
            index += 1

    saveInfoToTxt(ResultInfoPath, "deplist count:" + str(len(deplist)) + "\r" + str(deplist))
    dcount = len(deplist)
    print("dependence count:", str(dcount))
    return deplist


def getAllDepList(pacakageDir):
    """Test helper: return all dependency names declared in any package.json."""
    allDep = set()
    for pkgDir in pacakageDir:
        pJson = readPackageJson(os.path.join(pkgDir, "package.json"))
        if pJson:
            allDep.update(ParsingPackageJson(pJson))
    return sorted(allDep)


def getLicenses(rootdir):
    """Collect the license text of every dependency of the project in *rootdir*.

    For each dependency the license file is copied into the licenses report;
    when there is none, AddLicenseTemp() writes a fallback entry. Summary
    counters are printed and appended to the info report.
    """
    pkgDirList = getAllPackageJsonDir(rootdir)
    dirDict = getPkgDirDict(pkgDirList, rootdir)
    rootDepStr = getRootPackageJson(rootdir)
    deplist = getDepList(rootDepStr, pkgDirList, dirDict)
    dictNameDir = dirDict[0]

    licenseTypeDict = {"FindInReadme": 0, "AddMITLicenseTemp": 0, "NotMITLicense": 0, "Others": 0}
    lNoFindCount = 0            # total number of dependencies without a license file
    LicenseFileNotFind = []     # dependency directory exists, but no license file in it
    depFileNotFind = []         # no directory found for the dependency at all

    # licenseNo is the entry number used in AllLicenses.txt.
    for licenseNo, depName in enumerate(deplist, start=1):
        depDir = dictNameDir.get(depName)
        if depDir is None:
            lNoFindCount = lNoFindCount + 1
            depFileNotFind.append(depName)
            res = AddLicenseTemp(licenseNo, None, depName)
            licenseTypeDict[res] = licenseTypeDict[res] + 1
            continue

        licensepath = getLicenseFilePath(depDir)
        if licensepath is None:
            LicenseFileNotFind.append(depName)
            lNoFindCount = lNoFindCount + 1
            res = AddLicenseTemp(licenseNo, depDir, depName)
            licenseTypeDict[res] = licenseTypeDict[res] + 1
        else:
            readLicense(licenseNo, licensepath, depName)

    notFindDepCount = str(len(depFileNotFind))
    notFindLinceseCount = str(len(LicenseFileNotFind))

    print("Not find Dependence file :", notFindDepCount)
    print("Not find license file:", notFindLinceseCount)
    print("License file Not find count:", str(lNoFindCount))
    for key in licenseTypeDict:
        print(key, licenseTypeDict[key])
    saveInfoToTxt(ResultInfoPath, "\r\n Info of license file is found" + str(licenseTypeDict))
    saveInfoToTxt(ResultInfoPath, "\r\n Not find Dependence dir count:" + notFindDepCount + " \r" + str(depFileNotFind))
    saveInfoToTxt(ResultInfoPath, "\r\n Not find license file count: " + notFindLinceseCount + "\r" + str(LicenseFileNotFind))
    saveInfoToTxt(ResultInfoPath, "\r\n total license file Not find count:" + str(lNoFindCount))


def saveInfoToTxt(savePath, line):
    """Append str(line) followed by a carriage return to the file *savePath*."""
    # Explicit UTF-8: the report contains non-ASCII text and must not depend
    # on the platform's default encoding.
    with open(savePath, "a+", encoding="utf8") as f:
        f.write(str(line))
        f.write('\r')


def _licenseHeader(no, depName):
    """Return the banner that introduces entry *no* in the licenses report."""
    banner = "\r==================================\r"
    return banner + str(no) + ") " + depName + banner


def _appendLicenseText(text):
    """Append *text* to the licenses report (ResultLincensePath)."""
    with open(ResultLincensePath, "a+", encoding="utf8") as f:
        f.write(text)


def readLicense(no, filepath, depName):
    """Copy the license file of a dependency into the licenses report.

    no:       entry number in the report
    filepath: path of the license file
    depName:  dependency name
    """
    # errors="replace": one license file in a legacy encoding must not abort
    # the whole run with UnicodeDecodeError.
    with open(filepath, "r", encoding="utf8", errors="replace") as licensefile:
        content = licensefile.read()
    _appendLicenseText(_licenseHeader(no, depName) + content + '\r')


def AddLicenseTemp(no, filepath, depName):
    """Write a fallback report entry for a dependency without a license file.

    no:       entry number in the report
    filepath: directory of the dependency, or None when it was not found
    depName:  dependency name

    Returns the kind of entry written:
        "FindInReadme"      - MIT text is in the README, to be pasted manually
        "AddMITLicenseTemp" - package.json says MIT, the template was inserted
        "NotMITLicense"     - package.json names another (or no) license
        "Others"            - the dependency directory was not found
    """
    header = _licenseHeader(no, depName)

    if filepath is None:
        _appendLicenseText(header + "Nothing find !")
        saveInfoToTxt(ResultInfoPath, "/(ㄒoㄒ)/~~ Nothing find ! %s" % depName)
        return "Others"

    if readReadmefile(filepath):
        saveInfoToTxt(ResultInfoPath, "** %s MIT License in readme.md file,Please paste manually. %s \r" % (depName, filepath))
        _appendLicenseText(header + "MIT License in readme.md file,Please paste manually! " + '\r')
        return "FindInReadme"

    license = GetPackageJsonInfo(filepath, "license")
    homepage = GetPackageJsonInfo(filepath, "homepage")
    text = header
    if homepage:
        text += "Homepage: %s%s" % (homepage, "\r")

    if license == "MIT":
        with open(lincenseTempPath, "r", encoding="utf8") as licenseTemp:
            text += licenseTemp.read() + '\r'
        _appendLicenseText(text)
        saveInfoToTxt(ResultInfoPath, ">> %s Add MIT License Template. " % depName)
        return "AddMITLicenseTemp"

    _appendLicenseText(text)
    saveInfoToTxt(ResultInfoPath, "@@ %s is not an MIT License, its license is: %s" % (depName, license))
    return "NotMITLicense"


def inite():
    """Prepare the Results folder and the global report paths.

    Creates <cwd>/Results if needed, exits when the MIT license template
    (Results/MITLicenseTemp.txt) is missing, and removes report files left
    over from a previous run.
    """
    global ResultDefaultPath, ResultLincensePath, ResultInfoPath, lincenseTempPath
    ResultDefaultPath = os.path.join(os.getcwd(), "Results")
    os.makedirs(ResultDefaultPath, exist_ok=True)
    ResultLincensePath = os.path.join(ResultDefaultPath, "AllLicenses.txt")
    ResultInfoPath = os.path.join(ResultDefaultPath, "AllInfo.txt")
    lincenseTempPath = os.path.join(ResultDefaultPath, "MITLicenseTemp.txt")

    if not os.path.exists(lincenseTempPath):
        print("license template file in Result dir is not exists.")
        # Keep the console window open long enough to read the message.
        time.sleep(20)
        # sys.exit instead of the site-provided exit(), which is not
        # available in every runtime (e.g. frozen executables).
        sys.exit(0)

    for stalePath in (ResultLincensePath, ResultInfoPath):
        if os.path.exists(stalePath):
            os.remove(stalePath)


if __name__ == '__main__':
    inite()
    rootDir = os.getcwd()
    print("***Root File Path:", rootDir)
    print("Begin Time:", time.strftime('%Y/%m/%d %H:%M:%S'))
    saveInfoToTxt(ResultInfoPath, "Begin Time:" + time.strftime('%Y/%m/%d %H:%M:%S'))
    getLicenses(rootDir)
    print("All finished:", time.strftime('%Y/%m/%d %H:%M:%S'))
    saveInfoToTxt(ResultInfoPath, "All finished:" + time.strftime('%Y/%m/%d %H:%M:%S'))
    print("\r\n ***Complete! Please refer Results folder.***")
    input("Press any key to close.")