|
| 1 | +import requests |
| 2 | +import re |
| 3 | +import os |
| 4 | +import glob |
| 5 | +from lxml import etree |
| 6 | +import lxml.html |
| 7 | +import subprocess as sp |
| 8 | +from azure.storage.blob import BlobClient |
| 9 | + |
| 10 | + |
def my_print(cmd):
    """Print ``cmd`` framed as ``== cmd ==`` followed by one blank line."""
    banner = '== ' + cmd + ' =='
    # Two newlines: one terminates the banner, one leaves an empty line after it.
    print(banner, end='\n\n')
| 13 | + |
| 14 | + |
def print_check(cmd):
    """Echo a shell command with ``my_print`` and then execute it.

    The command string is run through the shell; a non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    my_print(cmd)
    sp.run(cmd, shell=True, check=True)
| 18 | + |
| 19 | + |
class PyPIClient:
    """Gather release information for one package from its PyPI project page.

    The instance scrapes the release history of ``package_name``, splits the
    versions into "track1" and "track2" (track2 starts at the first version
    whose string contains ``b1``), and renders the result as one CSV row.
    """

    # Any ASCII letter in a version string marks it as a pre-release.
    _PRERELEASE_RE = re.compile(r'[A-Za-z]')

    def __init__(self, host="https://pypi.org", package_name='', track_config='',
                 readme_link='', rm_link='', cli_version=''):
        """Store the package description; no network access happens here.

        Args:
            host: Base URL of the package index.
            package_name: Name of the package to look up.
            track_config: Which tracks the readme configures
                ('NA', 'track1', 'track2', 'both', ...).
            readme_link: Path of readme.python.md, or 'NA' when it is missing.
            rm_link: Link written to the last CSV column.
            cli_version: Version pinned by azure-cli, or 'NA'.
        """
        self._host = host
        self._session = requests.Session()
        self._package_name = package_name
        self.version_date_dict = {}
        self.whether_track2 = None  # first track2 version found on PyPI, if any
        self.track1_ga = 'NO'
        self.track1_latest = 'NA'
        self.track2_ga = 'NO'
        self.track2_latest = 'NA'
        self.pypi_link = 'NA'
        self.track_config = track_config
        self.readme_link = readme_link
        self.rm_link = rm_link
        self.cli_version = cli_version
        self.bot_warning = ''

    def get_package_name(self):
        """Return the package name given at construction."""
        return self._package_name

    def project_html(self):
        """Fetch the release-history page and remember its link in ``pypi_link``.

        Returns:
            The HTTP response object returned by the session.
        """
        self.pypi_link = "{host}/pypi/{project_name}".format(
            host=self._host,
            project_name=self._package_name
        )
        return self._session.get(self.pypi_link + "/#history")

    def get_release_info(self, response, xpath, type):
        """Extract the text nodes selected by ``xpath`` from an HTML response.

        Args:
            response: Object whose ``text`` attribute holds the HTML.
            xpath: XPath expression selecting text nodes.
            type: 'date' converts values such as ``Jan 5, 2021`` to
                ``2021/01/5``; any other value keeps the stripped text.

        Returns:
            List of non-empty strings in document order.
        """
        month_numbers = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06',
                         'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12', }
        parse_text = lxml.etree.HTML(response.text)
        strip_info = []
        for info in parse_text.xpath(xpath):
            info = info.strip()
            # Drop whitespace-only nodes before parsing; a blank date node
            # would otherwise fail when its pieces are indexed.
            if not info:
                continue
            if type == 'date':
                parts = info.replace(',', '').split(' ')
                info = parts[2] + '/' + month_numbers[parts[0]] + '/' + parts[1]
            strip_info.append(info)

        return strip_info

    def get_release_dict(self, response):
        """Classify the scraped versions and build ``version_date_dict``."""
        version_list = self.get_release_info(response, xpath='//p[@class="release__version"]/text()', type='version')
        self.version_handler(version_list)
        data_list = self.get_release_info(response, xpath='//p[@class="release__version-date"]/time/text()',
                                          type='date')
        self.version_date_dict = dict(zip(version_list, data_list))
        # 'NA' is the placeholder version, so it needs a placeholder date.
        self.version_date_dict['NA'] = 'NA'

    def write_to_list(self):
        """Query PyPI and return the CSV row for this package.

        Returns:
            The comma-separated row terminated by a newline, or ``None`` when
            the page request did not return a 2xx/3xx status (``pypi_link`` is
            then reset to 'NA').
        """
        response = self.project_html()
        if not 199 < response.status_code < 400:
            self.pypi_link = 'NA'
            return None

        self.get_release_dict(response)
        self.bot_analysis()
        # Fall back to 'NA' when a version has no matching date on the page.
        track1_date = self.version_date_dict.get(self.track1_latest, 'NA')
        track2_date = self.version_date_dict.get(self.track2_latest, 'NA')
        return '{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(self._package_name,
                                                              self.pypi_link,
                                                              self.track1_latest,
                                                              track1_date,
                                                              self.track1_ga,
                                                              self.track2_latest,
                                                              self.track2_ga,
                                                              track2_date,
                                                              self.cli_version,
                                                              self.track_config,
                                                              self.bot_warning,
                                                              self.rm_link)

    def _is_track1_ga(self, version):
        """Return True for a non-empty, letter-free version with major > 0."""
        if not version or self._PRERELEASE_RE.search(version):
            return False
        return int(version.split('.')[0]) > 0

    def version_handler(self, version_list):
        """Split ``version_list`` into track1/track2 and tag ``cli_version``.

        The list is walked from its last entry to its first.
        NOTE(review): this presumably means oldest to newest because the page
        lists the newest release first - confirm.

        An empty ``version_list`` leaves the latest versions at 'NA'.
        """
        # Scenario 1
        # rule 1: this package has a track2 version
        # rule 2: check whether CLI is using this package
        # rule 3: comparing the major versions of CLI and package tells
        #         whether CLI is using track1 or track2
        # rule 4: check whether track1 exists
        # rule 5: whether track1 is GA
        # rule 6: whether track2 is GA
        # Scenario 2
        # rule 7: this package doesn't have a track2 version
        # rule 8: check whether CLI is using this package
        # rule 9: whether track1 is GA
        versions = list(reversed(version_list))
        for version_index, version in enumerate(versions):
            if 'b1' in version and self.whether_track2 is None:
                self.whether_track2 = version
                if self.cli_version != 'NA':
                    if int(self.cli_version.split('.')[0]) >= int(version.split('.')[0]):
                        self.cli_version = 'track2_' + self.cli_version
                    else:
                        self.cli_version = 'track1_' + self.cli_version
                # The entry just before the first track2 version is the
                # latest track1 release (none when track2 comes first).
                if version_index != 0:
                    self.track1_latest = versions[version_index - 1]
                self.track2_latest = versions[-1]
                if self._is_track1_ga(self.track1_latest):
                    self.track1_ga = 'YES'
                if not self._PRERELEASE_RE.search(self.track2_latest):
                    self.track2_ga = 'YES'
                break

        if self.whether_track2 is None:
            if self.cli_version != 'NA':
                self.cli_version = 'track1_' + self.cli_version
            # Guard against a page without any release entries.
            if versions:
                self.track1_latest = versions[-1]
                if self._is_track1_ga(self.track1_latest):
                    self.track1_ga = 'YES'

    def bot_analysis(self):
        """Append advice about the readme configuration to ``bot_warning``."""
        # rule 1: readme.python.md must exist
        # rule 2: track1 config must be deleted if azure-cli doesn't use track1
        # rule 3: track2 config must be added if track2 package has been published to pypi
        if self.readme_link == 'NA':
            self.bot_warning += 'The readme.python.md has not been created. '
        if self.cli_version != 'NA':
            # cli_version carries a 'track1_'/'track2_' prefix at this point.
            cli_version = int(self.cli_version.split('_')[1].split('.')[0])
            if self.whether_track2 is not None:
                whether_track2 = int(self.whether_track2.split('.')[0])
                if cli_version >= whether_track2 and self.track_config == 'both':
                    self.bot_warning += 'The cli using track2 now but readme.python still have track1 config.'
        if self.whether_track2 and self.track_config == 'track1':
            self.bot_warning += 'Need to add track2 config.'
| 152 | + |
| 153 | + |
def sdk_info_from_pypi(sdk_info, cli_dependency):
    """Build the CSV rows for every package described in ``sdk_info``.

    Args:
        sdk_info: Iterable of comma-separated records
            ``name,track_config,readme_link,rm_link``; entries without a
            comma are ignored.
        cli_dependency: Mapping of package name to the version pinned by
            azure-cli.

    Returns:
        List of rows for the packages whose PyPI lookup succeeded.
    """
    collected = []
    for record in sdk_info:
        if ',' not in record:
            continue
        fields = record.split(',')
        name = fields[0].strip()
        pinned = cli_dependency.get(name, 'NA')
        client = PyPIClient(package_name=name,
                            track_config=fields[1].strip(),
                            readme_link=fields[2].strip(),
                            rm_link=fields[3].strip(),
                            cli_version=pinned)
        row = client.write_to_list()
        # pypi_link is reset to 'NA' when the project page could not be fetched.
        if client.pypi_link != 'NA':
            collected.append(row)

    my_print(f'total pypi package kinds: {len(collected)}')
    return collected
| 175 | + |
| 176 | + |
def write_to_csv(sdk_status_list, csv_name):
    """Write the header and the status rows to ``csv_name``.

    Rows are ordered by their eleventh field (the "bot advice" column) in
    descending order, so rows carrying advice come before rows without any.
    """
    columns = (
        'package name',
        'pypi link',
        'latest track1',
        'release date',
        'track1 GA',
        'latest track2',
        'track2 GA',
        'release date',
        'cli dependency',
        'readme config',
        'bot advice',
        'readme link',
    )

    def advice_column(row):
        return row.split(',')[10]

    with open(csv_name, 'w') as file_out:
        file_out.write(','.join(columns) + '\n')
        file_out.writelines(sorted(sdk_status_list, key=advice_column, reverse=True))
| 193 | + |
| 194 | + |
def get_cli_dependency():
    """Read the ``azure-mgmt-*`` pins from the rendered azure-cli setup.py.

    Returns:
        Dict mapping package name to the version following ``==`` or ``~=``.
    """
    CLI_URL = 'https://github.com/azure/azure-cli/blob/dev/src/azure-cli/setup.py'
    page = project_html(CLI_URL)
    text_nodes = page.xpath('//table[@class="highlight tab-size js-file-line-container"]//text()')

    pinned = {}
    for node in text_nodes:
        if 'azure-mgmt-' not in node:
            continue
        # Drop the first and last character (presumably the quotes around the
        # requirement string - confirm against the page markup).
        requirement = node[1:-1]
        for separator in ('==', '~='):
            if separator in requirement:
                pieces = requirement.split(separator)
                pinned[pieces[0]] = pieces[1]
                break
    return pinned
| 209 | + |
| 210 | + |
def project_html(url):
    """Download ``url`` and return the page parsed as an lxml HTML tree."""
    page = requests.Session().get(url)
    # NOTE(review): the body is decoded as GBK regardless of what the server
    # declares - confirm this is intended.
    page.encoding = 'gbk'
    return lxml.etree.HTML(page.text)
| 217 | + |
| 218 | + |
def read_file(file_name):
    """Return the lines of a UTF-8 text file, line endings included."""
    with open(file_name, 'r', encoding='utf-8') as handle:
        return handle.readlines()
| 223 | + |
| 224 | + |
def sdk_info_from_swagger():
    """Collect package name and track configuration from the swagger repo.

    Every ``specification/*/resource-manager/readme.md`` below the directory
    named by the ``SWAGGER_REPO`` environment variable is inspected.

    Returns:
        List of ``name,track_config,readme_python,readme_html\\n`` records,
        one per readme for which a package name was found.
    """
    sdk_name_re = re.compile(r'azure-mgmt-[a-z]+-*([a-z])+')
    # Bit 0: readme mentions the track1 repo; bit 1: the track2 repo.
    TRACK_CONFIG = {0: 'NA', 1: 'track1', 2: 'track2', 3: 'both'}
    resource_manager = []
    SWAGGER_FOLDER = os.getenv('SWAGGER_REPO')
    readme_folders = glob.glob(f'{SWAGGER_FOLDER}/specification/*/resource-manager/readme.md')
    my_print(f'total readme folders: {len(readme_folders)}')

    for folder in readme_folders:
        track_flags = 0
        package_name = ''
        folder = folder.replace('readme.md', '')
        readme_python = 'NA' if 'readme.python.md' not in os.listdir(folder) else f'{folder}/readme.python.md'
        for line in read_file(folder + 'readme.md'):
            # OR the flags instead of summing them: a repo mentioned on
            # several lines must not change the classification (two track1
            # lines used to add up to 2, i.e. 'track2').
            if 'azure-sdk-for-python-track2' in line:
                track_flags |= 2
            elif 'azure-sdk-for-python' in line:
                track_flags |= 1
            # Without a readme.python.md, take the first package name that
            # appears in readme.md.
            if readme_python == 'NA' and package_name == '':
                match = sdk_name_re.search(line)
                if match is not None:
                    package_name = match.group()

        if readme_python != 'NA':
            # With a readme.python.md, its last matching line wins.
            for text in read_file(readme_python):
                match = sdk_name_re.search(text)
                if match is not None:
                    package_name = match.group()

        track_config = TRACK_CONFIG[track_flags]
        readme_html = folder.replace(SWAGGER_FOLDER, 'https://github.com/Azure/azure-rest-api-specs/tree/master')
        if package_name != '':
            resource_manager.append('{},{},{},{}\n'.format(package_name,
                                                           track_config,
                                                           readme_python,
                                                           readme_html))
            my_print(f'{folder} : {package_name}')

    my_print(f'total package kinds: {len(resource_manager)}')
    return resource_manager
| 264 | + |
| 265 | + |
def commit_to_github():
    """Stage everything, commit it and force-push the current HEAD to origin."""
    git_commands = (
        'git add .',
        'git commit -m "update excel"',
        'git push -f origin HEAD',
    )
    for command in git_commands:
        print_check(command)
| 270 | + |
| 271 | + |
def upload_to_azure(out_file):
    """Upload ``out_file`` as a blob of the same name, replacing any existing one.

    The connection string is read from the ``CONN_STR`` environment variable
    and the container name from ``FILE``; the storage account is expected to
    exist already.
    """
    connection_string = os.getenv('CONN_STR')
    container = os.getenv('FILE')
    client = BlobClient.from_connection_string(conn_str=connection_string,
                                               container_name=container,
                                               blob_name=out_file)
    with open(out_file, 'rb') as payload:
        client.upload_blob(payload, overwrite=True)
| 278 | + |
| 279 | + |
def main():
    """Generate the SDK status CSV, commit it to git and upload it to Azure."""
    OUT_FILE = 'release_sdk_status.csv'

    cli_dependency = get_cli_dependency()
    swagger_records = sdk_info_from_swagger()
    status_rows = sdk_info_from_pypi(swagger_records, cli_dependency)

    write_to_csv(status_rows, OUT_FILE)
    commit_to_github()
    upload_to_azure(OUT_FILE)


# Run the pipeline only when the file is executed as a script.
if __name__ == '__main__':
    main()
0 commit comments