emqx-docs/.github/scripts/directory_check.py at 962fd7d4b93cf4bc4b24a0f6a7f19e75f4b3e4e0 · emqx/emqx-docs · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import sys
import json
import re
from urllib.parse import urlparse

# Command-line inputs: argv[1] is the name of the directory (table of
# contents) JSON file, argv[2] is the docs root that file is looked up under.
directory_file, docs_path = sys.argv[1], sys.argv[2]

# Overall result of the run; the check functions set it to False on failure.
success = True


def check_md_content(md_file):
    """Check that relative links and images in one Markdown file exist.

    HTML comments are removed first, followed by the conditional blocks of
    the other edition: ``{% emqxce %}`` blocks when the module-level
    ``directory_file`` name contains ``ee``, ``{% emqxee %}`` blocks
    otherwise.  Each remaining ``[text](target)`` link and
    ``![alt](target)`` image with a relative target is resolved against the
    directory that holds ``md_file`` and must exist on disk.

    Problems are printed, and the module-level ``success`` flag is set to
    ``False``; nothing is returned or raised for a broken reference.

    Args:
        md_file: Path of the Markdown file to check.
    """
    global success

    if not os.path.exists(md_file):
        print(f'{md_file} not exists')
        success = False
        return

    # Close the handle deterministically and do not depend on the locale
    # encoding (assumes the docs are stored as UTF-8 - TODO confirm).
    with open(md_file, 'r', encoding='utf-8') as md_handle:
        md_content = re.sub(r'<!--([\s\S]*?)-->', '', md_handle.read())

    if 'ee' in directory_file:
        md_content = re.sub(r'{% emqxce %}([\s\S]*?){% endemqxce %}', '', md_content)
    else:
        md_content = re.sub(r'{% emqxee %}([\s\S]*?){% endemqxee %}', '', md_content)

    # Raw strings: "\[" is an invalid escape sequence in a normal literal.
    # Each match is a (text before the construct, label, target) tuple.
    image_list = re.findall(r'(.*?)!\[(.*?)\]\((.*?)\)', md_content)
    url_list = re.findall(r'(.*?)\[(.*?)\]\((.*?)\)', md_content)

    # os.path.dirname yields '' for a bare file name, so relative targets
    # resolve against the current directory instead of the filesystem root.
    base_dir = os.path.dirname(md_file)

    for prefix, _label, target in url_list:
        # A '!' right before '[' means this is an image; handled below.
        if prefix.endswith('!'):
            continue
        if target.startswith(('http://', 'https://', '<', '#', 'mailto:', 'tel:')):
            continue
        if urlparse(target).path.endswith('.html'):
            continue
        # Drop '.md' and whatever follows it (e.g. '#anchor'), then re-add
        # the extension so 'page', 'page.md' and 'page.md#x' all map to
        # 'page.md'.
        url_path = target.split('.md')[0]
        ref_md_path = os.path.join(base_dir, f'{url_path}.md')

        if not os.path.exists(ref_md_path):
            print(f'In {md_file}：', end='')
            print(f'{target} not found or not in {directory_file}')
            success = False

    for prefix, _alt, target in image_list:
        if prefix.startswith('<!--'):
            continue
        if target.startswith(('http://', 'https://', '<')):
            continue
        image_path = os.path.join(base_dir, target)

        if not os.path.exists(image_path):
            print(f'In {md_file}：', end='')
            print(target, 'does not exist')
            success = False


def get_md_files(dir_config, path):
    """Collect the Markdown file paths listed in a directory configuration.

    Each entry of ``dir_config`` is a mapping that may carry a ``path`` and
    a list of ``children`` entries, which are processed recursively.  A
    ``path`` of ``'./'`` maps to ``index.md``; any other value gets ``.md``
    appended.  Entries whose ``path`` is an ``http://`` or ``https://`` URL
    are skipped entirely, including their children.

    Args:
        dir_config: Iterable of entry mappings.
        path: Sub-directory placed between the module-level ``docs_path``
            and each entry's path (the script passes ``'zh_CN'`` or
            ``'en_US'``).

    Returns:
        The collected paths as a list without duplicates, in first-seen
        order so that repeated runs produce the same sequence.
    """
    md_list = []
    for entry in dir_config:
        md_name = entry.get('path')
        md_children = entry.get('children')

        if md_name:
            if md_name.startswith(('http://', 'https://')):
                continue
            if md_name == './':
                md_list.append(f'{docs_path}/{path}/index.md')
            else:
                md_list.append(f'{docs_path}/{path}/{md_name}.md')

        if md_children:
            md_list += get_md_files(md_children, path)

    # dict.fromkeys removes duplicates while keeping insertion order; a set
    # would hand the paths back in an order that varies between runs.
    return list(dict.fromkeys(md_list))


if __name__ == '__main__':
    # Script entry point: load the directory JSON, delete every Markdown
    # file under docs_path that it does not list, then check the links and
    # images of the listed files.
    directory_path = f'{docs_path}/{directory_file}'

    # NOTE(review): when the directory file is missing, nothing is checked
    # and the run still ends with 'Check completed!' - confirm this is
    # intended.
    if os.path.exists(directory_path):
        # JSON is UTF-8 by specification; close the handle once parsed.
        with open(directory_path, encoding='utf-8') as config_handle:
            config_dict = json.load(config_handle)

        def flatten_config(config):
            """Return a flat entry list; a dict is flattened across its values."""
            if isinstance(config, dict):
                return [item for sublist in config.values() for item in sublist]
            return config

        md_file_list = []
        md_file_list += get_md_files(flatten_config(config_dict['cn']), 'zh_CN')
        md_file_list += get_md_files(flatten_config(config_dict['en']), 'en_US')

        # Compare normalised paths through a set: a trailing slash on
        # docs_path, './' segments or a different path separator must not
        # make a listed file look unlisted (it would be deleted below), and
        # set lookups keep the walk linear.
        listed_paths = {os.path.normpath(listed) for listed in md_file_list}

        for file_path, _dir_list, file_list in os.walk(docs_path):
            for file_name in file_list:
                if file_name.split('.')[-1] != 'md':
                    continue
                md_path = os.path.join(file_path, file_name)
                # Unlisted Markdown files are removed so that links pointing
                # at them are reported as missing by check_md_content.
                if os.path.normpath(md_path) not in listed_paths:
                    os.remove(md_path)

        for listed_file in md_file_list:
            check_md_content(listed_file)

    if not success:
        sys.exit('No pass!')
    else:
        print('Check completed!')