Skip to content

Commit a1288b5

Browse files
committed
update
1 parent 15b3785 commit a1288b5

6 files changed

+206
-117
lines changed

llvm_ir_dataset_utils/tools/portage_analyze_failures.py

+120-111
Original file line numberDiff line numberDiff line change
@@ -6,131 +6,140 @@
66

77

88
def _run_emerge_pretend(pkg):
  """Run ``emerge -pv <pkg>`` once without going through a shell.

  Args:
    pkg: Package atom in ``category/name`` form.

  Returns:
    ``(stdout, None)`` when emerge exits with status 0, otherwise
    ``(None, stderr)`` where ``stderr`` is always a string.
  """
  try:
    # An argument list (no shell=True) keeps the package name from being
    # interpreted by a shell.
    result = subprocess.run(['emerge', '-pv', pkg],
                            check=True,
                            capture_output=True,
                            text=True)
  except subprocess.CalledProcessError as e:
    return None, e.stderr or ''
  except OSError as e:
    # emerge could not be started at all; report it like any other failure so
    # the caller's "cannot be merged" path is taken instead of crashing.
    return None, str(e)
  return result.stdout, None


def run_equery_depgraph(pkg):
  """Ask emerge for the pretend/verbose merge plan of a package.

  If the first attempt fails because USE changes are required, the requested
  changes are written to the custom package.use file before retrying. The
  retry happens after any first failure.

  Args:
    pkg: Package name in ``category_name`` form; the first underscore is
      replaced by ``/`` to build the atom passed to emerge.

  Returns:
    The emerge stdout (str) on success; ``1`` if the retry failed because
    packages are masked; ``2`` if the retry still requires USE changes;
    ``None`` for any other failure.
  """
  use_marker = "The following USE changes are necessary to proceed"
  pkg = pkg.replace('_', '/', 1)

  stdout, stderr = _run_emerge_pretend(pkg)
  if stdout is not None:
    return stdout

  if use_marker in stderr:
    print(f"Package {pkg} needs USE flag")
    use_changes = parse_emerge_output(stderr)
    update_package_use_custom(use_changes)

  # Second attempt, made whether or not USE flags were adjusted above.
  stdout, stderr = _run_emerge_pretend(pkg)
  if stdout is not None:
    return stdout
  if "have been masked." in stderr:
    return 1
  if use_marker in stderr:
    return 2
  return None
30+
2831

2932
def parse_emerge_output(output):
  """Extract the USE changes requested in emerge's error output.

  Every ``>=category/name-version flags...`` occurrence is collected and the
  trailing version is removed from the atom so that it can be written to
  package.use without an operator.

  Args:
    output: Text emitted by emerge (its stderr in this file's usage).

  Returns:
    A list of ``(package, flags)`` tuples, in order of appearance. ``flags``
    is the rest of the line after the atom.
  """
  # Matches a trailing Gentoo version: dotted numbers, an optional letter,
  # optional _alpha/_beta/_pre/_rc/_p suffixes and an optional -rN revision.
  # The previous pattern only handled plain "-1.2.3[-rN]" and left versions
  # such as "-2.0_p1" attached to the package name.
  version_suffix = re.compile(
      r'-\d+(?:\.\d+)*[a-z]?(?:_(?:alpha|beta|pre|rc|p)\d*)*(?:-r\d+)?$')
  return [(version_suffix.sub('', atom), flags)
          for atom, flags in re.findall(r'>=([^\s]+)\s+([^\n]+)', output)]
3337

3438

3539
def update_package_use_custom(use_changes,
                              package_use_dir='/etc/portage/package.use'):
  """Append missing USE entries to the ``custom`` file in package.use.

  A package counts as already present when it is the first
  whitespace-separated token of a non-comment line. The previous substring
  test treated e.g. ``dev-libs/foo`` as present when only ``dev-libs/foobar``
  was listed.

  Args:
    use_changes: Iterable of ``(package, flags)`` pairs.
    package_use_dir: Directory holding the ``custom`` file; it is created if
      it does not exist.
  """
  custom_file_path = os.path.join(package_use_dir, 'custom')
  os.makedirs(package_use_dir, exist_ok=True)

  # 'a+' creates the file when missing and always writes at the end.
  with open(custom_file_path, 'a+') as f:
    f.seek(0)
    content = f.read()
    known = {
        line.split()[0]
        for line in content.splitlines()
        if line.strip() and not line.lstrip().startswith('#')
    }
    # Avoid gluing a new entry onto an unterminated last line.
    needs_newline = bool(content) and not content.endswith('\n')

    for package, flags in use_changes:
      if package in known:
        print(f"Package {package} already exists in {custom_file_path}")
        continue
      if needs_newline:
        f.write('\n')
        needs_newline = False
      f.write(f"{package} {flags}\n")
      # Remember it so a repeated pair in the same call is not written twice.
      known.add(package)
      print(f"Added to {custom_file_path}: {package} {flags}")
5357

5458

5559
def parse_depgraph(content):
  """Collect the package names mentioned in emerge's pretend output.

  Args:
    content: Text to scan; it is split on newlines and the first line is
      ignored.

  Returns:
    A list with the first ``category/name`` match of every remaining line
    that contains one, with the version part dropped.
  """
  atom_pattern = re.compile(r"(\w+-[\w+/-]+?)(?:-\d[\w\._-]*)")
  _first_line, *remaining_lines = content.split('\n')
  hits = (atom_pattern.search(text) for text in remaining_lines)
  return [hit.group(1) for hit in hits if hit]
6469

6570
def analyse_neither(neither_path='./portage-lists/neither.json'):
  """Split the packages recorded in neither.json by build outcome.

  neither.json is a log analyzing the results of packages whose installation
  result type is "neither": 'Y' means the package was built successfully and
  'N' means its build failed. Entries with any other value are ignored.

  Args:
    neither_path: Path of the JSON file mapping package name to 'Y'/'N'.

  Returns:
    A ``(failed, succeeded)`` tuple of package-name lists, each in file order.
  """
  with open(neither_path) as neither_pkgs_file:
    neither_pkgs = json.load(neither_pkgs_file)

  failed = [pkg for pkg, status in neither_pkgs.items() if status == 'N']
  succeeded = [pkg for pkg, status in neither_pkgs.items() if status == 'Y']
  return failed, succeeded
85+
8086

8187
def preprocessed_notinstalled_pkgs(
    list_path='./portage-lists/notinstalled.list'):
  """Read a package list file with one package name per line.

  Surrounding whitespace (including the line terminator) is stripped and
  blank lines are skipped. The previous ``line[:-1]`` cut off the last
  character of the final entry when the file did not end with a newline.

  Args:
    list_path: Path of the list file.

  Returns:
    The package names in file order.
  """
  with open(list_path, 'r') as notinstalled_file:
    stripped = (line.strip() for line in notinstalled_file)
    return [name for name in stripped if name]
93+
94+
8895
def main():
  """Classify failed packages and count their unbuilt dependencies.

  Each failed package is passed to run_equery_depgraph(). Depending on the
  result it is recorded as masked, as needing USE changes, or as an error;
  otherwise its dependency list is parsed and every dependency that is not
  in the succeeded set is counted. The results are written to error.list,
  mask.list, use.list and depedencies_pkgs.json under ./portage-lists/.
  """
  failed_pkgs, succeeded_pkgs = analyse_neither()

  notinstalled_pkgs = preprocessed_notinstalled_pkgs()
  # NOTE(review): this reads the *not installed* list a second time, so the
  # "installed" packages added to the succeeded set below are the same ones
  # added to failed_pkgs. It looks like a copy/paste slip, but no reader for
  # an installed list is visible here, so the behavior is left unchanged.
  installed_pkgs = preprocessed_notinstalled_pkgs()

  failed_pkgs.extend(notinstalled_pkgs)
  succeeded_pkgs.extend(installed_pkgs)
  # Built once: membership is tested for every dependency of every package.
  succeeded_lookup = set(succeeded_pkgs)

  error_dict = {}
  error_list = []
  mask_list = []
  use_list = []
  for pkg in failed_pkgs:
    content = run_equery_depgraph(pkg)
    if content == 1:
      mask_list.append(pkg)
      continue
    if content == 2:
      use_list.append(pkg)
      continue
    if content is None:
      error_list.append(pkg)
      print(f"Package {pkg} cannot be merged.")
      continue
    for dep in parse_depgraph(content):
      # Dependencies are in category/name form, the lists use category_name.
      if dep.replace('/', '_') not in succeeded_lookup:
        error_dict[dep] = error_dict.get(dep, 0) + 1

  # Most frequently missing dependencies first.
  sorted_dict = OrderedDict(
      sorted(error_dict.items(), key=lambda item: item[1], reverse=True))

  with open('./portage-lists/error.list', 'w') as file:
    file.writelines(name + '\n' for name in error_list)
  with open('./portage-lists/mask.list', 'w') as file:
    file.writelines(name + '\n' for name in mask_list)
  with open('./portage-lists/use.list', 'w') as file:
    file.writelines(name + '\n' for name in use_list)
  # The file name (including its spelling) is kept as-is for any consumers.
  with open('./portage-lists/depedencies_pkgs.json', 'w') as file:
    json.dump(sorted_dict, file, indent=4)
142+
143+
135144
# Run the analysis exactly once when executed as a script.
if __name__ == "__main__":
  main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import os
2+
3+
# Package directories found by get_packages(); shared with main().
packages = []


def get_packages(parent_directory):
  """Record every ``<category>/<package>`` directory that holds an ebuild.

  Walks exactly two directory levels below ``parent_directory`` and appends
  each second-level directory containing at least one ``*.ebuild`` file to
  the module-level ``packages`` list.

  A leading ``./`` is removed from the recorded path. The previous ``[2:]``
  slice dropped two characters unconditionally and corrupted paths for any
  parent directory not written as ``./...``.

  Args:
    parent_directory: Root of the ebuild tree to scan.
  """
  for category in os.listdir(parent_directory):
    category_path = os.path.join(parent_directory, category)
    if not os.path.isdir(category_path):
      continue
    for package in os.listdir(category_path):
      package_path = os.path.join(category_path, package)
      if not os.path.isdir(package_path):
        continue
      if any(name.endswith('.ebuild') for name in os.listdir(package_path)):
        if package_path.startswith('./'):
          package_path = package_path[2:]
        packages.append(package_path)
18+
19+
20+
def processEbuild_cpp(file):
  """Tell whether an ebuild mentions any of the C/C++ build markers.

  Args:
    file: Path of the ebuild, read as UTF-8.

  Returns:
    True as soon as a line contains one of the marker substrings, False when
    no line does.
  """
  # "toolchain" also covers lines with both "inherit" and "toolchain-funcs".
  markers = ("cmake", "emake", "CFLAGS", "CXXFLAGS", "toolchain", "meson")
  with open(file, 'r', encoding='utf-8') as ebuild:
    return any(marker in text for text in ebuild for marker in markers)
38+
39+
40+
def processEbuild_trunk(file):
  """Tell whether an ebuild lists ``amd64`` as a stable keyword.

  A line qualifies when it contains ``KEYWORDS`` and, once quotes are treated
  as separators, has ``amd64`` as a standalone token. ``~amd64`` and
  ``-amd64`` do not count. The previous test required ``"amd64 "`` with a
  trailing space and so missed ``KEYWORDS="amd64"`` and ``KEYWORDS="-* amd64"``.

  Args:
    file: Path of the ebuild, read as UTF-8.

  Returns:
    True if such a line exists, otherwise False.
  """
  with open(file, 'r', encoding='utf-8') as f:
    for line in f:
      if "KEYWORDS" not in line:
        continue
      tokens = line.replace('"', ' ').replace("'", ' ').split()
      if "amd64" in tokens:
        return True
  return False
46+
47+
48+
def readpackage(package):
  """Print the contents of the first ebuild found in a package directory.

  Lines are printed without their newline. The previous ``line[:-1]`` also
  removed the last character of a final line that had no newline.

  Args:
    package: Directory to search for ``*.ebuild`` files.
  """
  for name in os.listdir(package):
    if not name.endswith('.ebuild'):
      continue
    with open(os.path.join(package, name), 'r', encoding='utf-8') as f:
      for line in f:
        print(line.rstrip('\n'))
    # NOTE(review): assumes the original `return` sat inside the ebuild
    # branch, so only one ebuild is printed — confirm against the real file.
    return
56+
57+
58+
def main():
  """Write the list of stable-amd64 packages that look like C/C++ builds.

  Scans the current directory as an ebuild tree, keeps every package that
  has at least one ebuild accepted by both processEbuild_trunk() and
  processEbuild_cpp(), and writes one package path per line to
  ../../corpus_descriptions_test/portage_pkg.list.
  """
  ebuild_directory = "./"
  get_packages(ebuild_directory)

  cpp_pkgs = []
  for pkg in packages:
    for name in os.listdir(pkg):
      if not name.endswith('.ebuild'):
        continue
      ebuild_path = os.path.join(pkg, name)
      if processEbuild_trunk(ebuild_path) and processEbuild_cpp(ebuild_path):
        cpp_pkgs.append(pkg)
        # One matching ebuild is enough; skip the package's other versions.
        break

  # Order-preserving de-duplication: unlike list(set(...)), the output order
  # does not change from run to run.
  cpp_pkgs = list(dict.fromkeys(cpp_pkgs))
  with open(
      "../../corpus_descriptions_test/portage_pkg.list", 'w',
      encoding='utf-8') as f:
    f.writelines(pkg + "\n" for pkg in cpp_pkgs)
77+
78+
79+
# Script entry point: scan the ebuild tree only when run directly.
if __name__ == "__main__":
  main()

llvm_ir_dataset_utils/tools/portage_list_build.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def build(target_package):
142142
try:
143143
subprocess.run(renew_command, check=True)
144144
except subprocess.CalledProcessError:
145-
print("Error to build depedency.")
145+
print("Error to build dependency.")
146146
continue
147147
json_filename = create_json_file(package)
148148
run_corpus_command(json_filename)

llvm_ir_dataset_utils/util/extract_source_lib.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ def copy_source(source_base_dir, output_dir):
1515
'**/*' + source_extension):
1616
# Make sure the ".source" file is not a directory
1717
if os.path.isfile(source_base_path):
18-
source_rel_path = os.path.relpath(source_base_path, start=source_base_dir)
18+
source_rel_path = os.path.relpath(
19+
source_base_path, start=source_base_dir)
1920
destination_path = os.path.join(output_dir, source_rel_path)
2021
os.makedirs(os.path.dirname(destination_path), exist_ok=True)
2122
shutil.copy(source_base_path, destination_path)

llvm_ir_dataset_utils/util/portage.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,15 @@ def get_portage_compiler_config(filename):
2121
'\n'
2222
'LC_MESSAGES=C.utf8\n'
2323
'FEATURES="keepwork noclean -ipc-sandbox -xattr -network-sandbox '
24-
'-pid-sandbox -sandbox -usersandbox -usersync -userfetch -userpriv"'
25-
)
24+
'-pid-sandbox -sandbox -usersandbox -usersync -userfetch -userpriv"')
2625
with open(filename, 'w') as file:
2726
file.write(content)
2827

2928

3029
def portage_setup_compiler(build_dir):
3130
# Same as spack, path is variable depending upon the system.
3231
# Path to the Portage make.conf file within the build directory
33-
32+
3433
source_config_folder = '/etc/portage/'
3534
config_path = os.path.join(build_dir, "etc/portage")
3635
make_conf_path = os.path.join(config_path, "make.conf")
@@ -40,7 +39,6 @@ def portage_setup_compiler(build_dir):
4039
# Delete make.profile and make a new soft link to the default profile
4140
shutil.rmtree(make_profile_path)
4241

43-
4442
os.symlink('/etc/portage/make.profile', make_profile_path)
4543
get_portage_compiler_config(make_conf_path)
4644

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ ignore = ["F401", "E731"]
4545

4646
[tool.codespell]
4747
ignore-words-list = "crate,"
48+
skip = "*.list"
4849

4950
[build-system]
5051
requires = ["poetry-core"]

0 commit comments

Comments
 (0)