Skip to content

Commit 397780a

Browse files
WIP: this script creates a condensed dataframe of filename_glob and glob_count from a .darshan log file
1 parent 8277396 commit 397780a

File tree

1 file changed

+4
-12
lines changed

1 file changed

+4
-12
lines changed

git_project/glob_feature/glob_feature.py

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -30,17 +30,13 @@ def group_paths(paths):
3030

3131
def regex_df_condenser(df, paths):
3232
path_grouper_func = make_path_grouper()
33+
3334
df["filename_glob"] = df["filename_glob"].apply(path_grouper_func)
34-
print("Paths after grouping:")
35-
print(df["filename_glob"])
3635

3736
df = df.groupby("filename_glob").size().reset_index(name="glob_count")
3837

3938
df = df.sort_values(by="glob_count", ascending=False)
4039

41-
print("Paths after grouping and counting:")
42-
print(df)
43-
4440

4541
def find_common_prefix(paths):
4642
# Sort the paths in lexicographical order
@@ -61,12 +57,8 @@ def find_common_prefix(paths):
6157
common_path = find_common_prefix(group_df["filename_glob"])
6258
df.loc[df["filename_glob"] == group, "filename_glob"] = common_path
6359

64-
print("Paths after modifying filename_glob:")
65-
print(df)
6660

67-
df["regex"] = df.apply(lambda row: re.escape(row["filename_glob"]) + r".*", axis=1)
68-
print("Paths after applying regex:")
69-
print(df)
61+
df["filename_glob"] = df.apply(lambda row: (row["filename_glob"]) + r".*", axis=1)
7062

7163
return df
7264

@@ -92,10 +84,10 @@ def main(log_path, output_path):
9284
html = style.to_html()
9385

9486
# can change name of the output html report here
95-
with open("name_record_glob.html", "w") as html_file:
87+
with open("name_record_glob_hd5f.html", "w") as html_file:
9688
html_file.write(html)
9789

98-
# go back to hdf5_diagonal dxt
90+
9991
if __name__ == "__main__":
10092
parser = argparse.ArgumentParser()
10193
parser.add_argument('-p', '--log-path', type=str, help="Path to the log file")

0 commit comments

Comments
 (0)