Skip to content

Commit 0d89c08

Browse files
committed
drop duplicates
1 parent: b6ab3ad · commit: 0d89c08

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

transform_excel_to_json.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,8 @@ def set_up_macro_category():
7272
"keyword", "is_empty", "general", "agriculture", "transport",
7373
"batiments", "energie", "industrie", "eau", "ecosysteme", "economie_ressources"
7474
]
75-
75+
df["keyword"] = df["keyword"].str.strip()
76+
df = df.drop_duplicates()
7677
for _, row in df.iterrows():
7778
keyword=row["keyword"]
7879
if pd.isna(keyword) or keyword.startswith("#"):
@@ -106,6 +107,8 @@ def set_up_macro_category():
106107
f.write(" },\n")
107108
f.write("]\n")
108109
logging.info(f"{len(records)} macro categories written to {output_file_macro_category} successfully.")
110+
print(df["keyword"].value_counts()[df["keyword"].value_counts()==2])
111+
109112

110113

111114
# Initialize the THEME_KEYWORDS dictionary

0 commit comments

Comments
 (0)