python3 -m fm_data_tasks.run_inference \
--dry_run \
--num_run 10 \
--k 3 \
--sample_method random \
--data_dir data/datasets/entity_matching/structured/Fodors-Zagats
[04/11/23 17:50:21] INFO 2023-04-11 17:50:21,823 [run_inference] [INFO] { run_inference.py:145
"data_dir": "/Users/thangduong/fm_data_tasks/data/datasets/entity_matching/structured/Fodors-Zagats",
"output_dir": "outputs",
"cache_name": "sqlite",
"cache_connection": "fm_data_tasks.sqlite",
"client_name": "openai",
"client_connection": null,
"run_tag": "default",
"overwrite_cache": false,
"k": 3,
"sample_method": "random",
"seed": 1234,
"class_balanced": false,
"sep_tok": ".",
"nan_tok": "nan",
"num_run": 10,
"num_trials": 1,
"num_print": 10,
"add_task_instruction": false,
"task_instruction_idx": 0,
"do_test": false,
"dry_run": true,
"stop_token": "\n",
"temperature": 0.0,
"max_tokens": 3
}
INFO 2023-04-11 17:50:21,827 [data_utils] [INFO] Processing /Users/thangduong/fm_data_tasks/data/datasets/entity_matching/structured/Fodors-Zagats data_utils.py:330
Traceback (most recent call last):
File "/opt/miniconda3/envs/fm_data_tasks/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/miniconda3/envs/fm_data_tasks/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/run_inference.py", line 297, in <module>
main()
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/run_inference.py", line 153, in main
pd_data_files = data_utils.read_data(
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/utils/data_utils.py", line 463, in read_data
data_files_sep, label_col = read_raw_data(
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/utils/data_utils.py", line 442, in read_raw_data
data_files_sep["train"] = read_data_func(train_file)
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/utils/data_utils.py", line 175, in read_blocked_pairs
merged["text"] = merged.apply(
File "/opt/miniconda3/envs/fm_data_tasks/lib/python3.10/site-packages/pandas/core/frame.py", line 9568, in apply
return op.apply().__finalize__(self, method="apply")
File "/opt/miniconda3/envs/fm_data_tasks/lib/python3.10/site-packages/pandas/core/apply.py", line 764, in apply
return self.apply_standard()
File "/opt/miniconda3/envs/fm_data_tasks/lib/python3.10/site-packages/pandas/core/apply.py", line 891, in apply_standard
results, res_index = self.apply_series_generator()
File "/opt/miniconda3/envs/fm_data_tasks/lib/python3.10/site-packages/pandas/core/apply.py", line 907, in apply_series_generator
results[i] = self.f(v)
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/utils/data_utils.py", line 176, in <lambda>
lambda row: serialize_match_pair(
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/utils/data_utils.py", line 56, in serialize_match_pair
f"{prod_name} A is {serialize_row(row, column_mapA, sep_tok, nan_tok)}."
File "/Users/thangduong/fm_data_tasks/fm_data_tasks/utils/data_utils.py", line 36, in serialize_row
row[c_og] = f"{row[c_og].strip()}"
AttributeError: 'int' object has no attribute 'strip'
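I ran the 10-example dry-run command from the README (shown at the top) and got the error above. The failure is raised in `serialize_row` (`fm_data_tasks/utils/data_utils.py`, line 36), which calls `.strip()` on a column value that is an `int` rather than a string.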