Skip to content

Commit a4121c7

Browse files
committed
Adjust to the new CSV readers
Signed-off-by: dafnapension <[email protected]>
1 parent 709ac74 commit a4121c7

File tree

3 files changed

+12
-13
lines changed

3 files changed

+12
-13
lines changed

performance/compare_benchmark_performance_results.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,17 @@
3737
ratio_line4 = (
3838
pr_perf["load_time"] / main_perf["load_time"] if main_perf["load_time"] > 0 else 1
3939
)
40-
line4 = f" Load time | {main_perf['load_time_no_initial_ms']:>11} | {pr_perf['load_time_no_initial_ms']:>11} | {ratio_line4:.2f}\n"
40+
line4 = f" Load time | {main_perf['load_time']:>11} | {pr_perf['load_time']:>11} | {ratio_line4:.2f}\n"
4141
line5 = f" DS Gen. inc. Load | {main_perf['generate_benchmark_dataset_time']:>11} | {pr_perf['generate_benchmark_dataset_time']:>11} | {pr_perf['generate_benchmark_dataset_time'] / main_perf['generate_benchmark_dataset_time']:.2f}\n"
42-
line6 = f" DS Gen. exc. Load | {round(main_perf['generate_benchmark_dataset_time'] - main_perf['load_time_no_initial_ms'], 3):>11} | {round(pr_perf['generate_benchmark_dataset_time'] - pr_perf['load_time_no_initial_ms'], 3):>11} | {ratio1:.2f}\n"
43-
line7 = f" Inference time | {main_perf['inference_time']:>11} | {pr_perf['inference_time']:>11} | {pr_perf['inference_time'] / main_perf['inference_time']:.2f}\n"
44-
line8 = f" Evaluate time | {main_perf['evaluation_time']:>11} | {pr_perf['evaluation_time']:>11} | {ratio2:.2f}\n"
45-
line9 = f" Benchmark Instant. | {main_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time'] / main_perf['instantiate_benchmark_time']:.2f}\n"
46-
line10 = f" Model Instantiation| {main_perf['instantiate_model_time']:>11} | {pr_perf['instantiate_model_time']:>11} | {pr_perf['instantiate_model_time'] / main_perf['instantiate_model_time']:.2f}\n"
42+
line6 = f" DS Gen. exc. Load | {round(main_perf['generate_benchmark_dataset_time'] - main_perf['load_time'], 3):>11} | {round(pr_perf['generate_benchmark_dataset_time'] - pr_perf['load_time'], 3):>11} | {ratio1:.2f}\n"
43+
line7 = f" Benchmark Instant. | {main_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time']:>11} | {pr_perf['instantiate_benchmark_time'] / main_perf['instantiate_benchmark_time']:.2f}\n"
4744

4845
print("### Performance Comparison Results, time expressed in seconds:\n")
4946
print(line1 + line2 + line3 + line4 + line5 + line6 + line7)
5047
print("\n\n")
5148
# Performance degradation check (5% threshold)
5249
if ratio1 > 1.05:
53-
print(
54-
"\n**Warning**: Performance degradation in Dataset Generation and/or Evaluation exceeds 5%!"
55-
)
50+
print("\n**Warning**: Performance degradation in Dataset Generation exceeds 5%!")
5651
print(
5752
"Explore branch performance via 'python performance/bluebench_profiler.py --output_file=<path to json file>',"
5853
"followed by 'snakeviz <the performance.prof file specified in the output json file>'."

src/unitxt/loaders.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,13 @@ def load_iterables(self):
447447
return iterables
448448

449449
def split_generator(self, split: str) -> Generator:
450-
dataset = pd.read_csv(
451-
self.files[split], nrows=self.get_limit(), sep=self.sep
452-
).to_dict("records")
450+
dataset = self.__class__._loader_cache.get(str(self) + "_" + split, None)
451+
if dataset is None:
452+
reader = self.get_reader()
453+
dataset = reader(self.files[split], **self.get_args()).to_dict("records")
454+
self.__class__._loader_cache.max_size = settings.loader_cache_size
455+
self.__class__._loader_cache[str(self) + "_" + split] = dataset
456+
453457
yield from dataset
454458

455459

utils/.secrets.baseline

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@
151151
"filename": "src/unitxt/loaders.py",
152152
"hashed_secret": "840268f77a57d5553add023cfa8a4d1535f49742",
153153
"is_verified": false,
154-
"line_number": 595,
154+
"line_number": 599,
155155
"is_secret": false
156156
}
157157
],

0 commit comments

Comments
 (0)