|
| 1 | +import os |
| 2 | +import pathlib |
| 3 | +from typing import Tuple, List, Union, Dict |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | + |
| 7 | +from autorag.evaluation import evaluate_retrieval |
| 8 | +from autorag.schema.metricinput import MetricInput |
| 9 | +from autorag.strategy import measure_speed, filter_by_threshold, select_best |
| 10 | + |
| 11 | + |
def evaluate_retrieval_node(
    result_df: pd.DataFrame,
    metric_inputs: List[MetricInput],
    metrics: Union[List[str], List[Dict]],
) -> pd.DataFrame:
    """
    Compute retrieval metrics for the output of a single retrieval module.

    :param result_df: The result dataframe from a retrieval node.
        It must contain the 'retrieved_contents', 'retrieved_ids' and
        'retrieve_scores' columns.
    :param metric_inputs: List of metric input schema for AutoRAG.
    :param metrics: Metric list from input strategies.
    :return: Return result_df with metrics columns.
        The columns will be 'retrieved_contents', 'retrieved_ids', 'retrieve_scores', and metric names.
    """

    def evaluate_this_module(df: pd.DataFrame):
        # Hand the three retrieval columns to the evaluator as plain lists.
        contents = df["retrieved_contents"].tolist()
        ids = df["retrieved_ids"].tolist()
        scores = df["retrieve_scores"].tolist()
        return contents, ids, scores

    # Same as decorating `evaluate_this_module` with @evaluate_retrieval(...).
    decorate = evaluate_retrieval(
        metric_inputs=metric_inputs,
        metrics=metrics,
    )
    evaluated_module = decorate(evaluate_this_module)
    return evaluated_module(result_df)
| 38 | + |
| 39 | + |
def run(
    input_modules,
    input_module_params,
    project_dir: Union[str, pathlib.Path, pathlib.PurePath],
    previous_result: pd.DataFrame,
    strategies,
    metric_inputs: List[MetricInput],
) -> Tuple[List[pd.DataFrame], List]:
    """
    Run input modules and parameters, then evaluate every module result.

    :param input_modules: Input modules. Each one must expose `run_evaluator`.
    :param input_module_params: Input module parameters.
        One keyword-argument dict per module, in the same order as `input_modules`.
    :param project_dir: Project directory path.
    :param previous_result: Previous result dataframe.
    :param strategies: Strategies for retrieval node. Must contain a non-empty 'metrics' entry.
    :param metric_inputs: List of metric input schema for AutoRAG.
    :return: First, it returns list of result dataframe with metric columns.
        Second, it returns list of average execution times
        (each module's execution time divided by the length of the first result).
    :raises ValueError: If 'metrics' is missing or empty in strategies.
    """
    # Validate the strategy up front so that a misconfiguration fails fast,
    # before any (potentially slow) module has been executed.
    metrics = strategies.get("metrics")
    if not metrics:
        raise ValueError("You must at least one metrics for retrieval evaluation.")

    # measure_speed yields one (result, execution_time) pair per module.
    timed_runs = [
        measure_speed(
            module.run_evaluator,
            project_dir=project_dir,
            previous_result=previous_result,
            **module_params,
        )
        for module, module_params in zip(input_modules, input_module_params)
    ]
    result, execution_times = zip(*timed_runs)

    # The length of the first result is used as the divisor for every module.
    row_count = len(result[0])
    average_times = [elapsed / row_count for elapsed in execution_times]

    # run metrics before filtering
    result = [
        evaluate_retrieval_node(module_result, metric_inputs, metrics)
        for module_result in result
    ]

    return result, average_times
| 87 | + |
| 88 | + |
def save_and_summary(
    input_modules,
    input_module_params,
    result_list,
    execution_time_list,
    filename_start: int,
    save_dir: Union[str, pathlib.Path, pathlib.PurePath],
    strategies,
) -> pd.DataFrame:
    """
    Save the result and make summary file

    Each result is written to `<save_dir>/<n>.parquet`, where n counts up from
    `filename_start`, and the summary is written to `<save_dir>/summary.csv`.

    :param input_modules: Input modules
    :param input_module_params: Input module parameters
    :param result_list: Result list
    :param execution_time_list: Execution times
    :param filename_start: The first filename to use
    :param save_dir: Directory where the parquet files and summary.csv are written.
    :param strategies: Strategies for retrieval node. Its 'metrics' entry names
        the result columns that are averaged into the summary.
    :return: The summary dataframe with 'filename', 'module_name', 'module_params',
        'execution_time' and one mean-value column per metric.
    """
    # One numbered parquet file per input module.
    filename_list = [
        f"{file_number}.parquet"
        for file_number in range(filename_start, filename_start + len(input_modules))
    ]

    # save results to folder
    for result_df, filename in zip(result_list, filename_list):
        result_df.to_parquet(os.path.join(save_dir, filename), index=False)

    summary = {
        "filename": filename_list,
        "module_name": [module.__name__ for module in input_modules],
        "module_params": input_module_params,
        "execution_time": execution_time_list,
    }
    # NOTE(review): each metric is used as a column key, so this only works when
    # the metrics are plain strings; dict-style metrics would fail here - confirm.
    for metric in strategies.get("metrics"):
        summary[metric] = [result_df[metric].mean() for result_df in result_list]

    summary_df = pd.DataFrame(summary)
    summary_df.to_csv(os.path.join(save_dir, "summary.csv"), index=False)
    return summary_df
| 138 | + |
| 139 | + |
def find_best(results, average_times, filenames, strategies):
    """
    Pick the best module result according to the given strategies.

    :param results: Result dataframes, one per module.
    :param average_times: Average execution times, passed to the speed filter.
    :param filenames: Filenames matching `results`.
    :param strategies: Strategies for retrieval node.
        'speed_threshold' (optional) enables filtering before selection,
        'metrics' is passed to the selection,
        'strategy' (optional) is the selection strategy and defaults to "mean".
    :return: The selected result and its filename.
    """
    # filter by strategies
    speed_threshold = strategies.get("speed_threshold")
    if speed_threshold is not None:
        results, filenames = filter_by_threshold(
            results, average_times, speed_threshold, filenames
        )

    selection_strategy = strategies.get("strategy", "mean")
    best_result, best_filename = select_best(
        results,
        strategies.get("metrics"),
        filenames,
        selection_strategy,
    )
    return best_result, best_filename
0 commit comments