Skip to content

Commit 0fe6ac6

Browse files
plutasnyy and cragwolfe authored
Add backward compatibility for metric calculation (#3798)
Co-authored-by: cragwolfe <[email protected]>
1 parent e48d79e commit 0fe6ac6

File tree

3 files changed

+24
-3
lines changed

3 files changed

+24
-3
lines changed

CHANGELOG.md

+10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
## 0.16.8
2+
3+
### Enhancements
4+
- **Metrics: Weighted table average is optional**
5+
6+
### Features
7+
8+
### Fixes
9+
110
## 0.16.7
211

312
### Enhancements
@@ -7,6 +16,7 @@
716

817
### Fixes
918

19+
1020
## 0.16.6
1121

1222
### Enhancements

unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.16.7" # pragma: no cover
1+
__version__ = "0.16.8" # pragma: no cover

unstructured/metrics/evaluate.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,8 @@ class TableStructureMetricsCalculator(BaseMetricsCalculator):
216216
"""
217217

218218
cutoff: Optional[float] = None
219+
weighted_average: bool = True
220+
include_false_positives: bool = True
219221

220222
def __post_init__(self):
221223
super().__post_init__()
@@ -287,11 +289,20 @@ def _generate_dataframes(self, rows):
287289

288290
df = pd.DataFrame(rows, columns=headers)
289291
df["_table_weights"] = df["total_tables"]
290-
# we give false positive tables a 1 table worth of weight in computing table level acc
291-
df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1
292+
293+
if self.include_false_positives:
294+
# we give false positive tables a 1 table worth of weight in computing table level acc
295+
df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1
296+
292297
# filter down to only those with actual and/or predicted tables
293298
has_tables_df = df[df["_table_weights"] > 0]
294299

300+
if not self.weighted_average:
301+
# for all non zero elements assign them value 1
302+
df["_table_weights"] = df["_table_weights"].apply(
303+
lambda table_weight: 1 if table_weight != 0 else 0
304+
)
305+
295306
if has_tables_df.empty:
296307
agg_df = pd.DataFrame(
297308
[[metric, None, None, None, 0] for metric in self.supported_metric_names]

0 commit comments

Comments
 (0)