Skip to content

Commit cd182e4

Browse files
committed
fix: merge histogram with range
1 parent 96b5508 commit cd182e4

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

lavender_data/server/dataset/statistics.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,23 @@ class CategoricalColumnStatistics(TypedDict):
4343

4444

4545
def _merge_histograms(hist: list[float], bin_edges: list[float]) -> Histogram:
    """Rebuild approximate samples from a histogram and re-bin them.

    Bin ``i`` spans ``bin_edges[i]`` to ``bin_edges[i + 1]``. Every bin with a
    positive count is expanded into that many evenly spaced values covering
    the bin's range (both edges included), and the restored values are handed
    to ``get_outlier_aware_hist``.

    Args:
        hist: Per-bin counts. Each count is truncated to an ``int`` before
            use; bins whose truncated count is zero or negative contribute
            nothing.
        bin_edges: Bin boundaries. Must provide ``bin_edges[i + 1]`` for every
            index ``i`` of ``hist`` (i.e. at least ``len(hist) + 1`` entries).

    Returns:
        The result of ``get_outlier_aware_hist`` on the restored values.

    Raises:
        IndexError: If ``bin_edges`` has fewer than ``len(hist) + 1`` entries.
    """
    restored_values: list[float] = []
    last_bin = len(hist) - 1

    for i, raw_count in enumerate(hist):
        lower = bin_edges[i]
        upper = bin_edges[i + 1]

        # Counts are annotated as floats, but range() below only accepts
        # integers, so truncate once up front.
        count = int(raw_count)
        if count <= 0:
            continue

        if count == 1:
            # A lone sample is placed on the lower edge, except in the final
            # bin where the upper edge is used instead.
            # NOTE(review): presumably so the top of the overall range stays
            # represented -- confirm.
            restored_values.append(upper if i == last_bin else lower)
            continue

        step = (upper - lower) / (count - 1)
        restored_values.append(lower)
        restored_values.extend(lower + j * step for j in range(1, count - 1))
        # Append the exact upper edge rather than lower + (count - 1) * step
        # so the last value is not affected by float rounding.
        restored_values.append(upper)

    return get_outlier_aware_hist(restored_values)
5765

lavender_data/server/routes/datasets.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,11 @@ def get_dataset_preview(
138138
raise HTTPException(status_code=404, detail="Dataset not found")
139139

140140
if cache.exists(f"preview:{preview_id}:error"):
141+
error = cache.get(f"preview:{preview_id}:error").decode()
142+
cache.delete(f"preview:{preview_id}:error")
141143
raise HTTPException(
142144
status_code=500,
143-
detail=cache.get(f"preview:{preview_id}:error").decode(),
145+
detail=error,
144146
)
145147

146148
if not cache.exists(f"preview:{preview_id}"):

0 commit comments

Comments
 (0)