-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgg-stat.scm
More file actions
415 lines (344 loc) · 18.5 KB
/
gg-stat.scm
File metadata and controls
415 lines (344 loc) · 18.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
;;;; ========================================================================
;;;; Helper Functions for Data Transformation
;;;; ========================================================================
(define (transpose-group-stats group-stats)
  "Turn a list of per-group records into one alist of columns

   Input:  a list of alists, one per group, e.g.
             (((x . g1) (y . v1)) ((x . g2) (y . v2)))
   Output: one entry per key, each holding that key's values in
           group order, e.g.
             ((x . (g1 g2)) (y . (v1 v2)))

   The keys and their order are taken from the first record; every
   record is then searched with assoc for each of those keys.
   An empty input yields the empty list."
  (cond
   ((null? group-stats) '())
   (else
    (let* ((column-names (map car (car group-stats)))
           ;; Collect the value stored under NAME in every record.
           (column-of
            (lambda (name)
              (map (lambda (record) (cdr (assoc name record)))
                   group-stats))))
      (map (lambda (name) (cons name (column-of name)))
           column-names)))))
;;;; ========================================================================
;;;; Statistical Transformation Functions
;;;; ========================================================================
;;; ------------------------------------------------------------------------
;;; stat-bin: Transform raw data into histogram bins
;;; ------------------------------------------------------------------------
(define (stat-bin layer-data aes params)
  "Transform raw data into histogram bins

   Input aesthetics: x (raw values)
   Output columns: x (bin centers), width (bin widths), y (counts)

   Parameters (looked up in params with assq-ref):
     bins:     Number of bins passed to histogram-breaks (default 30)
     binwidth: Width of bins (overrides bins); at least one bin is
               always generated, even when all values are identical
     breaks:   Explicit breakpoint vector (overrides binwidth and bins)
     method:   Binning method ('sturges, 'scott, 'fd, 'equal-width),
               default 'sturges; only used when neither breaks nor
               binwidth is given"
  (let* ((x-col (aes-get aes 'x))
         ;; Bound as x-values, not values, so the standard procedure
         ;; of that name stays visible inside this body.
         (x-values (data-column layer-data x-col))
         (bins (or (assq-ref params 'bins) 30))
         (binwidth (assq-ref params 'binwidth))
         (breaks-param (assq-ref params 'breaks))
         (method (or (assq-ref params 'method) 'sturges))
         ;; Break selection, in order of precedence:
         ;; explicit breaks > binwidth > histogram-breaks.
         (breaks
          (cond
           (breaks-param breaks-param)
           (binwidth
            (let* ((min-val (reduce* min x-values))
                   (max-val (reduce* max x-values))
                   ;; Clamp to one bin: a zero data range would otherwise
                   ;; give 0 bins, i.e. a single break and no bin at all.
                   (n-bins (max 1
                                (inexact->exact
                                 (ceiling (/ (- max-val min-val)
                                             binwidth))))))
              ;; n-bins bins need n-bins + 1 equally spaced edges.
              (list->vector
               (map (lambda (i) (+ min-val (* i binwidth)))
                    (iota (+ n-bins 1))))))
           (else
            (histogram-breaks x-values bins #:method method)))))
    ;; histogram returns the edges it used and the per-bin counts.
    (let-values (((bin-edges counts) (histogram x-values breaks)))
      (let* ((n-bins (vector-length counts))
             ;; Centre of bin i is the midpoint of edges i and i+1.
             (bin-centers (map (lambda (i)
                                 (/ (+ (vector-ref bin-edges i)
                                       (vector-ref bin-edges (+ i 1)))
                                    2.0))
                               (iota n-bins)))
             (bin-widths (map (lambda (i)
                                (- (vector-ref bin-edges (+ i 1))
                                   (vector-ref bin-edges i)))
                              (iota n-bins))))
        ;; Return preprocessed data in columnar format
        `((x . ,bin-centers)
          (width . ,bin-widths)
          (y . ,(vector->list counts)))))))
;;; ------------------------------------------------------------------------
;;; stat-density: Kernel density estimation
;;; ------------------------------------------------------------------------
(define (stat-density layer-data aes params)
  "Kernel density estimate of the column mapped to the x aesthetic

   Input aesthetics: x (raw values)
   Output columns: x (cars of the pairs returned by kde),
                   y (cdrs of the pairs returned by kde)

   Parameters:
     bandwidth: KDE bandwidth, used as given when supplied
     adjust:    Factor (default 1.0) multiplied into the automatically
                selected bandwidth; not applied to an explicit bandwidth
     kernel:    Kernel type (default 'gaussian)
     n:         Number of evaluation points (default 512)"
  (let* ((column (aes-get aes 'x))
         (samples (data-column layer-data column))
         (scale-factor (or (assq-ref params 'adjust) 1.0))
         (kernel-kind (or (assq-ref params 'kernel) 'gaussian))
         (grid-size (or (assq-ref params 'n) 512))
         (fixed-bw (assq-ref params 'bandwidth))
         ;; Fall back to the kde-bandwidth-nrd estimate, scaled by adjust.
         (bw (if fixed-bw
                 fixed-bw
                 (* scale-factor (kde-bandwidth-nrd samples))))
         (curve (kde samples bw kernel-kind grid-size)))
    ;; Split the list of pairs into two parallel columns.
    (list (cons 'x (map car curve))
          (cons 'y (map cdr curve)))))
;;; ------------------------------------------------------------------------
;;; stat-boxplot: Compute boxplot summary statistics
;;; ------------------------------------------------------------------------
(define (stat-boxplot layer-data aes params)
  "Reduce y to boxplot statistics for every distinct value of x

   Input aesthetics: x (group), y (values)
   Output columns: x, ymin, lower, middle, upper, ymax, outliers
                   (one element per group, via transpose-group-stats)

   Parameters:
     coef: IQR multiplier for the whisker fences (default 1.5)

   Groups with fewer than five observations get a reduced summary:
   ymin and lower are the minimum, upper and ymax the maximum,
   middle is the mean, and outliers is empty."
  (let* ((group-col (aes-get aes 'x))
         (value-col (aes-get aes 'y))
         (group-keys (delete-duplicates (data-column layer-data group-col)))
         (coef (or (assq-ref params 'coef) 1.5))
         ;; y observations of every row whose x equals KEY.
         (values-for
          (lambda (key)
            (map (lambda (row) (cdr (assoc value-col row)))
                 (filter (lambda (row)
                           (equal? (cdr (assoc group-col row)) key))
                         (data-rows layer-data)))))
         ;; Reduced summary used when there are too few observations.
         (small-group-summary
          (lambda (key ys)
            (let ((lowest (apply min ys))
                  (highest (apply max ys)))
              (list (cons 'x key)
                    (cons 'ymin lowest)
                    (cons 'lower lowest)
                    (cons 'middle (mean ys))
                    (cons 'upper highest)
                    (cons 'ymax highest)
                    (cons 'outliers '())))))
         ;; Regular summary: quartiles from fivenum, whiskers from the
         ;; non-outlier values that lie inside the coef * IQR fences.
         (full-summary
          (lambda (key ys)
            (let ((five (fivenum ys)))
              (call-with-values
                  (lambda () (outliers ys #:coef coef))
                (lambda (flagged kept)
                  (let* ((q1 (list-ref five 1))
                         (q3 (list-ref five 3))
                         (spread (- q3 q1))
                         (fence-lo (- q1 (* coef spread)))
                         (fence-hi (+ q3 (* coef spread)))
                         ;; With no non-outlier values left, the
                         ;; whiskers sit on the quartiles themselves.
                         (whisker-lo
                          (if (null? kept)
                              q1
                              (apply min
                                     (filter (lambda (v) (>= v fence-lo))
                                             kept))))
                         (whisker-hi
                          (if (null? kept)
                              q3
                              (apply max
                                     (filter (lambda (v) (<= v fence-hi))
                                             kept)))))
                    (list (cons 'x key)
                          (cons 'ymin whisker-lo)
                          (cons 'lower q1)
                          (cons 'middle (list-ref five 2))
                          (cons 'upper q3)
                          (cons 'ymax whisker-hi)
                          (cons 'outliers flagged)))))))))
    ;; One record per group, then transposed to columnar format.
    (transpose-group-stats
     (map (lambda (key)
            (let ((ys (values-for key)))
              (if (< (length ys) 5)
                  (small-group-summary key ys)
                  (full-summary key ys))))
          group-keys))))
;;; ------------------------------------------------------------------------
;;; stat-violin: Compute violin plot density grid
;;; ------------------------------------------------------------------------
(define (stat-violin layer-data aes params)
  "Compute a kernel density estimate of y for each distinct x value

   Input aesthetics: x (group), y (values)
   Output: columnar alist (via transpose-group-stats) with one element
   per group in every column:
     group      the group keys
     density-x  per group, the cars of the pairs returned by kde
     density-y  per group, the cdrs of the pairs returned by kde
     y-values   per group, the raw y observations
   No boxplot statistics are produced and the curves are neither
   mirrored nor rescaled; that part of the violin is deferred.

   Parameters:
     bandwidth: KDE bandwidth; when absent it is selected separately
                for each group with kde-bandwidth-nrd
     kernel:    Kernel type (default 'gaussian)
     n:         Number of density evaluation points (default 128)
   A scale parameter ('area or 'count) is not read by this
   implementation."
  (let* ((x-col (aes-get aes 'x))
         (y-col (aes-get aes 'y))
         (groups (delete-duplicates (data-column layer-data x-col)))
         (kernel (or (assq-ref params 'kernel) 'gaussian))
         (n (or (assq-ref params 'n) 128))
         (bandwidth-param (assq-ref params 'bandwidth))
         ;; One record per group: density curve plus the raw values.
         (group-results
          (map (lambda (group)
                 (let* ((group-rows (filter (lambda (row)
                                              (equal? (cdr (assoc x-col row))
                                                      group))
                                            (data-rows layer-data)))
                        (y-values (map (lambda (row) (cdr (assoc y-col row)))
                                       group-rows))
                        ;; An explicit bandwidth applies to every group;
                        ;; otherwise estimate it from this group's values.
                        (bandwidth (or bandwidth-param
                                       (kde-bandwidth-nrd y-values)))
                        (density-pairs (kde y-values bandwidth kernel n)))
                   `((group . ,group)
                     (density-x . ,(map car density-pairs))
                     (density-y . ,(map cdr density-pairs))
                     (y-values . ,y-values))))
               groups)))
    ;; Return columnar format for basic rendering
    (transpose-group-stats group-results)))
;;; ------------------------------------------------------------------------
;;; stat-summary: Compute summary statistics with error bounds
;;; ------------------------------------------------------------------------
(define (stat-summary layer-data aes params)
  "Compute summary statistics with error bounds

   Input aesthetics: x (group), y (values)
   Output columns: x, y (summary), ymin, ymax
                   (one element per group, via transpose-group-stats)

   Parameters:
     fun-y / fun: Summary function applied to a group's y values;
                  fun-y is consulted first, then fun (default: mean)
     fun-ymin:    Lower bound function (default: mean - SE)
     fun-ymax:    Upper bound function (default: mean + SE)

   For the default bounds, a group with fewer than two observations
   gets SE = 0, so ymin and ymax both equal the mean."
  (let* ((x-col (aes-get aes 'x))
         (y-col (aes-get aes 'y))
         (groups (delete-duplicates (data-column layer-data x-col)))
         (fun-y (or (assq-ref params 'fun-y)
                    (assq-ref params 'fun)
                    mean))
         (fun-ymin (assq-ref params 'fun-ymin))
         (fun-ymax (assq-ref params 'fun-ymax))
         ;; The sample standard error is undefined for a single
         ;; observation (n - 1 = 0), so treat it as zero instead of
         ;; handing such a group to standard-error-of-the-mean.
         (default-se
          (lambda (vals)
            (if (< (length vals) 2)
                0
                (standard-error-of-the-mean vals))))
         ;; Default error functions: +/- 1 SE around the mean
         (ymin-fn (or fun-ymin
                      (lambda (vals)
                        (- (mean vals) (default-se vals)))))
         (ymax-fn (or fun-ymax
                      (lambda (vals)
                        (+ (mean vals) (default-se vals)))))
         (summaries
          (map (lambda (group)
                 (let* ((group-rows (filter (lambda (row)
                                              (equal? (cdr (assoc x-col row))
                                                      group))
                                            (data-rows layer-data)))
                        (y-values (map (lambda (row) (cdr (assoc y-col row)))
                                       group-rows)))
                   `((x . ,group)
                     (y . ,(fun-y y-values))
                     (ymin . ,(ymin-fn y-values))
                     (ymax . ,(ymax-fn y-values)))))
               groups)))
    (transpose-group-stats summaries)))
;;;; ========================================================================
;;;; Helper: Rendering Functions for Statistical Geometries
;;;; ========================================================================
;;; These helper functions are called from render-layer to render
;;; the statistical geometries with preprocessed data.
(define (render-geom-errorbar processed-data processed-aes scales params plotter)
  "Draw error bars from preprocessed data

   Styling comes from params, with these fallbacks:
     color: black, width: 2, cap-width: 0.1
   The drawer returned by geom-errorbar is passed to render-drawer
   together with plotter."
  (let* ((stroke (or (assq-ref params 'color) "black"))
         (thickness (or (assq-ref params 'width) 2))
         (cap (or (assq-ref params 'cap-width) 0.1)))
    (call-with-values
        (lambda ()
          (geom-errorbar processed-data processed-aes
                         #:scales scales
                         #:color stroke
                         #:width thickness
                         #:cap-width cap))
      ;; geom-errorbar yields two values; only the drawer is used.
      (lambda (drawer unused)
        (render-drawer drawer plotter)))))
(define (render-geom-pointrange processed-data processed-aes scales params plotter)
  "Draw point-plus-range marks from preprocessed data

   Styling comes from params, with these fallbacks:
     color: black, point-size: 4, line-width: 2
   The drawer returned by geom-pointrange is passed to render-drawer
   together with plotter."
  (let* ((stroke (or (assq-ref params 'color) "black"))
         (dot-size (or (assq-ref params 'point-size) 4))
         (bar-thickness (or (assq-ref params 'line-width) 2)))
    (call-with-values
        (lambda ()
          (geom-pointrange processed-data processed-aes
                           #:scales scales
                           #:color stroke
                           #:point-size dot-size
                           #:line-width bar-thickness))
      ;; geom-pointrange yields two values; only the drawer is used.
      (lambda (drawer unused)
        (render-drawer drawer plotter)))))
(define (render-geom-linerange processed-data processed-aes scales params plotter)
  "Draw line ranges from preprocessed data

   Styling comes from params, with these fallbacks:
     color: black, width: 2
   The drawer returned by geom-linerange is passed to render-drawer
   together with plotter."
  (let* ((stroke (or (assq-ref params 'color) "black"))
         (thickness (or (assq-ref params 'width) 2)))
    (call-with-values
        (lambda ()
          (geom-linerange processed-data processed-aes
                          #:scales scales
                          #:color stroke
                          #:width thickness))
      ;; geom-linerange yields two values; only the drawer is used.
      (lambda (drawer unused)
        (render-drawer drawer plotter)))))
(define (render-geom-crossbar processed-data processed-aes scales params plotter)
  "Draw crossbars (bar with a center line) from preprocessed data

   Styling comes from params, with these fallbacks:
     fill: white, color: black, width: 0.5
   The drawer returned by geom-crossbar is passed to render-drawer
   together with plotter."
  (let* ((interior (or (assq-ref params 'fill) "white"))
         (stroke (or (assq-ref params 'color) "black"))
         (bar-width (or (assq-ref params 'width) 0.5)))
    (call-with-values
        (lambda ()
          (geom-crossbar processed-data processed-aes
                         #:scales scales
                         #:fill interior
                         #:color stroke
                         #:width bar-width))
      ;; geom-crossbar yields two values; only the drawer is used.
      (lambda (drawer unused)
        (render-drawer drawer plotter)))))
(define (render-geom-histogram processed-data processed-aes scales params plotter)
  "Draw a histogram from preprocessed bin data

   Styling comes from params, with these fallbacks:
     fill: steelblue, color: black, alpha: 0.9
   The drawer returned by geom-histogram is passed to render-drawer
   together with plotter."
  (let* ((interior (or (assq-ref params 'fill) "steelblue"))
         (stroke (or (assq-ref params 'color) "black"))
         (opacity (or (assq-ref params 'alpha) 0.9)))
    (call-with-values
        (lambda ()
          (geom-histogram processed-data processed-aes
                          #:scales scales
                          #:fill interior
                          #:color stroke
                          #:alpha opacity))
      ;; geom-histogram yields two values; only the drawer is used.
      (lambda (drawer unused)
        (render-drawer drawer plotter)))))
(define (render-geom-density processed-data processed-aes scales params plotter)
  "Draw a density curve from preprocessed KDE data

   Styling comes from params, with these fallbacks:
     fill: lightblue, color: navy, alpha: 0.5, geom-type: 'area
   The drawer returned by geom-density is passed to render-drawer
   together with plotter."
  (let* ((interior (or (assq-ref params 'fill) "lightblue"))
         (stroke (or (assq-ref params 'color) "navy"))
         (opacity (or (assq-ref params 'alpha) 0.5))
         (shape (or (assq-ref params 'geom-type) 'area)))
    (call-with-values
        (lambda ()
          (geom-density processed-data processed-aes
                        #:scales scales
                        #:fill interior
                        #:color stroke
                        #:alpha opacity
                        #:geom-type shape))
      ;; geom-density yields two values; only the drawer is used.
      (lambda (drawer unused)
        (render-drawer drawer plotter)))))
(define (render-geom-boxplot processed-data processed-aes scales params plotter)
  "Draw boxplots from preprocessed statistics

   Styling comes from params, with these fallbacks:
     fill: white, color: black, outlier-color: red,
     outlier-size: 3, width: 0.6
   The drawer returned by geom-boxplot is passed to render-drawer
   together with plotter."
  (let* ((interior (or (assq-ref params 'fill) "white"))
         (stroke (or (assq-ref params 'color) "black"))
         (outlier-stroke (or (assq-ref params 'outlier-color) "red"))
         (outlier-dot (or (assq-ref params 'outlier-size) 3))
         (box-width (or (assq-ref params 'width) 0.6)))
    (call-with-values
        (lambda ()
          (geom-boxplot processed-data processed-aes
                        #:scales scales
                        #:fill interior
                        #:color stroke
                        #:outlier-color outlier-stroke
                        #:outlier-size outlier-dot
                        #:width box-width))
      ;; geom-boxplot yields two values; only the drawer is used.
      (lambda (drawer unused)
        (render-drawer drawer plotter)))))
(define (render-geom-violin processed-data processed-aes scales params plotter)
  "Placeholder violin renderer: draws the data as a boxplot"
  ;; There is no dedicated violin drawing yet, so every argument is
  ;; forwarded unchanged to the boxplot renderer.
  (render-geom-boxplot processed-data
                       processed-aes
                       scales
                       params
                       plotter))