|
12 | 12 | from hypothesis import assume, given, settings |
13 | 13 | from hypothesis import strategies as st |
14 | 14 |
|
| 15 | +from non_local_detector.continuous_state_transitions import RandomWalk |
15 | 16 | from non_local_detector.core import ( |
16 | 17 | _condition_on, |
17 | 18 | _divide_safe, |
18 | 19 | _normalize, |
19 | 20 | _safe_log, |
20 | 21 | ) |
| 22 | +from non_local_detector.models.decoder import ClusterlessDecoder |
| 23 | +from non_local_detector.simulate.clusterless_simulation import make_simulated_run_data |
21 | 24 |
|
22 | 25 |
|
23 | 26 | # Custom strategies for probability distributions |
@@ -223,3 +226,198 @@ def test_normalize_scales_correctly(self, dist, scale_factor): |
223 | 226 | normalized_scaled, _ = _normalize(jnp.asarray(scaled)) |
224 | 227 |
|
225 | 228 | assert jnp.allclose(normalized_original, normalized_scaled, rtol=1e-6) |
| 229 | + |
| 230 | + @settings(deadline=5000, max_examples=10) # Decoder tests are slower |
| 231 | + @given(st.integers(min_value=42, max_value=9999)) |
| 232 | + def test_posterior_probabilities_sum_to_one(self, seed): |
| 233 | + """Property: decoder posteriors sum to 1 across position dimension.""" |
| 234 | + # Generate simulation |
| 235 | + # NOTE: n_runs must be >= 3 to create proper 2D position data |
| 236 | + # NOTE: Need substantial data for RandomWalk to build proper position bins |
| 237 | + sim = make_simulated_run_data( |
| 238 | + n_tetrodes=2, |
| 239 | + place_field_means=np.arange(0, 80, 20), # 4 neurons |
| 240 | + sampling_frequency=500, |
| 241 | + n_runs=3, # Multiple runs to ensure 2D position array |
| 242 | + seed=seed, |
| 243 | + ) |
| 244 | + |
| 245 | + # Use 70/30 train/test split on all data |
| 246 | + n_encode = int(0.7 * len(sim.position_time)) |
| 247 | + is_training = np.ones(len(sim.position_time), dtype=bool) |
| 248 | + is_training[n_encode:] = False |
| 249 | + |
| 250 | + decoder = ClusterlessDecoder( |
| 251 | + clusterless_algorithm="clusterless_kde", |
| 252 | + clusterless_algorithm_params={ |
| 253 | + "position_std": 6.0, |
| 254 | + "waveform_std": 24.0, |
| 255 | + "block_size": 50, |
| 256 | + }, |
| 257 | + continuous_transition_types=[[RandomWalk(movement_var=25.0)]], |
| 258 | + ) |
| 259 | + |
| 260 | + decoder.fit( |
| 261 | + sim.position_time, |
| 262 | + sim.position, |
| 263 | + sim.spike_times, |
| 264 | + sim.spike_waveform_features, |
| 265 | + is_training=is_training, |
| 266 | + ) |
| 267 | + |
| 268 | + # Predict on small test set (10 time bins only for speed) |
| 269 | + test_start_idx = n_encode |
| 270 | + test_end_idx = min(n_encode + 10, len(sim.position_time)) |
| 271 | + results = decoder.predict( |
| 272 | + spike_times=[ |
| 273 | + st[ |
| 274 | + (st >= sim.position_time[test_start_idx]) |
| 275 | + & (st < sim.position_time[test_end_idx]) |
| 276 | + ] |
| 277 | + for st in sim.spike_times |
| 278 | + ], |
| 279 | + spike_waveform_features=[ |
| 280 | + swf[ |
| 281 | + (sim.spike_times[i] >= sim.position_time[test_start_idx]) |
| 282 | + & (sim.spike_times[i] < sim.position_time[test_end_idx]) |
| 283 | + ] |
| 284 | + for i, swf in enumerate(sim.spike_waveform_features) |
| 285 | + ], |
| 286 | + time=sim.position_time[test_start_idx:test_end_idx], |
| 287 | + position=sim.position[test_start_idx:test_end_idx], |
| 288 | + position_time=sim.position_time[test_start_idx:test_end_idx], |
| 289 | + ) |
| 290 | + |
| 291 | + # Check posterior sums to 1 across spatial dimension (state_bins) |
| 292 | + posterior_sums = results.acausal_posterior.sum(dim="state_bins") |
| 293 | + assert np.allclose(posterior_sums.values, 1.0, atol=1e-10) |
| 294 | + |
| 295 | + @settings(deadline=5000, max_examples=10) |
| 296 | + @given(st.integers(min_value=42, max_value=9999)) |
| 297 | + def test_posteriors_nonnegative_and_bounded(self, seed): |
| 298 | + """Property: decoder posteriors are in [0, 1].""" |
| 299 | + # NOTE: n_runs must be >= 3 to create proper 2D position data |
| 300 | + # NOTE: Need substantial data for RandomWalk to build proper position bins |
| 301 | + sim = make_simulated_run_data( |
| 302 | + n_tetrodes=2, |
| 303 | + place_field_means=np.arange(0, 80, 20), |
| 304 | + sampling_frequency=500, |
| 305 | + n_runs=3, |
| 306 | + seed=seed, |
| 307 | + ) |
| 308 | + |
| 309 | + n_encode = int(0.7 * len(sim.position_time)) |
| 310 | + is_training = np.ones(len(sim.position_time), dtype=bool) |
| 311 | + is_training[n_encode:] = False |
| 312 | + |
| 313 | + decoder = ClusterlessDecoder( |
| 314 | + clusterless_algorithm="clusterless_kde", |
| 315 | + clusterless_algorithm_params={ |
| 316 | + "position_std": 6.0, |
| 317 | + "waveform_std": 24.0, |
| 318 | + "block_size": 50, |
| 319 | + }, |
| 320 | + continuous_transition_types=[[RandomWalk(movement_var=25.0)]], |
| 321 | + ) |
| 322 | + |
| 323 | + decoder.fit( |
| 324 | + sim.position_time, |
| 325 | + sim.position, |
| 326 | + sim.spike_times, |
| 327 | + sim.spike_waveform_features, |
| 328 | + is_training=is_training, |
| 329 | + ) |
| 330 | + |
| 331 | + test_start_idx = n_encode |
| 332 | + test_end_idx = min(n_encode + 10, len(sim.position_time)) |
| 333 | + results = decoder.predict( |
| 334 | + spike_times=[ |
| 335 | + st[ |
| 336 | + (st >= sim.position_time[test_start_idx]) |
| 337 | + & (st < sim.position_time[test_end_idx]) |
| 338 | + ] |
| 339 | + for st in sim.spike_times |
| 340 | + ], |
| 341 | + spike_waveform_features=[ |
| 342 | + swf[ |
| 343 | + (sim.spike_times[i] >= sim.position_time[test_start_idx]) |
| 344 | + & (sim.spike_times[i] < sim.position_time[test_end_idx]) |
| 345 | + ] |
| 346 | + for i, swf in enumerate(sim.spike_waveform_features) |
| 347 | + ], |
| 348 | + time=sim.position_time[test_start_idx:test_end_idx], |
| 349 | + position=sim.position[test_start_idx:test_end_idx], |
| 350 | + position_time=sim.position_time[test_start_idx:test_end_idx], |
| 351 | + ) |
| 352 | + |
| 353 | + # Check all values in [0, 1] |
| 354 | + assert np.all(results.acausal_posterior.values >= 0.0) |
| 355 | + assert np.all(results.acausal_posterior.values <= 1.0) |
| 356 | + |
| 357 | + @settings(deadline=5000, max_examples=10) |
| 358 | + @given(st.integers(min_value=42, max_value=9999)) |
| 359 | + def test_log_probabilities_finite(self, seed): |
| 360 | + """Property: log probabilities should be finite (or -inf for zero prob).""" |
| 361 | + # NOTE: n_runs must be >= 3 to create proper 2D position data |
| 362 | + # NOTE: Need substantial data for RandomWalk to build proper position bins |
| 363 | + sim = make_simulated_run_data( |
| 364 | + n_tetrodes=2, |
| 365 | + place_field_means=np.arange(0, 80, 20), |
| 366 | + sampling_frequency=500, |
| 367 | + n_runs=3, |
| 368 | + seed=seed, |
| 369 | + ) |
| 370 | + |
| 371 | + n_encode = int(0.7 * len(sim.position_time)) |
| 372 | + is_training = np.ones(len(sim.position_time), dtype=bool) |
| 373 | + is_training[n_encode:] = False |
| 374 | + |
| 375 | + decoder = ClusterlessDecoder( |
| 376 | + clusterless_algorithm="clusterless_kde", |
| 377 | + clusterless_algorithm_params={ |
| 378 | + "position_std": 6.0, |
| 379 | + "waveform_std": 24.0, |
| 380 | + "block_size": 50, |
| 381 | + }, |
| 382 | + continuous_transition_types=[[RandomWalk(movement_var=25.0)]], |
| 383 | + ) |
| 384 | + |
| 385 | + decoder.fit( |
| 386 | + sim.position_time, |
| 387 | + sim.position, |
| 388 | + sim.spike_times, |
| 389 | + sim.spike_waveform_features, |
| 390 | + is_training=is_training, |
| 391 | + ) |
| 392 | + |
| 393 | + test_start_idx = n_encode |
| 394 | + test_end_idx = min(n_encode + 10, len(sim.position_time)) |
| 395 | + results = decoder.predict( |
| 396 | + spike_times=[ |
| 397 | + st[ |
| 398 | + (st >= sim.position_time[test_start_idx]) |
| 399 | + & (st < sim.position_time[test_end_idx]) |
| 400 | + ] |
| 401 | + for st in sim.spike_times |
| 402 | + ], |
| 403 | + spike_waveform_features=[ |
| 404 | + swf[ |
| 405 | + (sim.spike_times[i] >= sim.position_time[test_start_idx]) |
| 406 | + & (sim.spike_times[i] < sim.position_time[test_end_idx]) |
| 407 | + ] |
| 408 | + for i, swf in enumerate(sim.spike_waveform_features) |
| 409 | + ], |
| 410 | + time=sim.position_time[test_start_idx:test_end_idx], |
| 411 | + position=sim.position[test_start_idx:test_end_idx], |
| 412 | + position_time=sim.position_time[test_start_idx:test_end_idx], |
| 413 | + ) |
| 414 | + |
| 415 | + # Take log of posteriors |
| 416 | + log_posterior = np.log( |
| 417 | + results.acausal_posterior.values + 1e-300 |
| 418 | + ) # Avoid log(0) |
| 419 | + |
| 420 | + # Should not have NaN |
| 421 | + assert not np.any(np.isnan(log_posterior)) |
| 422 | + # Should be finite or -inf |
| 423 | + assert np.all(np.isfinite(log_posterior) | np.isneginf(log_posterior)) |
0 commit comments