Skip to content

Commit 2bc33ee

Browse files
committed
post-review hardening · LLM_CONCURRENCY floor / scm JSON-safe / expanded tests
Fixes and test coverage landed after an independent code-review pass: - soul.py: ThreadPoolExecutor(max_workers=...) now floors at 1 so LLM_CONCURRENCY=0 (or empty-chosen) no longer raises ValueError. - scm.py: equilibrium_under_do coerces result.spectral_radius to a JSON-safe float-or-null; the banach solver path leaves it None, which previously ended up as Python None in the API response. - test_smoke.py: - test_three_layer_platform_stack_parity_for_douyin_instagram_youtube_shorts verifies each of the 3 new platforms imports, instantiates, and simulates an impression with the expected ImpressionResult schema. - test_audience_skew_actually_reaches_fyp_world_models: regression lock for the base-class extraction — asserts IG young_boost=1.35 and YT young_boost=1.45 systematically pull more young agents than the population baseline. Would have caught the pre-fix bug where FYP subclasses silently dropped the skew. - test_transformer_and_neural_hawkes_load_pretrained_none_emits_resolved_path_in_error: fills in regression coverage for the two torch-dependent models' auto-resolve paths that were previously only covered for LightGBM. - test_multi_worker_warning_* (2 tests): pins the startup-hook warning trigger + no-emit paths. - test_lightgbm_load_pretrained_none_auto_resolves_or_errors_with_path: pins the LightGBM auto-resolve behavior.
1 parent 9a4b3ee commit 2bc33ee

3 files changed

Lines changed: 149 additions & 3 deletions

File tree

backend/oransim/agents/soul.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,11 @@ def call_one(pid):
525525
r = soul_infer_llm(persona=p, **kol_kwargs)
526526
return _post(pid, r)
527527

528-
workers = min(
529-
int(__import__("os").environ.get("LLM_CONCURRENCY", "15")), len(chosen)
528+
# floor at 1: ThreadPoolExecutor(max_workers=0) raises ValueError,
529+
# so defensively guard against LLM_CONCURRENCY=0 or empty-chosen.
530+
workers = max(
531+
1,
532+
min(int(__import__("os").environ.get("LLM_CONCURRENCY", "15")), len(chosen)),
530533
)
531534
results_map = {}
532535
total_in = total_out = 0

backend/oransim/causal/scm.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -673,9 +673,13 @@ def f(x: np.ndarray) -> np.ndarray:
673673
raise ValueError(f"unknown method {method!r}; expected 'linear_closed_form' or 'banach'")
674674

675675
equilibrium = {name: float(result.x[i]) for name, i in idx.items()}
676+
# banach_iterate does not compute spectral radius (it's linear-SCM only);
677+
# coerce to a JSON-safe float-or-null so FastAPI callers see a consistent
678+
# schema regardless of which solver path ran.
679+
spectral_radius = float(result.spectral_radius) if result.spectral_radius is not None else None
676680
return {
677681
"equilibrium": equilibrium,
678-
"spectral_radius": result.spectral_radius,
682+
"spectral_radius": spectral_radius,
679683
"converged": result.converged,
680684
"n_iter": result.n_iter,
681685
"residual_inf": result.residual_inf,

tests/test_smoke.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,145 @@ def emit(self, record: _logging.LogRecord) -> None:
273273
os.environ.pop("WEB_CONCURRENCY", None)
274274

275275

276+
def test_three_layer_platform_stack_parity_for_douyin_instagram_youtube_shorts():
277+
"""P2-③ regression: Douyin / Instagram / YouTube Shorts must each expose
278+
the full (PRS, RecSysRL, WorldModel) 3-layer surface that previously
279+
only TikTok / XHS had. Each layer must import, instantiate, and the
280+
world-model simulate_impression path must return an ImpressionResult
281+
with the same schema shape as XHS.
282+
"""
283+
from oransim.data.platforms import PLATFORMS
284+
from oransim.platforms.douyin import (
285+
DouyinPRS,
286+
DouyinRecSysRLSimulator,
287+
DouyinWorldModel,
288+
)
289+
from oransim.platforms.instagram import (
290+
InstagramPRS,
291+
InstagramRecSysRLSimulator,
292+
InstagramWorldModel,
293+
)
294+
from oransim.platforms.youtube_shorts import (
295+
YouTubeShortsPRS,
296+
YouTubeShortsRecSysRLSimulator,
297+
YouTubeShortsWorldModel,
298+
)
299+
300+
assert "instagram" in PLATFORMS, "IG must be registered in data.platforms"
301+
assert "youtube_shorts" in PLATFORMS, "YT Shorts must be registered in data.platforms"
302+
303+
for prs_cls in (DouyinPRS, InstagramPRS, YouTubeShortsPRS):
304+
prs = prs_cls()
305+
assert prs.is_ready() is False
306+
info = prs.info()
307+
assert info["loaded"] is False
308+
assert "reason" in info
309+
310+
os.environ["POP_SIZE"] = "500"
311+
os.environ["SOUL_POOL_N"] = "5"
312+
os.environ["LLM_MODE"] = "mock"
313+
from oransim.data.creatives import make_creative
314+
from oransim.data.population import generate_population
315+
316+
pop = generate_population(N=500, seed=7)
317+
318+
creative = make_creative("c1", "测试", duration_sec=20.0)
319+
320+
wms = {
321+
"douyin": (DouyinWorldModel(pop), DouyinRecSysRLSimulator),
322+
"instagram": (InstagramWorldModel(pop), InstagramRecSysRLSimulator),
323+
"youtube_shorts": (YouTubeShortsWorldModel(pop), YouTubeShortsRecSysRLSimulator),
324+
}
325+
for platform, (wm, rl_cls) in wms.items():
326+
imp = wm.simulate_impression(creative, platform, budget_cny=5000.0, rng_seed=7)
327+
assert imp.platform == platform
328+
assert imp.total_impressions > 0
329+
assert len(imp.agent_idx) > 0
330+
for key in ("content", "platform_activity", "audience_filter", "kol_boost"):
331+
assert key in imp.score_breakdown, f"{platform} missing {key} in score_breakdown"
332+
rl = rl_cls(wm)
333+
assert rl is not None
334+
335+
336+
def test_audience_skew_actually_reaches_fyp_world_models():
337+
"""Regression: reviewer caught that the 4 FYP subclasses (TikTok, Douyin,
338+
Instagram, YouTube Shorts) were completely overriding ``simulate_impression``
339+
and silently dropping the XHS base class's ``audience_skew`` application.
340+
That made every ``PLATFORMS[p].audience_skew`` entry dead code for those
341+
platforms.
342+
343+
Verify the fix: platforms with ``young_boost >> 1.0`` should pick
344+
systematically more young-age agents at the top of their ranking
345+
than platforms with ``young_boost ≈ 1.0``.
346+
"""
347+
os.environ["POP_SIZE"] = "500"
348+
from oransim.data.creatives import make_creative
349+
from oransim.data.population import generate_population
350+
from oransim.platforms.instagram import InstagramWorldModel
351+
from oransim.platforms.youtube_shorts import YouTubeShortsWorldModel
352+
353+
pop = generate_population(N=8000, seed=11)
354+
creative = make_creative("c_skew", "测试", duration_sec=22.0)
355+
356+
ig = InstagramWorldModel(pop)
357+
yt = YouTubeShortsWorldModel(pop)
358+
359+
# Low enough budget that k << pop.N so the top-k selection is
360+
# actually discriminating on score, not returning the whole pop.
361+
imp_ig = ig.simulate_impression(creative, "instagram", budget_cny=50.0, rng_seed=21)
362+
imp_yt = yt.simulate_impression(creative, "youtube_shorts", budget_cny=50.0, rng_seed=21)
363+
364+
frac_young_ig = (pop.age_idx[imp_ig.agent_idx] <= 2).mean()
365+
frac_young_yt = (pop.age_idx[imp_yt.agent_idx] <= 2).mean()
366+
frac_young_pop = (pop.age_idx <= 2).mean()
367+
368+
assert frac_young_ig > frac_young_pop, (
369+
f"IG young_boost=1.35 not reaching simulate_impression — "
370+
f"imp young frac {frac_young_ig:.3f} not > pop baseline {frac_young_pop:.3f}"
371+
)
372+
assert frac_young_yt > frac_young_pop, (
373+
f"YT young_boost=1.45 not reaching simulate_impression — "
374+
f"imp young frac {frac_young_yt:.3f} not > pop baseline {frac_young_pop:.3f}"
375+
)
376+
377+
378+
def test_transformer_and_neural_hawkes_load_pretrained_none_emits_resolved_path_in_error():
379+
"""P1-05 regression: all three ``load_pretrained(None)`` flows must
380+
name the path they auto-resolved-against when raising FileNotFoundError,
381+
so the operator knows exactly where to drop a checkpoint. Previously
382+
only LightGBM was regression-tested; transformer + neural_hawkes were
383+
not, so a revert there could silently break the auto-resolve UX.
384+
"""
385+
if not _torch_available():
386+
pytest.skip("transformer + neural_hawkes load_pretrained requires torch")
387+
388+
from oransim.diffusion.neural_hawkes import (
389+
CausalNeuralHawkesConfig,
390+
CausalNeuralHawkesProcess,
391+
)
392+
from oransim.world_model.transformer import (
393+
CausalTransformerWMConfig,
394+
CausalTransformerWorldModel,
395+
)
396+
397+
for model_cls, cfg_cls, fname in (
398+
(CausalTransformerWorldModel, CausalTransformerWMConfig, "model.pt"),
399+
(CausalNeuralHawkesProcess, CausalNeuralHawkesConfig, "model.pt"),
400+
):
401+
repo_root = Path(__file__).resolve().parents[1]
402+
expected_dir = repo_root / cfg_cls().checkpoint_dir
403+
expected_candidate = expected_dir / fname
404+
if expected_candidate.exists():
405+
continue
406+
with pytest.raises(FileNotFoundError) as exc:
407+
model_cls.load_pretrained(None)
408+
msg = str(exc.value)
409+
assert str(expected_candidate) in msg, (
410+
f"{model_cls.__name__}.load_pretrained(None) error should name "
411+
f"the resolved path {expected_candidate}, got: {msg}"
412+
)
413+
414+
276415
def test_lightgbm_load_pretrained_none_auto_resolves_or_errors_with_path():
277416
"""P2-① regression: ``LightGBMQuantileWorldModel.load_pretrained(None)``
278417
must auto-resolve to ``<checkpoint_dir>/booster.pkl`` when present and

0 commit comments

Comments
 (0)