|
"""Competitive matching baseline evaluated on Dominick's test data.

Dominick's lacks time-varying competitor data, so the category-average
price serves as a proxy for the competitor median price. Each SKU's price
is set to that average (± 2% noise), and gross margin is computed using the
actual units sold in the test period.
"""
7 | 6 |
|
8 | 7 | from __future__ import annotations |
9 | 8 |
|
10 | 9 | import argparse |
| 10 | +import json |
| 11 | +from pathlib import Path |
11 | 12 |
|
12 | 13 | import numpy as np |
| 14 | +import pandas as pd |
13 | 15 |
|
14 | | -try: |
15 | | - import wandb |
16 | 16 |
|
17 | | - HAS_WANDB = True |
18 | | -except ImportError: |
19 | | - HAS_WANDB = False |
def load_test_data(data_dir: Path, test_start_week: int = 341) -> pd.DataFrame:
    """Load Dominick's CSO movement data and keep the usable test-period rows.

    Reads ``<data_dir>/category/wcso.csv``, keeps rows flagged ``OK == 1``
    that have a positive ``PRICE`` and a positive ``QTY``, restricts them to
    ``WEEK >= test_start_week`` and adds two derived columns:

    * ``unit_price`` -- ``PRICE / QTY``
    * ``cost`` -- ``PRICE * (1 - PROFIT / 100) / QTY``

    NOTE(review): these formulas treat PRICE as the price of a QTY-unit
    bundle and PROFIT as a gross-margin percentage -- confirm against the
    Dominick's data manual.

    Args:
        data_dir: Directory that contains ``category/wcso.csv``.
        test_start_week: First week (inclusive) of the test period.

    Returns:
        An independent DataFrame holding the columns read from the CSV plus
        ``unit_price`` and ``cost``.
    """
    df = pd.read_csv(
        data_dir / "category" / "wcso.csv",
        usecols=["STORE", "UPC", "WEEK", "MOVE", "QTY", "PRICE", "PROFIT", "OK"],
    )
    keep = (
        (df["OK"] == 1)
        & (df["PRICE"] > 0)
        # QTY is a divisor below; a zero would yield infinite prices/costs.
        & (df["QTY"] > 0)
        & (df["WEEK"] >= test_start_week)
    )
    # Copy before adding columns so we never assign into a filtered view.
    test = df.loc[keep].copy()
    test["unit_price"] = test["PRICE"] / test["QTY"]
    test["cost"] = test["PRICE"] * (1 - test["PROFIT"] / 100) / test["QTY"]
    return test
20 | 28 |
|
21 | 29 |
|
def run_competitive_matching(
    test_df: pd.DataFrame,
    noise_pct: float = 0.02,
    seed: int = 42,
) -> dict:
    """Replay the test data with prices matched to a reference average.

    For every row the proposed price is the mean ``unit_price`` of that row's
    UPC (computed over ``test_df`` itself), perturbed by uniform noise in
    ``[-noise_pct, +noise_pct]`` and floored at 1% above ``cost``. Gross
    margin is ``(proposed_price - cost) * MOVE``, i.e. the observed units
    sold are reused unchanged.

    NOTE(review): the reference is a per-UPC mean, not a mean across the whole
    category, and it is taken from the same rows being evaluated -- confirm
    this is the intended proxy.

    Args:
        test_df: Rows with ``UPC``, ``WEEK``, ``unit_price``, ``cost`` and
            ``MOVE`` columns. The frame is not modified.
        noise_pct: Half-width of the multiplicative price noise.
        seed: Seed for the NumPy random generator.

    Returns:
        A JSON-serialisable dict with the method name, ``noise_pct``, the
        total gross margin, the mean margin per distinct week
        (``mean_return``), and the row and week counts. For an empty frame
        all totals are zero rather than NaN.
    """
    n_rows = len(test_df)
    n_weeks = int(test_df["WEEK"].nunique()) if n_rows else 0

    total_margin = 0.0
    if n_rows:
        rng = np.random.default_rng(seed)
        reference = test_df.groupby("UPC")["unit_price"].transform("mean")
        noise = rng.uniform(-noise_pct, noise_pct, n_rows)
        # Never price below cost: keep at least a 1% markup.
        floor = test_df["cost"] * 1.01
        proposed = np.maximum(reference * (1 + noise), floor)
        margin = (proposed - test_df["cost"]) * test_df["MOVE"]
        total_margin = float(margin.sum())

    # Guard the per-week average so an empty input cannot produce NaN,
    # which would serialise to invalid JSON.
    mean_return = total_margin / n_weeks if n_weeks else 0.0

    return {
        "method": "competitive_matching",
        "noise_pct": noise_pct,
        "total_gross_margin": total_margin,
        "mean_return": float(mean_return),
        "n_rows": n_rows,
        "n_weeks": n_weeks,
        "eval_type": "data_replay",
    }
62 | 56 |
|
63 | 57 |
|
def main() -> None:
    """Command-line entry point: evaluate the baseline and save its metrics.

    Parses ``--data-dir``, ``--noise-pct``, ``--seed`` and ``--output``, loads
    the test rows, runs the competitive-matching replay, prints a short
    summary and writes the resulting metrics dict to ``--output`` as JSON
    (creating the parent directory if needed).
    """
    parser = argparse.ArgumentParser(description="Competitive matching baseline")
    parser.add_argument(
        "--data-dir",
        type=Path,
        default=Path("/workspace/docs/data"),
    )
    parser.add_argument("--noise-pct", type=float, default=0.02)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("/workspace/docs/results/baselines/competitive_matching.json"),
    )
    args = parser.parse_args()

    print("Loading Dominick's CSO test data (weeks 341-400)...")
    test_df = load_test_data(args.data_dir)
    print(f"  {len(test_df)} rows, {test_df['WEEK'].nunique()} weeks, {test_df['UPC'].nunique()} UPCs")

    results = run_competitive_matching(test_df, args.noise_pct, args.seed)
    print(f"Competitive matching: mean return = {results['mean_return']:.2f}")

    args.output.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output independent of the platform locale.
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"Saved to {args.output}")
95 | 85 |
|
96 | 86 |
|
97 | 87 | if __name__ == "__main__": |
|
0 commit comments