Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +2 -1
- README.md +103 -38
- client.py +4 -0
- inference.py +32 -28
- models.py +4 -2
- server/constants.py +8 -0
- server/grader.py +40 -89
- server/inventory_env.py +26 -7
Dockerfile
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
FROM ghcr.io/meta-pytorch/openenv-base:latest AS builder
|
| 2 |
|
|
|
|
|
|
|
| 3 |
RUN apt-get update && apt-get install -y git curl && \
|
| 4 |
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 5 |
ENV PATH="/root/.local/bin:$PATH"
|
|
@@ -25,5 +27,4 @@ EXPOSE 8000
|
|
| 25 |
HEALTHCHECK --interval=30s --timeout=3s \
|
| 26 |
CMD curl -f http://localhost:8000/health || exit 1
|
| 27 |
|
| 28 |
-
ENV ENABLE_WEB_INTERFACE=true
|
| 29 |
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
|
|
| 1 |
FROM ghcr.io/meta-pytorch/openenv-base:latest AS builder
|
| 2 |
|
| 3 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 4 |
+
|
| 5 |
RUN apt-get update && apt-get install -y git curl && \
|
| 6 |
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 7 |
ENV PATH="/root/.local/bin:$PATH"
|
|
|
|
| 27 |
HEALTHCHECK --interval=30s --timeout=3s \
|
| 28 |
CMD curl -f http://localhost:8000/health || exit 1
|
| 29 |
|
|
|
|
| 30 |
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -12,7 +12,11 @@ base_path: /web
|
|
| 12 |
|
| 13 |
# Retail Inventory Optimization Environment
|
| 14 |
|
| 15 |
-
An OpenEnv reinforcement learning environment that simulates day-by-day retail inventory management across 5 product categories. An AI agent must
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
## Environment Description
|
| 18 |
|
|
@@ -26,22 +30,73 @@ You manage a retail store selling 5 products with different characteristics:
|
|
| 26 |
| Furniture | $200 | $130 | $70 | No expiry |
|
| 27 |
| Toys | $25 | $12 | $13 | No expiry |
|
| 28 |
|
| 29 |
-
Each day
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
## Action Space
|
| 32 |
|
| 33 |
```python
|
| 34 |
class InventoryAction(Action):
|
| 35 |
-
buy_quantities: Dict[str, int] = {}
|
| 36 |
delivery_method: Literal["slow", "medium", "fast"] = "slow"
|
| 37 |
-
liquidate: Dict[str, int] = {}
|
|
|
|
| 38 |
```
|
| 39 |
|
| 40 |
| Field | Description |
|
| 41 |
|-------|-------------|
|
| 42 |
| `buy_quantities` | Products and amounts to order. Empty `{}` to skip buying. |
|
| 43 |
-
| `delivery_method` | `"slow"` ($2/unit,
|
| 44 |
| `liquidate` | Products and amounts to dispose of (no revenue). Use for expiring groceries or freeing warehouse space. |
|
|
|
|
| 45 |
|
| 46 |
## Observation Space
|
| 47 |
|
|
@@ -51,56 +106,48 @@ class InventoryObservation(Observation):
|
|
| 51 |
total_cash: float
|
| 52 |
day_profit: float
|
| 53 |
total_profit: float
|
| 54 |
-
demand_today: Dict[str, int]
|
| 55 |
-
updated_inventory: Dict[str, List
|
| 56 |
-
remaining_capacity: Dict[str, int]
|
| 57 |
-
updated_events: Dict[str, int]
|
| 58 |
-
updated_deliveries: List[Dict
|
| 59 |
```
|
| 60 |
|
| 61 |
-
The inventory uses a batch format with FIFO selling: `{"groceries": [[20, 3], [10, 5]]}` means 20 units expiring in 3 days and 10 units expiring in 5 days.
|
| 62 |
-
|
| 63 |
## Tasks (Easy / Medium / Hard)
|
| 64 |
|
| 65 |
### Easy — "Steady State"
|
| 66 |
- Low starting stock, low steady demand, no events
|
| 67 |
- Starting cash: $1,000 | Full warehouse capacity
|
| 68 |
- Agent needs to restock regularly but demand is predictable
|
|
|
|
| 69 |
|
| 70 |
### Medium — "Seasonal Rush"
|
| 71 |
- Default stock/cash, all 5 events spread across 30 days
|
| 72 |
- Events: Black Friday (day 6), Christmas (day 12), Back to School (day 18), Summer Clearance (day 24), New Competitor (day 28)
|
| 73 |
-
- Agent must anticipate demand spikes and restock
|
| 74 |
|
| 75 |
### Hard — "Chaos Mode"
|
| 76 |
-
- Half starting cash ($500), low stock, events packed close together
|
| 77 |
-
- Higher demand, smaller warehouse capacity
|
| 78 |
-
- Agent must balance tight budget, overlapping event spikes, and
|
| 79 |
-
|
| 80 |
-
## Reward Function
|
| 81 |
-
|
| 82 |
-
Per-step reward based on multiple signals:
|
| 83 |
-
- **Successful sales**: `+sold_units * sell_price * 0.001` (proportional to revenue)
|
| 84 |
-
- **Missed sales**: `-missed_units * sell_price * 0.001` (proportional to lost revenue)
|
| 85 |
-
- **Expired groceries**: `-0.05 * expired_count`
|
| 86 |
-
- **Failed purchases**: `-0.5` per order that exceeds available cash
|
| 87 |
-
- **Liquidation loss**: `-liquidated_value * 0.001` (proportional to cost of disposed stock)
|
| 88 |
|
| 89 |
## Grading (0.0 - 1.0)
|
| 90 |
|
| 91 |
-
Each task is scored by comparing agent profit against two baselines:
|
| 92 |
-
- **Floor**: Passive agent that never buys (sells initial stock until
|
| 93 |
-
- **Ceiling**:
|
| 94 |
|
| 95 |
```
|
| 96 |
score = clamp((agent_profit - floor) / (ceiling - floor), 0.0, 1.0)
|
| 97 |
```
|
| 98 |
|
|
|
|
|
|
|
| 99 |
## Setup
|
| 100 |
|
| 101 |
```bash
|
| 102 |
# Install dependencies
|
| 103 |
-
pip install openenv-core[core] fastapi uvicorn pydantic openai numpy
|
| 104 |
|
| 105 |
# Run grader baselines
|
| 106 |
python -c "from server.grader import compute_baselines; [print(f'{t}: floor={f:.2f}, ceiling={c:.2f}') for t in ['easy','medium','hard'] for f,c in [compute_baselines(t)]]"
|
|
@@ -116,10 +163,17 @@ curl -X POST http://localhost:8000/reset
|
|
| 116 |
## Running Inference
|
| 117 |
|
| 118 |
```bash
|
|
|
|
| 119 |
export API_BASE_URL="https://router.huggingface.co/v1"
|
| 120 |
-
export MODEL_NAME="Qwen/
|
| 121 |
export HF_TOKEN="your-token"
|
| 122 |
python inference.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
```
|
| 124 |
|
| 125 |
## Docker
|
|
@@ -129,21 +183,32 @@ docker build -t inventory-env .
|
|
| 129 |
docker run -p 8000:8000 inventory-env
|
| 130 |
```
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
## Project Structure
|
| 133 |
|
| 134 |
```
|
| 135 |
-
|
| 136 |
-
├── models.py # InventoryAction, InventoryObservation, InventoryState
|
| 137 |
├── client.py # EnvClient for remote WebSocket connections
|
| 138 |
-
├── inference.py # LLM inference script (runs all 3 tasks)
|
| 139 |
├── openenv.yaml # OpenEnv spec manifest
|
| 140 |
├── pyproject.toml # Python dependencies
|
| 141 |
-
├── Dockerfile #
|
| 142 |
├── server/
|
| 143 |
-
│ ├── app.py # FastAPI server (create_app)
|
| 144 |
-
│ ├── inventory_env.py # Environment (reset, step, state)
|
| 145 |
-
│ ├── constants.py #
|
| 146 |
-
│ └── grader.py # Floor/ceiling baselines and scoring
|
| 147 |
└── scripts/
|
| 148 |
└── validate-submission.sh # Pre-submission validator
|
| 149 |
```
|
|
|
|
| 12 |
|
| 13 |
# Retail Inventory Optimization Environment
|
| 14 |
|
| 15 |
+
An OpenEnv reinforcement learning environment that simulates day-by-day retail inventory management across 5 product categories. An AI agent must balance purchasing, pricing, shipping, and liquidation decisions to maximize profit over a 30-day episode.
|
| 16 |
+
|
| 17 |
+
## Why Inventory Management?
|
| 18 |
+
|
| 19 |
+
Retail inventory optimization is a real-world task performed daily by store managers, warehouse operators, and supply chain planners. The agent faces the same challenges as a human manager: uncertain demand, perishable goods, shipping delays, seasonal events, and limited cash flow. Poor decisions lead to stockouts (lost sales), waste (expired goods), or cash tied up in unsold inventory.
|
| 20 |
|
| 21 |
## Environment Description
|
| 22 |
|
|
|
|
| 30 |
| Furniture | $200 | $130 | $70 | No expiry |
|
| 31 |
| Toys | $25 | $12 | $13 | No expiry |
|
| 32 |
|
| 33 |
+
Each day the agent receives the current store state (cash, inventory with batch expiry, pending deliveries, upcoming events) and must decide:
|
| 34 |
+
- **What to buy** and how much of each product
|
| 35 |
+
- **How to ship** — slow (cheap but unreliable), medium, or fast (expensive but guaranteed)
|
| 36 |
+
- **What to liquidate** — dispose of expiring or excess stock
|
| 37 |
+
- **How to price** — set per-product price multipliers that affect demand via elasticity
|
| 38 |
+
|
| 39 |
+
Customer demand is generated each day based on base ranges, weekend boosts (1.2x on days 5-6), and seasonal event multipliers (up to 3x during Black Friday, Christmas, etc.). The agent cannot see future demand — only yesterday's demand as feedback.
|
| 40 |
+
|
| 41 |
+
The episode runs for 30 days. The goal is to maximize total profit.
|
| 42 |
+
|
| 43 |
+
## Environment Design Highlights
|
| 44 |
+
|
| 45 |
+
### Batch-Tracked Inventory with FIFO
|
| 46 |
+
Inventory is tracked per batch with individual expiry dates. Groceries expire after 5 days. Selling and liquidation follow FIFO (First In, First Out) — oldest batches are consumed first, mimicking real warehouse operations.
|
| 47 |
+
|
| 48 |
+
```json
|
| 49 |
+
{"groceries": [[20, 3], [15, 5], [10, 1]]}
|
| 50 |
+
```
|
| 51 |
+
Three batches: 20 units (3 days left), 15 units (5 days left), 10 units (1 day left — liquidate or lose them).
|
| 52 |
+
|
| 53 |
+
### Dynamic Pricing with Price Elasticity
|
| 54 |
+
The agent can set per-product price multipliers (0.5x to 1.5x) each day. Demand responds to pricing via realistic elasticity values — groceries are inelastic (people buy regardless), while clothing and toys are highly elastic (price-sensitive customers).
|
| 55 |
+
|
| 56 |
+
| Product | Elasticity | Effect of 1.3x price |
|
| 57 |
+
|---------|-----------|----------------------|
|
| 58 |
+
| Electronics | 1.2 | Demand drops ~24% |
|
| 59 |
+
| Clothing | 1.5 | Demand drops ~38% |
|
| 60 |
+
| Groceries | 0.4 | Demand drops only ~11% |
|
| 61 |
+
| Furniture | 0.8 | Demand drops ~22% |
|
| 62 |
+
| Toys | 1.3 | Demand drops ~33% |
|
| 63 |
+
|
| 64 |
+
### Delivery Jitter
|
| 65 |
+
Shipping isn't perfectly reliable. Slow delivery has +/-2 days of variance (arriving in 3-7 days) and medium has +/-1 day (arriving in 2-4 days). Only fast delivery, at 5x the per-unit shipping cost of slow ($10 vs. $2), is guaranteed next-day. The agent must account for this uncertainty when planning restocks before events.
|
| 66 |
+
|
| 67 |
+
### Seasonal Events with Demand Spikes
|
| 68 |
+
Five events are spread across the 30-day episode. Each event triggers a 2-day demand multiplier — Black Friday triples electronics demand, Christmas triples toys, etc. A "new competitor" event actually reduces demand. The agent sees countdowns and must stock up in advance.
|
| 69 |
+
|
| 70 |
+
### Decomposed Per-Step Reward
|
| 71 |
+
The reward function provides granular feedback every step, not just end-of-episode:
|
| 72 |
+
|
| 73 |
+
| Signal | Formula | Purpose |
|
| 74 |
+
|--------|---------|---------|
|
| 75 |
+
| Successful sales | `+sold * sell_price * 0.001` | Reward revenue proportional to product value |
|
| 76 |
+
| Missed sales | `-missed * sell_price * 0.001` | Penalize stockouts, weighted by product value |
|
| 77 |
+
| Expired groceries | `-0.05 * expired_count` | Penalize waste from overbuying perishables |
|
| 78 |
+
| Failed purchases | `-0.5 per rejected order` | Penalize ordering beyond cash budget |
|
| 79 |
+
| Liquidation loss | `-disposed_value * 0.001` | Penalize disposal proportional to cost |
|
| 80 |
+
|
| 81 |
+
### Conversation History for LLM Agents
|
| 82 |
+
The inference script maintains a rolling 7-day conversation history. The LLM sees its past observations and decisions, enabling it to spot demand trends, learn from mistakes, and adjust strategy across the episode.
|
| 83 |
|
| 84 |
## Action Space
|
| 85 |
|
| 86 |
```python
|
| 87 |
class InventoryAction(Action):
|
| 88 |
+
buy_quantities: Dict[str, int] = {}
|
| 89 |
delivery_method: Literal["slow", "medium", "fast"] = "slow"
|
| 90 |
+
liquidate: Dict[str, int] = {}
|
| 91 |
+
price_multipliers: Dict[str, float] = {}
|
| 92 |
```
|
| 93 |
|
| 94 |
| Field | Description |
|
| 95 |
|-------|-------------|
|
| 96 |
| `buy_quantities` | Products and amounts to order. Empty `{}` to skip buying. |
|
| 97 |
+
| `delivery_method` | `"slow"` ($2/unit, 3-7 days), `"medium"` ($5/unit, 2-4 days), `"fast"` ($10/unit, 1 day guaranteed) |
|
| 98 |
| `liquidate` | Products and amounts to dispose of (no revenue). Use for expiring groceries or freeing warehouse space. |
|
| 99 |
+
| `price_multipliers` | Per-product selling price multiplier (0.5-1.5). Affects demand via elasticity. Default 1.0 if omitted. |
|
| 100 |
|
| 101 |
## Observation Space
|
| 102 |
|
|
|
|
| 106 |
total_cash: float
|
| 107 |
day_profit: float
|
| 108 |
total_profit: float
|
| 109 |
+
demand_today: Dict[str, int] # yesterday's demand (feedback)
|
| 110 |
+
updated_inventory: Dict[str, List] # [[qty, days_left], ...] per batch
|
| 111 |
+
remaining_capacity: Dict[str, int] # warehouse space left per product
|
| 112 |
+
updated_events: Dict[str, int] # event countdowns (negative = active/ended)
|
| 113 |
+
updated_deliveries: List[Dict] # in-transit shipments
|
| 114 |
```
|
| 115 |
|
|
|
|
|
|
|
| 116 |
## Tasks (Easy / Medium / Hard)
|
| 117 |
|
| 118 |
### Easy — "Steady State"
|
| 119 |
- Low starting stock, low steady demand, no events
|
| 120 |
- Starting cash: $1,000 | Full warehouse capacity
|
| 121 |
- Agent needs to restock regularly but demand is predictable
|
| 122 |
+
- No events, no demand spikes — pure supply chain management
|
| 123 |
|
| 124 |
### Medium — "Seasonal Rush"
|
| 125 |
- Default stock/cash, all 5 events spread across 30 days
|
| 126 |
- Events: Black Friday (day 6), Christmas (day 12), Back to School (day 18), Summer Clearance (day 24), New Competitor (day 28)
|
| 127 |
+
- Agent must anticipate demand spikes and restock before events hit
|
| 128 |
|
| 129 |
### Hard — "Chaos Mode"
|
| 130 |
+
- Half starting cash ($500), low stock, events packed close together (days 4, 8, 12, 16, 20)
|
| 131 |
+
- Higher base demand, smaller warehouse capacity
|
| 132 |
+
- Agent must balance a tight budget, closely spaced event spikes, perishable goods, and limited storage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
## Grading (0.0 - 1.0)
|
| 135 |
|
| 136 |
+
Each task is scored by comparing agent profit against two deterministic baselines:
|
| 137 |
+
- **Floor**: Passive agent that never buys (sells initial stock until depleted)
|
| 138 |
+
- **Ceiling**: Theoretical max profit assuming perfect demand knowledge and cheapest shipping
|
| 139 |
|
| 140 |
```
|
| 141 |
score = clamp((agent_profit - floor) / (ceiling - floor), 0.0, 1.0)
|
| 142 |
```
|
| 143 |
|
| 144 |
+
Both baselines are computed fresh on each run using a seeded RNG, so they are deterministic and the resulting scores are reproducible across runs.
|
| 145 |
+
|
| 146 |
## Setup
|
| 147 |
|
| 148 |
```bash
|
| 149 |
# Install dependencies
|
| 150 |
+
pip install "openenv-core[core]" fastapi uvicorn pydantic openai numpy python-dotenv
|
| 151 |
|
| 152 |
# Run grader baselines
|
| 153 |
python -c "from server.grader import compute_baselines; [print(f'{t}: floor={f:.2f}, ceiling={c:.2f}') for t in ['easy','medium','hard'] for f,c in [compute_baselines(t)]]"
|
|
|
|
| 163 |
## Running Inference
|
| 164 |
|
| 165 |
```bash
|
| 166 |
+
# Using HuggingFace Router (optionally set TASK_NAME to easy, medium, or hard; defaults to easy)
|
| 167 |
export API_BASE_URL="https://router.huggingface.co/v1"
|
| 168 |
+
export MODEL_NAME="Qwen/Qwen3-32B"
|
| 169 |
export HF_TOKEN="your-token"
|
| 170 |
python inference.py
|
| 171 |
+
|
| 172 |
+
# Using OpenAI
|
| 173 |
+
export API_BASE_URL="https://api.openai.com/v1"
|
| 174 |
+
export MODEL_NAME="gpt-4o"
|
| 175 |
+
export API_KEY="sk-your-key"
|
| 176 |
+
python inference.py
|
| 177 |
```
|
| 178 |
|
| 179 |
## Docker
|
|
|
|
| 183 |
docker run -p 8000:8000 inventory-env
|
| 184 |
```
|
| 185 |
|
| 186 |
+
## Step Execution Order
|
| 187 |
+
|
| 188 |
+
Each `step()` call processes in this order:
|
| 189 |
+
1. Tick event countdowns (into negatives to track active duration)
|
| 190 |
+
2. Remove expired groceries (shelf life = 0)
|
| 191 |
+
3. Receive arriving deliveries (add to inventory with fresh shelf life)
|
| 192 |
+
4. Process purchase orders (deduct cash, schedule deliveries with jitter)
|
| 193 |
+
5. Generate demand (base + weekend boost + event multipliers + price elasticity)
|
| 194 |
+
6. Sell products FIFO (oldest batches first, track missed sales)
|
| 195 |
+
7. Liquidate requested stock FIFO (no revenue)
|
| 196 |
+
8. Compute profit, reward, update state, return observation
|
| 197 |
+
|
| 198 |
## Project Structure
|
| 199 |
|
| 200 |
```
|
| 201 |
+
├── models.py # InventoryAction, InventoryObservation, InventoryState (Pydantic)
|
|
|
|
| 202 |
├── client.py # EnvClient for remote WebSocket connections
|
| 203 |
+
├── inference.py # LLM inference script with conversation history (runs the task selected by TASK_NAME, default "easy")
|
| 204 |
├── openenv.yaml # OpenEnv spec manifest
|
| 205 |
├── pyproject.toml # Python dependencies
|
| 206 |
+
├── Dockerfile # Multi-stage container build from openenv-base
|
| 207 |
├── server/
|
| 208 |
+
│ ├── app.py # FastAPI server (create_app + uvicorn entry point)
|
| 209 |
+
│ ├── inventory_env.py # Environment (reset, step, state, demand generation)
|
| 210 |
+
│ ├── constants.py # All configs: prices, stock, events, tasks, elasticity
|
| 211 |
+
│ └── grader.py # Floor/ceiling baselines and 0.0-1.0 scoring
|
| 212 |
└── scripts/
|
| 213 |
└── validate-submission.sh # Pre-submission validator
|
| 214 |
```
|
client.py
CHANGED
|
@@ -25,6 +25,9 @@ class InventoryEnv(EnvClient[InventoryAction, InventoryObservation, InventorySta
|
|
| 25 |
if action.liquidate is not None:
|
| 26 |
payload["liquidate"] = action.liquidate
|
| 27 |
|
|
|
|
|
|
|
|
|
|
| 28 |
return payload
|
| 29 |
|
| 30 |
|
|
@@ -40,6 +43,7 @@ class InventoryEnv(EnvClient[InventoryAction, InventoryObservation, InventorySta
|
|
| 40 |
total_profit = obs_data.get("total_profit", 0),
|
| 41 |
demand_today = obs_data.get("demand_today", {}),
|
| 42 |
updated_inventory = obs_data.get("updated_inventory", {}),
|
|
|
|
| 43 |
updated_events = obs_data.get("updated_events", {}),
|
| 44 |
updated_deliveries = obs_data.get("updated_deliveries", []),
|
| 45 |
done = obs_data.get("done", False),
|
|
|
|
| 25 |
if action.liquidate is not None:
|
| 26 |
payload["liquidate"] = action.liquidate
|
| 27 |
|
| 28 |
+
if action.price_multipliers is not None:
|
| 29 |
+
payload["price_multipliers"] = action.price_multipliers
|
| 30 |
+
|
| 31 |
return payload
|
| 32 |
|
| 33 |
|
|
|
|
| 43 |
total_profit = obs_data.get("total_profit", 0),
|
| 44 |
demand_today = obs_data.get("demand_today", {}),
|
| 45 |
updated_inventory = obs_data.get("updated_inventory", {}),
|
| 46 |
+
remaining_capacity = obs_data.get("remaining_capacity", {}),
|
| 47 |
updated_events = obs_data.get("updated_events", {}),
|
| 48 |
updated_deliveries = obs_data.get("updated_deliveries", []),
|
| 49 |
done = obs_data.get("done", False),
|
inference.py
CHANGED
|
@@ -25,8 +25,9 @@ from server.constants import EXTRA_INVENTORY_COST, EVENT_DURATION
|
|
| 25 |
from models import InventoryAction
|
| 26 |
|
| 27 |
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 28 |
-
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
|
| 29 |
-
MODEL_NAME = os.getenv("MODEL_NAME")
|
|
|
|
| 30 |
MAX_DAYS = 30
|
| 31 |
|
| 32 |
SYSTEM_PROMPT = textwrap.dedent("""
|
|
@@ -42,7 +43,7 @@ SYSTEM_PROMPT = textwrap.dedent("""
|
|
| 42 |
Product selling prices: electronics=$150, clothing=$40, groceries=$10, furniture=$200, toys=$25
|
| 43 |
Product cost prices: electronics=$100, clothing=$25, groceries=$5, furniture=$130, toys=$12
|
| 44 |
Profit margins: electronics=$50, clothing=$15, groceries=$5, furniture=$70, toys=$13
|
| 45 |
-
Shipping costs per unit: slow=$2 (
|
| 46 |
Warehouse capacity: electronics=100, clothing=200, groceries=500, furniture=50, toys=300
|
| 47 |
|
| 48 |
Events (like black_friday, christmas) boost demand when their countdown hits 0 and last for 2 days.
|
|
@@ -50,25 +51,33 @@ SYSTEM_PROMPT = textwrap.dedent("""
|
|
| 50 |
|
| 51 |
CRITICAL STRATEGY:
|
| 52 |
- Review your history: if reward was negative, identify why and change approach.
|
| 53 |
-
- Track demand trends across days
|
| 54 |
- You MUST restock products when inventory is low. Missed sales = lost revenue = negative reward.
|
| 55 |
- Do NOT overbuy when demand is low — unsold stock ties up cash and perishables expire.
|
| 56 |
-
-
|
| 57 |
-
- Stock up BEFORE events hit (check event countdowns — order 3-5 days ahead using slow/medium shipping).
|
| 58 |
- When no events are approaching, slow shipping is often sufficient and saves significant cost.
|
| 59 |
- Near end of episode (last 2 days), stop buying — focus on selling remaining stock.
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
Each day you must respond with a JSON action:
|
| 62 |
{
|
| 63 |
"buy_quantities": {"product_name": quantity, ...},
|
| 64 |
"delivery_method": "slow" | "medium" | "fast",
|
| 65 |
-
"liquidate": {"product_name": quantity, ...}
|
|
|
|
| 66 |
}
|
| 67 |
|
| 68 |
- buy_quantities: products and amounts to order.
|
| 69 |
- delivery_method: shipping speed for this order
|
| 70 |
- liquidate: products and amounts to dispose of (no revenue, empty {} to skip)
|
| 71 |
Use liquidate to free up warehouse space before a restock.
|
|
|
|
| 72 |
|
| 73 |
LEARNING FROM HISTORY:
|
| 74 |
- Compare your past buy quantities to the demand that followed — were you over or under?
|
|
@@ -182,6 +191,8 @@ def parse_action(response_text):
|
|
| 182 |
clean["delivery_method"] = data["delivery_method"]
|
| 183 |
if "liquidate" in data:
|
| 184 |
clean["liquidate"] = data["liquidate"]
|
|
|
|
|
|
|
| 185 |
|
| 186 |
return InventoryAction(**clean)
|
| 187 |
except Exception as e:
|
|
@@ -191,10 +202,11 @@ def parse_action(response_text):
|
|
| 191 |
buy_quantities={},
|
| 192 |
delivery_method="slow",
|
| 193 |
liquidate={},
|
| 194 |
-
|
|
|
|
| 195 |
|
| 196 |
|
| 197 |
-
HISTORY_WINDOW =
|
| 198 |
|
| 199 |
|
| 200 |
def run_task(client, task_name):
|
|
@@ -242,7 +254,7 @@ def run_task(client, task_name):
|
|
| 242 |
model=MODEL_NAME,
|
| 243 |
messages=messages,
|
| 244 |
temperature=0.0,
|
| 245 |
-
max_completion_tokens=
|
| 246 |
stream=False,
|
| 247 |
)
|
| 248 |
response_text = completion.choices[0].message.content or ""
|
|
@@ -255,7 +267,7 @@ def run_task(client, task_name):
|
|
| 255 |
|
| 256 |
action = parse_action(response_text)
|
| 257 |
|
| 258 |
-
print(f"Day {day}: buy={action.buy_quantities} delivery={action.delivery_method} liquidate={action.liquidate}")
|
| 259 |
|
| 260 |
obs = env.step(action)
|
| 261 |
|
|
@@ -266,35 +278,27 @@ def run_task(client, task_name):
|
|
| 266 |
|
| 267 |
|
| 268 |
def main():
|
| 269 |
-
from server.grader import
|
| 270 |
|
| 271 |
if not MODEL_NAME:
|
| 272 |
raise RuntimeError("MODEL_NAME is not set. Please export MODEL_NAME before running inference.")
|
| 273 |
|
| 274 |
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 275 |
|
| 276 |
-
# print
|
|
|
|
| 277 |
print(f"\n{'=' * 50}")
|
| 278 |
-
print("
|
| 279 |
print(f"{'=' * 50}")
|
| 280 |
-
for task_name in ["easy", "medium", "hard"]:
|
| 281 |
-
floor, ceiling = compute_baselines(task_name)
|
| 282 |
-
print(f" {task_name}: floor=${floor:.2f} (passive) | ceiling=${ceiling:.2f} (heuristic)")
|
| 283 |
-
|
| 284 |
-
results = {}
|
| 285 |
-
for task_name in ["easy", "medium", "hard"]:
|
| 286 |
-
profit = run_task(client, task_name)
|
| 287 |
-
results[task_name] = profit
|
| 288 |
|
| 289 |
-
|
|
|
|
| 290 |
|
| 291 |
print(f"\n{'=' * 50}")
|
| 292 |
-
print("FINAL
|
| 293 |
print(f"{'=' * 50}")
|
| 294 |
-
|
| 295 |
-
floor, ceiling = compute_baselines(task_name)
|
| 296 |
-
print(f" {task_name}: {score:.3f} (profit: ${results[task_name]:.2f} | floor: ${floor:.2f} | ceiling: ${ceiling:.2f})")
|
| 297 |
|
| 298 |
|
| 299 |
if __name__ == "__main__":
|
| 300 |
-
main()
|
|
|
|
| 25 |
from models import InventoryAction
|
| 26 |
|
| 27 |
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 28 |
+
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
|
| 29 |
+
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen3-32B"
|
| 30 |
+
TASK_NAME = os.getenv("TASK_NAME") or "easy"
|
| 31 |
MAX_DAYS = 30
|
| 32 |
|
| 33 |
SYSTEM_PROMPT = textwrap.dedent("""
|
|
|
|
| 43 |
Product selling prices: electronics=$150, clothing=$40, groceries=$10, furniture=$200, toys=$25
|
| 44 |
Product cost prices: electronics=$100, clothing=$25, groceries=$5, furniture=$130, toys=$12
|
| 45 |
Profit margins: electronics=$50, clothing=$15, groceries=$5, furniture=$70, toys=$13
|
| 46 |
+
Shipping costs per unit: slow=$2 (3-7 days), medium=$5 (2-4 days), fast=$10 (1 day, always reliable)
|
| 47 |
Warehouse capacity: electronics=100, clothing=200, groceries=500, furniture=50, toys=300
|
| 48 |
|
| 49 |
Events (like black_friday, christmas) boost demand when their countdown hits 0 and last for 2 days.
|
|
|
|
| 51 |
|
| 52 |
CRITICAL STRATEGY:
|
| 53 |
- Review your history: if reward was negative, identify why and change approach.
|
| 54 |
+
- Track demand trends across days.
|
| 55 |
- You MUST restock products when inventory is low. Missed sales = lost revenue = negative reward.
|
| 56 |
- Do NOT overbuy when demand is low — unsold stock ties up cash and perishables expire.
|
| 57 |
+
- Stock up BEFORE events hit (check event countdowns — order 3-5 days ahead).
|
|
|
|
| 58 |
- When no events are approaching, slow shipping is often sufficient and saves significant cost.
|
| 59 |
- Near end of episode (last 2 days), stop buying — focus on selling remaining stock.
|
| 60 |
|
| 61 |
+
DYNAMIC PRICING:
|
| 62 |
+
You can set a price multiplier (0.5 to 1.5) per product each day. Default is 1.0.
|
| 63 |
+
- Lower price (e.g. 0.7) = more demand but less revenue per unit. Good for clearing excess stock.
|
| 64 |
+
- Higher price (e.g. 1.3) = less demand but more revenue per unit. Good when stock is low.
|
| 65 |
+
- Price elasticity varies across different products.
|
| 66 |
+
- Elasticity values: electronics=1.2, clothing=1.5, groceries=0.4, furniture=0.8, toys=1.3
|
| 67 |
+
|
| 68 |
Each day you must respond with a JSON action:
|
| 69 |
{
|
| 70 |
"buy_quantities": {"product_name": quantity, ...},
|
| 71 |
"delivery_method": "slow" | "medium" | "fast",
|
| 72 |
+
"liquidate": {"product_name": quantity, ...},
|
| 73 |
+
"price_multipliers": {"product_name": multiplier, ...}
|
| 74 |
}
|
| 75 |
|
| 76 |
- buy_quantities: products and amounts to order.
|
| 77 |
- delivery_method: shipping speed for this order
|
| 78 |
- liquidate: products and amounts to dispose of (no revenue, empty {} to skip)
|
| 79 |
Use liquidate to free up warehouse space before a restock.
|
| 80 |
+
- price_multipliers: set selling price multiplier per product (0.5-1.5, default 1.0 if omitted)
|
| 81 |
|
| 82 |
LEARNING FROM HISTORY:
|
| 83 |
- Compare your past buy quantities to the demand that followed — were you over or under?
|
|
|
|
| 191 |
clean["delivery_method"] = data["delivery_method"]
|
| 192 |
if "liquidate" in data:
|
| 193 |
clean["liquidate"] = data["liquidate"]
|
| 194 |
+
if "price_multipliers" in data:
|
| 195 |
+
clean["price_multipliers"] = data["price_multipliers"]
|
| 196 |
|
| 197 |
return InventoryAction(**clean)
|
| 198 |
except Exception as e:
|
|
|
|
| 202 |
buy_quantities={},
|
| 203 |
delivery_method="slow",
|
| 204 |
liquidate={},
|
| 205 |
+
price_multipliers={},
|
| 206 |
+
)
|
| 207 |
|
| 208 |
|
| 209 |
+
HISTORY_WINDOW = 7 # rolling window of past days to include in context
|
| 210 |
|
| 211 |
|
| 212 |
def run_task(client, task_name):
|
|
|
|
| 254 |
model=MODEL_NAME,
|
| 255 |
messages=messages,
|
| 256 |
temperature=0.0,
|
| 257 |
+
max_completion_tokens=500,
|
| 258 |
stream=False,
|
| 259 |
)
|
| 260 |
response_text = completion.choices[0].message.content or ""
|
|
|
|
| 267 |
|
| 268 |
action = parse_action(response_text)
|
| 269 |
|
| 270 |
+
print(f"Day {day}: buy={action.buy_quantities} delivery={action.delivery_method} liquidate={action.liquidate} prices={action.price_multipliers}")
|
| 271 |
|
| 272 |
obs = env.step(action)
|
| 273 |
|
|
|
|
| 278 |
|
| 279 |
|
| 280 |
def main():
|
| 281 |
+
from server.grader import grade, compute_baselines
|
| 282 |
|
| 283 |
if not MODEL_NAME:
|
| 284 |
raise RuntimeError("MODEL_NAME is not set. Please export MODEL_NAME before running inference.")
|
| 285 |
|
| 286 |
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 287 |
|
| 288 |
+
# print baseline for selected task
|
| 289 |
+
floor, ceiling = compute_baselines(TASK_NAME)
|
| 290 |
print(f"\n{'=' * 50}")
|
| 291 |
+
print(f"BASELINE ({TASK_NAME}): floor=${floor:.2f} (passive) | ceiling=${ceiling:.2f} (heuristic)")
|
| 292 |
print(f"{'=' * 50}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
+
profit = run_task(client, TASK_NAME)
|
| 295 |
+
score = grade(TASK_NAME, profit)
|
| 296 |
|
| 297 |
print(f"\n{'=' * 50}")
|
| 298 |
+
print("FINAL SCORE")
|
| 299 |
print(f"{'=' * 50}")
|
| 300 |
+
print(f" {TASK_NAME}: {score:.3f} (profit: ${profit:.2f} | floor: ${floor:.2f} | ceiling: ${ceiling:.2f})")
|
|
|
|
|
|
|
| 301 |
|
| 302 |
|
| 303 |
if __name__ == "__main__":
|
| 304 |
+
main()
|
models.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
| 3 |
from openenv.core.env_server import Action, Observation, State
|
| 4 |
from typing import Literal, Dict, List, Optional
|
| 5 |
-
|
| 6 |
from pydantic import field_validator
|
| 7 |
|
|
|
|
| 8 |
class InventoryAction(Action):
|
| 9 |
buy_quantities : Dict[str, int] = {}
|
| 10 |
delivery_method : Literal["slow", "medium", "fast"] = "slow"
|
| 11 |
liquidate : Dict[str, int] = {}
|
|
|
|
| 12 |
|
| 13 |
-
@field_validator("buy_quantities", "liquidate", mode="before")
|
| 14 |
@classmethod
|
| 15 |
def parse_dict_strings(cls, v):
|
| 16 |
if isinstance(v, str):
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
import json
|
| 4 |
from openenv.core.env_server import Action, Observation, State
|
| 5 |
from typing import Literal, Dict, List, Optional
|
|
|
|
| 6 |
from pydantic import field_validator
|
| 7 |
|
| 8 |
+
|
| 9 |
class InventoryAction(Action):
|
| 10 |
buy_quantities : Dict[str, int] = {}
|
| 11 |
delivery_method : Literal["slow", "medium", "fast"] = "slow"
|
| 12 |
liquidate : Dict[str, int] = {}
|
| 13 |
+
price_multipliers : Dict[str, float] = {} # product -> 0.5 to 1.5 (default 1.0)
|
| 14 |
|
| 15 |
+
@field_validator("buy_quantities", "liquidate", "price_multipliers", mode="before")
|
| 16 |
@classmethod
|
| 17 |
def parse_dict_strings(cls, v):
|
| 18 |
if isinstance(v, str):
|
server/constants.py
CHANGED
|
@@ -175,4 +175,12 @@ TASKS = {
|
|
| 175 |
"toys": (8, 18),
|
| 176 |
},
|
| 177 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
|
|
|
| 175 |
"toys": (8, 18),
|
| 176 |
},
|
| 177 |
},
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
PRICE_ELASTICITY = {
|
| 181 |
+
"electronics": 1.2,
|
| 182 |
+
"clothing": 1.5,
|
| 183 |
+
"groceries": 0.4,
|
| 184 |
+
"furniture": 0.8,
|
| 185 |
+
"toys": 1.3,
|
| 186 |
}
|
server/grader.py
CHANGED
|
@@ -2,12 +2,17 @@
|
|
| 2 |
Grader for inventory optimization tasks.
|
| 3 |
Scores agent performance on a 0.0-1.0 scale using floor/ceiling approach.
|
| 4 |
- floor: passive agent (no buys, just sells initial stock until empty)
|
| 5 |
-
- ceiling:
|
| 6 |
"""
|
| 7 |
|
| 8 |
from server.inventory_env import InventoryEnvironment
|
| 9 |
from models import InventoryAction
|
| 10 |
-
from server.constants import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
def _run_passive(task_name):
|
|
@@ -27,100 +32,46 @@ def _run_passive(task_name):
|
|
| 27 |
|
| 28 |
|
| 29 |
def _run_heuristic(task_name):
|
| 30 |
-
"""Ceiling baseline: smart heuristic that stocks up before events."""
|
| 31 |
task = TASKS[task_name]
|
| 32 |
-
|
| 33 |
-
obs = env.reset()
|
| 34 |
|
| 35 |
-
|
| 36 |
-
demand_history = {}
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
for event, days in obs.updated_events.items():
|
| 45 |
-
if 0 < days < nearest_event_days:
|
| 46 |
-
nearest_event_days = days
|
| 47 |
-
|
| 48 |
-
# pick shipping based on urgency
|
| 49 |
-
if nearest_event_days <= 2:
|
| 50 |
-
delivery = "fast"
|
| 51 |
-
elif nearest_event_days <= 5:
|
| 52 |
-
delivery = "medium"
|
| 53 |
-
else:
|
| 54 |
-
delivery = "slow"
|
| 55 |
-
|
| 56 |
-
# update demand history from observation
|
| 57 |
-
if obs.demand_today:
|
| 58 |
-
for product, units in obs.demand_today.items():
|
| 59 |
-
if product not in demand_history:
|
| 60 |
-
demand_history[product] = []
|
| 61 |
-
demand_history[product].append(units)
|
| 62 |
|
| 63 |
for product, (lo, hi) in task["base_demand"].items():
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
# use recent demand if available (last 5 days)
|
| 67 |
-
if product in demand_history and len(demand_history[product]) >= 2:
|
| 68 |
-
recent = demand_history[product][-5:]
|
| 69 |
-
avg_demand = max(avg_demand, int(sum(recent) / len(recent)))
|
| 70 |
-
|
| 71 |
-
current = sum(b[0] for b in obs.updated_inventory.get(product, []))
|
| 72 |
-
|
| 73 |
-
# count in-transit units
|
| 74 |
-
in_transit = 0
|
| 75 |
-
for d in obs.updated_deliveries:
|
| 76 |
-
for p, shipment in d.items():
|
| 77 |
-
if p == product:
|
| 78 |
-
in_transit += shipment[0]
|
| 79 |
-
|
| 80 |
-
available = current + in_transit
|
| 81 |
-
|
| 82 |
-
# how many days of stock to target
|
| 83 |
-
if nearest_event_days <= 5:
|
| 84 |
-
target = avg_demand * 6
|
| 85 |
-
else:
|
| 86 |
-
target = avg_demand * 4
|
| 87 |
-
|
| 88 |
-
# prioritize high-margin products — order more aggressively
|
| 89 |
-
margin = BASE_PRICES[product] - COST_PRICES[product]
|
| 90 |
-
if margin >= 50: # electronics, furniture
|
| 91 |
-
target = int(target * 1.3)
|
| 92 |
-
|
| 93 |
-
if available < target:
|
| 94 |
-
buy[product] = target - available
|
| 95 |
-
|
| 96 |
-
# liquidate groceries about to expire (1 day left)
|
| 97 |
-
for batch in obs.updated_inventory.get("groceries", []):
|
| 98 |
-
if batch[1] is not None and batch[1] <= 1:
|
| 99 |
-
liquidate["groceries"] = liquidate.get("groceries", 0) + batch[0]
|
| 100 |
-
|
| 101 |
-
# stop buying when deliveries can't arrive in time
|
| 102 |
-
days_left = task["max_days"] - obs.current_day
|
| 103 |
-
if delivery == "slow" and days_left <= 5:
|
| 104 |
-
buy = {}
|
| 105 |
-
elif delivery == "medium" and days_left <= 3:
|
| 106 |
-
buy = {}
|
| 107 |
-
elif delivery == "fast" and days_left <= 1:
|
| 108 |
-
buy = {}
|
| 109 |
-
|
| 110 |
-
# don't buy more than cash allows (rough check)
|
| 111 |
-
total_cost = sum(qty * (COST_PRICES[p] + SHIPPING_COST[delivery]) for p, qty in buy.items())
|
| 112 |
-
if total_cost > obs.total_cash * 0.85:
|
| 113 |
-
scale = (obs.total_cash * 0.85) / total_cost if total_cost > 0 else 0
|
| 114 |
-
buy = {p: max(1, int(qty * scale)) for p, qty in buy.items()}
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
liquidate=liquidate,
|
| 120 |
-
)
|
| 121 |
-
obs = env.step(action)
|
| 122 |
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
def compute_baselines(task_name):
|
|
|
|
| 2 |
Grader for inventory optimization tasks.
|
| 3 |
Scores agent performance on a 0.0-1.0 scale using floor/ceiling approach.
|
| 4 |
- floor: passive agent (no buys, just sells initial stock until empty)
|
| 5 |
+
- ceiling: theoretical max profit with perfect demand knowledge
|
| 6 |
"""
|
| 7 |
|
| 8 |
from server.inventory_env import InventoryEnvironment
|
| 9 |
from models import InventoryAction
|
| 10 |
+
from server.constants import (
|
| 11 |
+
TASKS, BASE_PRICES, COST_PRICES, SHIPPING_COST, EVENT_EFFECTS,
|
| 12 |
+
WEEKEND_MULTIPLIER, EVENT_DURATION,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
import random
|
| 16 |
|
| 17 |
|
| 18 |
def _run_passive(task_name):
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
def _run_heuristic(task_name):
|
|
|
|
| 35 |
task = TASKS[task_name]
|
| 36 |
+
events = dict(task["events"])
|
|
|
|
| 37 |
|
| 38 |
+
total_demand = {p: 0 for p in task["base_demand"]}
|
|
|
|
| 39 |
|
| 40 |
+
for day in range(1, task["max_days"] + 1):
|
| 41 |
+
# tick events
|
| 42 |
+
for event_name in events:
|
| 43 |
+
events[event_name] -= 1
|
| 44 |
+
|
| 45 |
+
rng = random.Random(task["seed"] * 1000 + day)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
for product, (lo, hi) in task["base_demand"].items():
|
| 48 |
+
demand = rng.randint(lo, hi)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# weekend boost
|
| 51 |
+
if day % 7 == 5 or day % 7 == 6:
|
| 52 |
+
demand = int(WEEKEND_MULTIPLIER * demand)
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
+
# event multipliers
|
| 55 |
+
for event_name, days_left in events.items():
|
| 56 |
+
if -EVENT_DURATION < days_left <= 0 and event_name in EVENT_EFFECTS:
|
| 57 |
+
mult = EVENT_EFFECTS[event_name].get(product, 1.0)
|
| 58 |
+
demand = int(demand * mult)
|
| 59 |
+
|
| 60 |
+
total_demand[product] += demand
|
| 61 |
+
|
| 62 |
+
total_profit = 0.0
|
| 63 |
+
|
| 64 |
+
# sell the initial stock first
|
| 65 |
+
initial_stock = task["initial_stock"]
|
| 66 |
+
|
| 67 |
+
for product in task["base_demand"]:
|
| 68 |
+
total_profit += min(initial_stock.get(product, 0), total_demand[product]) * BASE_PRICES[product]
|
| 69 |
+
total_demand[product] = max(0, total_demand[product] - initial_stock.get(product, 0))
|
| 70 |
+
|
| 71 |
+
# cost price and shipping cost applies after initial stock
|
| 72 |
+
total_profit += total_demand[product] * (BASE_PRICES[product] - COST_PRICES[product] - SHIPPING_COST["slow"])
|
| 73 |
+
|
| 74 |
+
return total_profit
|
| 75 |
|
| 76 |
|
| 77 |
def compute_baselines(task_name):
|
server/inventory_env.py
CHANGED
|
@@ -8,7 +8,7 @@ from .constants import (
|
|
| 8 |
INITIAL_CASH, BASE_PRICES, COST_PRICES, SHELF_LIFE, INITIAL_STOCK,
|
| 9 |
EVENTS, SHIPPING_COST, SHIPPING_DAYS, INVENTORY_CAPACITY,
|
| 10 |
EXTRA_INVENTORY_COST, BASE_DEMAND, WEEKEND_MULTIPLIER, EVENT_EFFECTS,
|
| 11 |
-
EVENT_DURATION, MAX_DAYS, UPGRADE_DELIVERY_COST, TASKS,
|
| 12 |
)
|
| 13 |
|
| 14 |
|
|
@@ -128,28 +128,45 @@ class InventoryEnvironment(Environment):
|
|
| 128 |
day_cost += total_cost
|
| 129 |
|
| 130 |
arrival_day = self.current_day + SHIPPING_DAYS[action.delivery_method]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
self.deliveries.append({product: [qty, arrival_day]})
|
| 132 |
|
| 133 |
# 5. generate demand
|
| 134 |
demand = self._generate_demand()
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
# 6. sell products (fifo)
|
| 137 |
for product, demand_today in demand.items():
|
| 138 |
|
|
|
|
| 139 |
product_availability = sum(batch[0] for batch in self.inventory[product])
|
| 140 |
|
| 141 |
|
| 142 |
if demand_today > product_availability:
|
| 143 |
missed_sales = demand_today - product_availability
|
| 144 |
sold = product_availability
|
| 145 |
-
day_revenue += sold *
|
| 146 |
self.inventory[product] = []
|
| 147 |
-
self.reward -= missed_sales *
|
| 148 |
-
self.reward += sold *
|
| 149 |
|
| 150 |
else:
|
| 151 |
-
day_revenue += demand_today *
|
| 152 |
-
self.reward += demand_today *
|
| 153 |
|
| 154 |
new_batches = []
|
| 155 |
|
|
@@ -162,7 +179,9 @@ class InventoryEnvironment(Environment):
|
|
| 162 |
new_batches.append(batch)
|
| 163 |
|
| 164 |
else:
|
| 165 |
-
|
|
|
|
|
|
|
| 166 |
demand_today = 0
|
| 167 |
|
| 168 |
self.inventory[product] = new_batches
|
|
|
|
| 8 |
INITIAL_CASH, BASE_PRICES, COST_PRICES, SHELF_LIFE, INITIAL_STOCK,
|
| 9 |
EVENTS, SHIPPING_COST, SHIPPING_DAYS, INVENTORY_CAPACITY,
|
| 10 |
EXTRA_INVENTORY_COST, BASE_DEMAND, WEEKEND_MULTIPLIER, EVENT_EFFECTS,
|
| 11 |
+
EVENT_DURATION, MAX_DAYS, UPGRADE_DELIVERY_COST, TASKS, PRICE_ELASTICITY
|
| 12 |
)
|
| 13 |
|
| 14 |
|
|
|
|
| 128 |
day_cost += total_cost
|
| 129 |
|
| 130 |
arrival_day = self.current_day + SHIPPING_DAYS[action.delivery_method]
|
| 131 |
+
# add jitter: slow ±2 days, medium ±1 day, fast is reliable
|
| 132 |
+
jitter_rng = random.Random(self.seed * 2000 + self.current_day * 100 + hash(product))
|
| 133 |
+
if action.delivery_method == "slow":
|
| 134 |
+
arrival_day += jitter_rng.randint(-2, 2)
|
| 135 |
+
elif action.delivery_method == "medium":
|
| 136 |
+
arrival_day += jitter_rng.randint(-1, 1)
|
| 137 |
+
# ensure arrival is at least next day
|
| 138 |
+
arrival_day = max(self.current_day + 1, arrival_day)
|
| 139 |
self.deliveries.append({product: [qty, arrival_day]})
|
| 140 |
|
| 141 |
# 5. generate demand
|
| 142 |
demand = self._generate_demand()
|
| 143 |
|
| 144 |
+
# apply price elasticity: demand scales with price^(-elasticity)
|
| 145 |
+
price_mults = {}
|
| 146 |
+
for product in demand:
|
| 147 |
+
pm = max(0.5, min(1.5, action.price_multipliers.get(product, 1.0)))
|
| 148 |
+
price_mults[product] = pm
|
| 149 |
+
e = PRICE_ELASTICITY[product]
|
| 150 |
+
demand[product] = max(0, int(demand[product] * pm ** -e))
|
| 151 |
+
|
| 152 |
# 6. sell products (fifo)
|
| 153 |
for product, demand_today in demand.items():
|
| 154 |
|
| 155 |
+
sell_price = BASE_PRICES[product] * price_mults[product]
|
| 156 |
product_availability = sum(batch[0] for batch in self.inventory[product])
|
| 157 |
|
| 158 |
|
| 159 |
if demand_today > product_availability:
|
| 160 |
missed_sales = demand_today - product_availability
|
| 161 |
sold = product_availability
|
| 162 |
+
day_revenue += sold * sell_price
|
| 163 |
self.inventory[product] = []
|
| 164 |
+
self.reward -= missed_sales * sell_price * 0.001
|
| 165 |
+
self.reward += sold * sell_price * 0.001
|
| 166 |
|
| 167 |
else:
|
| 168 |
+
day_revenue += demand_today * sell_price
|
| 169 |
+
self.reward += demand_today * sell_price * 0.001
|
| 170 |
|
| 171 |
new_batches = []
|
| 172 |
|
|
|
|
| 179 |
new_batches.append(batch)
|
| 180 |
|
| 181 |
else:
|
| 182 |
+
remaining = batch[0] - demand_today
|
| 183 |
+
if remaining > 0:
|
| 184 |
+
new_batches.append([remaining, batch[1]])
|
| 185 |
demand_today = 0
|
| 186 |
|
| 187 |
self.inventory[product] = new_batches
|