Spaces:

saketh1201
/

inventory_env

Sleeping

App Files Files Community

saketh1201 committed on Mar 31

Commit

d97ff83

verified ·

1 Parent(s): 852bf33

Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

client.py +0 -3
inference.py +107 -35
models.py +9 -1
pyproject.toml +4 -1
scripts/validate-submission.sh +172 -0
server/grader.py +57 -19
server/inventory_env.py +4 -5
uv.lock +0 -0

client.py CHANGED Viewed

@@ -22,9 +22,6 @@ class InventoryEnv(EnvClient[InventoryAction, InventoryObservation, InventorySta
         if action.delivery_method is not None:
             payload["delivery_method"] = action.delivery_method
-        if action.upgrade_delivery is not None:
-            payload["upgrade_delivery"] = action.upgrade_delivery
         if action.liquidate is not None:
             payload["liquidate"] = action.liquidate

         if action.delivery_method is not None:
             payload["delivery_method"] = action.delivery_method
         if action.liquidate is not None:
             payload["liquidate"] = action.liquidate

inference.py CHANGED Viewed

@@ -1,34 +1,42 @@
 """
-Inference Script — Inventory Optimization Environment
-=======================================================
 Required env vars:
     API_BASE_URL   The API endpoint for the LLM.
     MODEL_NAME     The model identifier to use for inference.
-    HF_TOKEN       Your Hugging Face / API key.
 """
 import os
 import json
 import textwrap
 from openai import OpenAI
 from server.inventory_env import InventoryEnvironment
-from server.constants import EXTRA_INVENTORY_COST
 from models import InventoryAction
-from dotenv import load_dotenv
-load_dotenv()
 API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
-API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
 MODEL_NAME = os.getenv("MODEL_NAME")
 MAX_DAYS = 30
 SYSTEM_PROMPT = textwrap.dedent("""
-You are an inventory management AI agent. Each day you receive the current state
     of a retail store with 5 products: electronics, clothing, groceries, furniture, toys.
     Groceries are perishable (5-day shelf life). Other products don't expire.
     Product selling prices: electronics=$150, clothing=$40, groceries=$10, furniture=$200, toys=$25
@@ -37,16 +45,18 @@ You are an inventory management AI agent. Each day you receive the current state
     Shipping costs per unit: slow=$2 (5 days), medium=$5 (3 days), fast=$10 (1 day)
     Warehouse capacity: electronics=100, clothing=200, groceries=500, furniture=50, toys=300
-    Events (like black_friday, christmas) boost demand when their countdown hits 0.
     Weekends (day%7 == 5 or 6) have 1.2x demand.
     CRITICAL STRATEGY:
-    - You MUST restock products when inventory is low. If you don't buy, you run out of
-      stock and miss sales. Missed sales = lost revenue = negative reward.
-    - Check today's demand to estimate tomorrow's needs.
-    - Do NOT overbuy when demand is low - unsold stock ties up cash, warehouse space and perishables expire.
     - Prioritize high-margin products: furniture ($70 profit), electronics ($50 profit).
-    - Stock up BEFORE events hit (check event countdowns).
     Each day you must respond with a JSON action:
     {
@@ -60,11 +70,18 @@ You are an inventory management AI agent. Each day you receive the current state
     - liquidate: products and amounts to dispose of (no revenue, empty {} to skip)
       Use liquidate to free up warehouse space before a restock.
-    You will see what demand occurred today AFTER it happened. Use this to spot trends
-    and plan restocking. A negative reward means your last action was bad — adjust.
-    Do NOT buy more than you can afford. Do NOT buy on the last day.
-    Respond with ONLY valid JSON, no explanation.
 """).strip()
@@ -89,8 +106,10 @@ def format_observation(obs):
     for event, days in obs.updated_events.items():
         if days > 0:
             event_lines.append(f"  {event}: in {days} days")
-        else:
             event_lines.append(f"  {event}: ACTIVE NOW")
     events_text = "\n".join(event_lines) if event_lines else "  None"
     # format deliveries
@@ -102,7 +121,7 @@ def format_observation(obs):
             delivery_lines.append(f"  {product}: {qty} units arriving in {days_away} days")
     deliveries_text = "\n".join(delivery_lines) if delivery_lines else "  None"
-    # format demand (already happened today — feedback, not prediction)
     demand_lines = []
     for product, units in obs.demand_today.items():
         demand_lines.append(f"  {product}: {units} units")
@@ -117,7 +136,7 @@ Last Step Reward: {obs.reward:.3f}
 Inventory:
 {inv_text}
-Demand That Occurred Today:
 {demand_text}
 Upcoming Events:
@@ -132,17 +151,42 @@ Respond with your action as JSON."""
 def parse_action(response_text):
-    """Parse LLM response into InventoryAction."""
     try:
         text = response_text.strip()
-        if text.startswith("```"):
-            text = text.split("\n", 1)[1]
-            text = text.rsplit("```", 1)[0]
         data = json.loads(text)
-        return InventoryAction(**data)
-    except Exception:
-        print(response_text)
         return InventoryAction(
             buy_quantities={},
             delivery_method="slow",
@@ -150,6 +194,9 @@ def parse_action(response_text):
         )
 def run_task(client, task_name):
     """Run a single task and return total profit."""
     env = InventoryEnvironment(task_name)
@@ -159,6 +206,9 @@ def run_task(client, task_name):
     print(f"Task: {task_name.upper()} | Cash: ${obs.total_cash:.2f} | Days: {env.max_days}")
     print(f"{'=' * 50}")
     for day in range(1, env.max_days + 1):
         if obs.done:
             print("Episode ended early.")
@@ -166,16 +216,32 @@ def run_task(client, task_name):
         user_prompt = format_observation(obs)
-        messages = [
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": user_prompt},
-        ]
         try:
             completion = client.chat.completions.create(
                 model=MODEL_NAME,
                 messages=messages,
-                # temperature=0.2,
                 max_completion_tokens=300,
                 stream=False,
             )
@@ -184,6 +250,9 @@ def run_task(client, task_name):
             print(f"  LLM request failed: {exc}. Skipping turn.")
             response_text = "{}"
         action = parse_action(response_text)
         print(f"Day {day}: buy={action.buy_quantities} delivery={action.delivery_method} liquidate={action.liquidate}")
@@ -199,6 +268,9 @@ def run_task(client, task_name):
 def main():
     from server.grader import grade_all, compute_baselines
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
     # print baselines first
@@ -225,4 +297,4 @@ def main():
 if __name__ == "__main__":
-    main()

 """
+Inference Script - Inventory Optimization Environment
+=====================================================
 Required env vars:
     API_BASE_URL   The API endpoint for the LLM.
     MODEL_NAME     The model identifier to use for inference.
+    HF_TOKEN       Hugging Face token (preferred for HF Router).
+Supported key env vars (first non-empty wins): HF_TOKEN, API_KEY, OPENAI_API_KEY.
+For non-OpenAI endpoints, a dummy key is used when no key is provided because
+the OpenAI Python SDK requires a non-empty api_key argument.
 """
 import os
 import json
 import textwrap
+from dotenv import load_dotenv
+load_dotenv()
 from openai import OpenAI
 from server.inventory_env import InventoryEnvironment
+from server.constants import EXTRA_INVENTORY_COST, EVENT_DURATION
 from models import InventoryAction
 API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
+API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
 MODEL_NAME = os.getenv("MODEL_NAME")
 MAX_DAYS = 30
 SYSTEM_PROMPT = textwrap.dedent("""
+    You are an inventory management AI agent. Each day you receive the current state
     of a retail store with 5 products: electronics, clothing, groceries, furniture, toys.
+    You will be shown your decision history from recent days so you can learn from
+    past outcomes. Use this history to spot demand trends, identify what worked vs.
+    what didn't, and adjust your strategy accordingly.
     Groceries are perishable (5-day shelf life). Other products don't expire.
     Product selling prices: electronics=$150, clothing=$40, groceries=$10, furniture=$200, toys=$25
     Shipping costs per unit: slow=$2 (5 days), medium=$5 (3 days), fast=$10 (1 day)
     Warehouse capacity: electronics=100, clothing=200, groceries=500, furniture=50, toys=300
+    Events (like black_friday, christmas) boost demand when their countdown hits 0 and last for 2 days.
     Weekends (day%7 == 5 or 6) have 1.2x demand.
     CRITICAL STRATEGY:
+    - Review your history: if reward was negative, identify why and change approach.
+    - Track demand trends across days — if a product's demand is rising, stock up early.
+    - You MUST restock products when inventory is low. Missed sales = lost revenue = negative reward.
+    - Do NOT overbuy when demand is low — unsold stock ties up cash and perishables expire.
     - Prioritize high-margin products: furniture ($70 profit), electronics ($50 profit).
+    - Stock up BEFORE events hit (check event countdowns — order 3-5 days ahead using slow/medium shipping).
+    - When no events are approaching, slow shipping is often sufficient and saves significant cost.
+    - Near end of episode (last 2 days), stop buying — focus on selling remaining stock.
     Each day you must respond with a JSON action:
     {
     - liquidate: products and amounts to dispose of (no revenue, empty {} to skip)
       Use liquidate to free up warehouse space before a restock.
+    LEARNING FROM HISTORY:
+    - Compare your past buy quantities to the demand that followed — were you over or under?
+    - If you see repeated stockouts for a product, increase orders for it.
+    - If groceries expired, you overbought — reduce grocery orders or use faster shipping.
+    - A negative reward means your last action was bad — adjust immediately.
+    Before responding with JSON, briefly reason (2-3 lines max):
+    1. What did I learn from recent history? What went wrong/right?
+    2. What products need restocking vs. are overstocked?
+    3. Are any events approaching?
+    Then output ONLY the final JSON action on the last line.
 """).strip()
     for event, days in obs.updated_events.items():
         if days > 0:
             event_lines.append(f"  {event}: in {days} days")
+        elif -EVENT_DURATION < days <= 0:
             event_lines.append(f"  {event}: ACTIVE NOW")
+        else:
+            event_lines.append(f"  {event}: ended")
     events_text = "\n".join(event_lines) if event_lines else "  None"
     # format deliveries
             delivery_lines.append(f"  {product}: {qty} units arriving in {days_away} days")
     deliveries_text = "\n".join(delivery_lines) if delivery_lines else "  None"
+    # format demand (yesterday's demand — feedback, not prediction)
     demand_lines = []
     for product, units in obs.demand_today.items():
         demand_lines.append(f"  {product}: {units} units")
 Inventory:
 {inv_text}
+Yesterday's Demand:
 {demand_text}
 Upcoming Events:
 def parse_action(response_text):
+    """Parse LLM response into InventoryAction. Extracts JSON even if surrounded by text."""
     try:
         text = response_text.strip()
+        # strip markdown code fences
+        if "```" in text:
+            parts = text.split("```")
+            for part in parts:
+                part = part.strip()
+                if part.startswith("json"):
+                    part = part[4:].strip()
+                if part.startswith("{"):
+                    text = part
+                    break
+        # find the first { and last } to extract JSON
+        start = text.find("{")
+        end = text.rfind("}")
+        if start != -1 and end != -1 and end > start:
+            text = text[start:end + 1]
         data = json.loads(text)
+        # only keep valid fields
+        clean = {}
+        if "buy_quantities" in data:
+            clean["buy_quantities"] = data["buy_quantities"]
+        if "delivery_method" in data:
+            clean["delivery_method"] = data["delivery_method"]
+        if "liquidate" in data:
+            clean["liquidate"] = data["liquidate"]
+        return InventoryAction(**clean)
+    except Exception as e:
+        print(f"  [DEBUG] Parse FAILED: {e}")
+        print(f"  [DEBUG] Raw LLM response: {response_text[:500]}")
         return InventoryAction(
             buy_quantities={},
             delivery_method="slow",
         )
+HISTORY_WINDOW = 15  # rolling window of past days to include in context
 def run_task(client, task_name):
     """Run a single task and return total profit."""
     env = InventoryEnvironment(task_name)
     print(f"Task: {task_name.upper()} | Cash: ${obs.total_cash:.2f} | Days: {env.max_days}")
     print(f"{'=' * 50}")
+    # Rolling history of (user_observation, assistant_response) pairs
+    history = []
     for day in range(1, env.max_days + 1):
         if obs.done:
             print("Episode ended early.")
         user_prompt = format_observation(obs)
+        # Build messages: system + history context + current observation
+        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+        recent = history[-HISTORY_WINDOW:]
+        if recent:
+            # Tell the LLM it's about to see its past decisions and their outcomes
+            messages.append({
+                "role": "user",
+                "content": f"Here is your decision history from the last {len(recent)} day(s). "
+                           "Use this to identify demand trends, adjust restocking, and avoid repeating mistakes.",
+            })
+            messages.append({
+                "role": "assistant",
+                "content": "Understood. I'll review my past decisions and their outcomes to make better choices today.",
+            })
+            for past_user, past_assistant in recent:
+                messages.append({"role": "user", "content": past_user})
+                messages.append({"role": "assistant", "content": past_assistant})
+        messages.append({"role": "user", "content": user_prompt})
         try:
             completion = client.chat.completions.create(
                 model=MODEL_NAME,
                 messages=messages,
+                temperature=0.0,
                 max_completion_tokens=300,
                 stream=False,
             )
             print(f"  LLM request failed: {exc}. Skipping turn.")
             response_text = "{}"
+        # Save this turn to rolling history
+        history.append((user_prompt, response_text))
         action = parse_action(response_text)
         print(f"Day {day}: buy={action.buy_quantities} delivery={action.delivery_method} liquidate={action.liquidate}")
 def main():
     from server.grader import grade_all, compute_baselines
+    if not MODEL_NAME:
+        raise RuntimeError("MODEL_NAME is not set. Please export MODEL_NAME before running inference.")
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
     # print baselines first
 if __name__ == "__main__":
+    main()

models.py CHANGED Viewed

@@ -3,12 +3,20 @@ from __future__ import annotations
 from openenv.core.env_server import Action, Observation, State
 from typing import Literal, Dict, List, Optional
 class InventoryAction(Action):
     buy_quantities : Dict[str, int] = {}
-    delivery_method : Literal["slow", "medium", "fast"] = "slow"
     liquidate : Dict[str, int] = {}
 class InventoryObservation(Observation):
     current_day : int

 from openenv.core.env_server import Action, Observation, State
 from typing import Literal, Dict, List, Optional
+from pydantic import field_validator
 class InventoryAction(Action):
     buy_quantities : Dict[str, int] = {}
+    delivery_method : Literal["slow", "medium", "fast"] = "slow"
     liquidate : Dict[str, int] = {}
+    @field_validator("buy_quantities", "liquidate", mode="before")
+    @classmethod
+    def parse_dict_strings(cls, v):
+        if isinstance(v, str):
+            return json.loads(v)
+        return v
 class InventoryObservation(Observation):
     current_day : int

pyproject.toml CHANGED Viewed

@@ -15,4 +15,7 @@ dependencies = [
 [build-system]
 requires = ["setuptools>=61.0"]
-build-backend = "setuptools.build_meta"

 [build-system]
 requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project.scripts]
+server = "server.app:main"

scripts/validate-submission.sh ADDED Viewed

	@@ -0,0 +1,172 @@

+#!/usr/bin/env bash
+#
+# validate-submission.sh — OpenEnv Submission Validator
+#
+# Checks that your HF Space is live, Docker image builds, and openenv validate passes.
+#
+# Run:
+#   ./scripts/validate-submission.sh <ping_url> [repo_dir]
+#
+# Arguments:
+#   ping_url   Your HuggingFace Space URL (e.g. https://your-space.hf.space)
+#   repo_dir   Path to your repo (default: current directory)
+#
+set -uo pipefail
+DOCKER_BUILD_TIMEOUT=600
+if [ -t 1 ]; then
+  RED='\033[0;31m'
+  GREEN='\033[0;32m'
+  YELLOW='\033[1;33m'
+  BOLD='\033[1m'
+  NC='\033[0m'
+else
+  RED='' GREEN='' YELLOW='' BOLD='' NC=''
+fi
+# Run a command with a time limit.
+#   $1    limit in seconds
+#   $2..  command and its arguments
+# Uses `timeout` when available, then `gtimeout`; otherwise runs the command in
+# the background with a watcher subshell that kills it once the limit elapses.
+run_with_timeout() {
+  local secs="$1"; shift
+  if command -v timeout &>/dev/null; then
+    timeout "$secs" "$@"
+  elif command -v gtimeout &>/dev/null; then
+    gtimeout "$secs" "$@"
+  else
+    "$@" &
+    local pid=$!
+    ( sleep "$secs" && kill "$pid" 2>/dev/null ) &
+    local watcher=$!
+    wait "$pid" 2>/dev/null
+    local rc=$?
+    # The command has exited (or was killed): stop the watcher and reap it.
+    kill "$watcher" 2>/dev/null
+    wait "$watcher" 2>/dev/null
+    return $rc
+  fi
+}
+# Create a temporary file and print its path.
+#   $1  filename prefix (default: "validate")
+# Tries a templated name under ${TMPDIR:-/tmp}; if that call fails, falls back
+# to a plain `mktemp`.
+portable_mktemp() {
+  local prefix="${1:-validate}"
+  mktemp "${TMPDIR:-/tmp}/${prefix}-XXXXXX" 2>/dev/null || mktemp
+}
+# Paths registered in CLEANUP_FILES are removed when the script exits.
+CLEANUP_FILES=()
+# The ${arr[@]+...} form expands to nothing when the array is empty, which
+# avoids an unbound-variable error under `set -u`.
+cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
+trap cleanup EXIT
+# Positional arguments: $1 = ping URL (required), $2 = repo dir (default ".").
+PING_URL="${1:-}"
+REPO_DIR="${2:-.}"
+if [ -z "$PING_URL" ]; then
+  printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
+  printf "\n"
+  printf "  ping_url   Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
+  printf "  repo_dir   Path to your repo (default: current directory)\n"
+  exit 1
+fi
+# Resolve REPO_DIR to an absolute path; fail if the directory cannot be entered.
+if ! REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
+  printf "Error: directory '%s' not found\n" "${2:-.}"
+  exit 1
+fi
+# Drop a single trailing slash so "$PING_URL/reset" has exactly one separator.
+PING_URL="${PING_URL%/}"
+export PING_URL
+PASS=0
+# Output helpers.
+#   log      print a message prefixed with the current UTC time (HH:MM:SS)
+#   pass     log a PASSED line and increment the PASS counter
+#   fail     log a FAILED line
+#   hint     print an indented hint; the text is passed through %b
+#   stop_at  print which step validation stopped at and exit with status 1
+log()  { printf "[%s] %b\n" "$(date -u +%H:%M:%S)" "$*"; }
+pass() { log "${GREEN}PASSED${NC} -- $1"; PASS=$((PASS + 1)); }
+fail() { log "${RED}FAILED${NC} -- $1"; }
+hint() { printf "  ${YELLOW}Hint:${NC} %b\n" "$1"; }
+stop_at() {
+  printf "\n"
+  printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
+  exit 1
+}
+printf "\n"
+printf "${BOLD}========================================${NC}\n"
+printf "${BOLD}  OpenEnv Submission Validator${NC}\n"
+printf "${BOLD}========================================${NC}\n"
+log "Repo:     $REPO_DIR"
+log "Ping URL: $PING_URL"
+printf "\n"
+# Step 1: POST an empty JSON body to <ping_url>/reset and require HTTP 200.
+log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."
+CURL_OUTPUT=$(portable_mktemp "validate-curl")
+CLEANUP_FILES+=("$CURL_OUTPUT")
+# curl's -w "%{http_code}" already prints 000 when no HTTP response arrives,
+# so appending another "000" on failure would yield "000000" and skip the
+# "not reachable" branch below. Ignore curl's exit status and only default the
+# code when nothing was printed at all (e.g. curl is missing).
+# stderr is discarded instead of being redirected onto the -o file, which
+# would clobber the saved response body.
+HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
+  -H "Content-Type: application/json" -d '{}' \
+  "$PING_URL/reset" --max-time 30 2>/dev/null || true)
+HTTP_CODE="${HTTP_CODE:-000}"
+if [ "$HTTP_CODE" = "200" ]; then
+  pass "HF Space is live and responds to /reset"
+elif [ "$HTTP_CODE" = "000" ]; then
+  fail "HF Space not reachable (connection failed or timed out)"
+  hint "Check your network connection and that the Space is running."
+  # hint prints its argument via %b, so a single % is shown as-is.
+  hint "Try: curl -s -o /dev/null -w '%{http_code}' -X POST $PING_URL/reset"
+  stop_at "Step 1"
+else
+  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
+  hint "Make sure your Space is running and the URL is correct."
+  hint "Try opening $PING_URL in your browser first."
+  stop_at "Step 1"
+fi
+# Step 2: build the Docker image. The Dockerfile is looked up in the repo root
+# first, then in server/; the directory that contains it is the build context.
+log "${BOLD}Step 2/3: Running docker build${NC} ..."
+if ! command -v docker &>/dev/null; then
+  fail "docker command not found"
+  hint "Install Docker: https://docs.docker.com/get-docker/"
+  stop_at "Step 2"
+fi
+if [ -f "$REPO_DIR/Dockerfile" ]; then
+  DOCKER_CONTEXT="$REPO_DIR"
+elif [ -f "$REPO_DIR/server/Dockerfile" ]; then
+  DOCKER_CONTEXT="$REPO_DIR/server"
+else
+  fail "No Dockerfile found in repo root or server/ directory"
+  stop_at "Step 2"
+fi
+log "  Found Dockerfile in $DOCKER_CONTEXT"
+# BUILD_OK flips to true only when the (time-limited) build exits with 0;
+# combined stdout/stderr is captured so the tail can be shown on failure.
+BUILD_OK=false
+BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1) && BUILD_OK=true
+if [ "$BUILD_OK" = true ]; then
+  pass "Docker build succeeded"
+else
+  fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
+  printf "%s\n" "$BUILD_OUTPUT" | tail -20
+  stop_at "Step 2"
+fi
+# Step 3: run `openenv validate` from inside the repo directory.
+log "${BOLD}Step 3/3: Running openenv validate${NC} ..."
+if ! command -v openenv &>/dev/null; then
+  fail "openenv command not found"
+  hint "Install it: pip install openenv-core"
+  stop_at "Step 3"
+fi
+# VALIDATE_OK flips to true only when the command exits with 0; its combined
+# stdout/stderr is captured and echoed in either outcome.
+VALIDATE_OK=false
+VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1) && VALIDATE_OK=true
+if [ "$VALIDATE_OK" = true ]; then
+  pass "openenv validate passed"
+  [ -n "$VALIDATE_OUTPUT" ] && log "  $VALIDATE_OUTPUT"
+else
+  fail "openenv validate failed"
+  printf "%s\n" "$VALIDATE_OUTPUT"
+  stop_at "Step 3"
+fi
+printf "\n"
+printf "${BOLD}========================================${NC}\n"
+printf "${GREEN}${BOLD}  All 3/3 checks passed!${NC}\n"
+printf "${GREEN}${BOLD}  Your submission is ready to submit.${NC}\n"
+printf "${BOLD}========================================${NC}\n"
+printf "\n"
+exit 0

server/grader.py CHANGED Viewed

@@ -32,47 +32,85 @@ def _run_heuristic(task_name):
     env = InventoryEnvironment(task_name)
     obs = env.reset()
     while not obs.done:
         buy = {}
-        delivery = "medium"
         liquidate = {}
-        # check if any event is imminent (within 3 days)
-        event_soon = False
         for event, days in obs.updated_events.items():
-            if 0 < days <= 3:
-                event_soon = True
-                break
         for product, (lo, hi) in task["base_demand"].items():
             avg_demand = (lo + hi) // 2
             current = sum(b[0] for b in obs.updated_inventory.get(product, []))
-            if event_soon:
-                # stock up 5 days' worth before events, use fast shipping
-                target = avg_demand * 5
-                delivery = "fast"
             else:
-                # normal: keep 3 days' buffer
-                target = avg_demand * 3
-            if current < target:
-                buy[product] = target - current
         # liquidate groceries about to expire (1 day left)
         for batch in obs.updated_inventory.get("groceries", []):
             if batch[1] is not None and batch[1] <= 1:
                 liquidate["groceries"] = liquidate.get("groceries", 0) + batch[0]
-        # don't buy on last 2 days
-        if obs.current_day >= task["max_days"] - 2:
             buy = {}
         # don't buy more than cash allows (rough check)
         total_cost = sum(qty * (COST_PRICES[p] + SHIPPING_COST[delivery]) for p, qty in buy.items())
-        if total_cost > obs.total_cash * 0.8:
-            # scale down proportionally
-            scale = (obs.total_cash * 0.8) / total_cost if total_cost > 0 else 0
             buy = {p: max(1, int(qty * scale)) for p, qty in buy.items()}
         action = InventoryAction(

     env = InventoryEnvironment(task_name)
     obs = env.reset()
+    # track recent demand to adapt ordering
+    demand_history = {}
     while not obs.done:
         buy = {}
         liquidate = {}
+        # determine nearest event distance
+        nearest_event_days = 999
         for event, days in obs.updated_events.items():
+            if 0 < days < nearest_event_days:
+                nearest_event_days = days
+        # pick shipping based on urgency
+        if nearest_event_days <= 2:
+            delivery = "fast"
+        elif nearest_event_days <= 5:
+            delivery = "medium"
+        else:
+            delivery = "slow"
+        # update demand history from observation
+        if obs.demand_today:
+            for product, units in obs.demand_today.items():
+                if product not in demand_history:
+                    demand_history[product] = []
+                demand_history[product].append(units)
         for product, (lo, hi) in task["base_demand"].items():
             avg_demand = (lo + hi) // 2
+            # use recent demand if available (last 5 days)
+            if product in demand_history and len(demand_history[product]) >= 2:
+                recent = demand_history[product][-5:]
+                avg_demand = max(avg_demand, int(sum(recent) / len(recent)))
             current = sum(b[0] for b in obs.updated_inventory.get(product, []))
+            # count in-transit units
+            in_transit = 0
+            for d in obs.updated_deliveries:
+                for p, shipment in d.items():
+                    if p == product:
+                        in_transit += shipment[0]
+            available = current + in_transit
+            # how many days of stock to target
+            if nearest_event_days <= 5:
+                target = avg_demand * 6
             else:
+                target = avg_demand * 4
+            # prioritize high-margin products — order more aggressively
+            margin = BASE_PRICES[product] - COST_PRICES[product]
+            if margin >= 50:  # electronics, furniture
+                target = int(target * 1.3)
+            if available < target:
+                buy[product] = target - available
         # liquidate groceries about to expire (1 day left)
         for batch in obs.updated_inventory.get("groceries", []):
             if batch[1] is not None and batch[1] <= 1:
                 liquidate["groceries"] = liquidate.get("groceries", 0) + batch[0]
+        # stop buying when deliveries can't arrive in time
+        days_left = task["max_days"] - obs.current_day
+        if delivery == "slow" and days_left <= 5:
+            buy = {}
+        elif delivery == "medium" and days_left <= 3:
+            buy = {}
+        elif delivery == "fast" and days_left <= 1:
             buy = {}
         # don't buy more than cash allows (rough check)
         total_cost = sum(qty * (COST_PRICES[p] + SHIPPING_COST[delivery]) for p, qty in buy.items())
+        if total_cost > obs.total_cash * 0.85:
+            scale = (obs.total_cash * 0.85) / total_cost if total_cost > 0 else 0
             buy = {p: max(1, int(qty * scale)) for p, qty in buy.items()}
         action = InventoryAction(

server/inventory_env.py CHANGED Viewed

@@ -79,10 +79,9 @@ class InventoryEnvironment(Environment):
         day_cost = 0.0
         day_revenue = 0.0
-        # 1. tick event countdowns
         for event_name in self.events:
-            if self.events[event_name] > 0:
-                self.events[event_name] -= 1
         # 2. remove expired groceries
         new_batches = []
@@ -232,9 +231,9 @@ class InventoryEnvironment(Environment):
             for product in demand:
                 demand[product] = int(demand[product] * WEEKEND_MULTIPLIER)
-        # active event multipliers
         for event_name, days in self.events.items():
-            if days <= 0 and event_name in EVENT_EFFECTS:
                 for product, mult in EVENT_EFFECTS[event_name].items():
                     demand[product] = int(demand[product] * mult)

         day_cost = 0.0
         day_revenue = 0.0
+        # 1. tick event countdowns (keep ticking into negative to track active duration)
         for event_name in self.events:
+            self.events[event_name] -= 1
         # 2. remove expired groceries
         new_batches = []
             for product in demand:
                 demand[product] = int(demand[product] * WEEKEND_MULTIPLIER)
+        # active event multipliers (only for EVENT_DURATION days after triggering)
         for event_name, days in self.events.items():
+            if -EVENT_DURATION < days <= 0 and event_name in EVENT_EFFECTS:
                 for product, mult in EVENT_EFFECTS[event_name].items():
                     demand[product] = int(demand[product] * mult)

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff