saketh1201 committed on
Commit
8e28c8f
·
verified ·
1 Parent(s): ff7be06

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +28 -0
  2. server/app.py +0 -9
README.md CHANGED
@@ -183,6 +183,34 @@ docker build -t inventory-env .
183
  docker run -p 8000:8000 inventory-env
184
  ```
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  ## Step Execution Order
187
 
188
  Each `step()` call processes in this order:
 
183
  docker run -p 8000:8000 inventory-env
184
  ```
185
 
186
+ ## API Endpoints
187
+
188
+ | Endpoint | Method | Description |
189
+ |----------|--------|-------------|
190
+ | `/health` | GET | Health check — returns 200 if server is running |
191
+ | `/reset` | POST | Reset environment, returns initial observation |
192
+ | `/step` | POST | Submit an action (JSON body), returns next observation with reward |
193
+ | `/state` | GET | Get current episode state (day, cash, inventory) |
194
+ | `/tasks` | GET | List all 3 tasks with full config (stock, capacity, demand ranges, events) |
195
+ | `/grader` | POST | Score an episode given task name and agent profit |
196
+ | `/baseline` | GET | Run LLM inference on a task and return the score |
197
+
198
+ ### Example Queries
199
+
200
+ ```bash
201
+ # List all tasks with full schemas
202
+ curl http://localhost:8000/tasks
203
+
204
+ # Grade a specific profit
205
+ curl -X POST "http://localhost:8000/grader?task_name=easy&agent_profit=5000"
206
+ # → {"task_name":"easy","agent_profit":5000.0,"floor":2200.0,"ceiling":10011.0,"score":0.358}
207
+
208
+ # Run baseline inference (requires API keys in container env)
209
+ curl "http://localhost:8000/baseline"
210
+ curl "http://localhost:8000/baseline?task_name=hard"
211
+ # → e.g. {"task_name":"easy","score":0.822} for the default task (task_name=easy)
212
+ ```
213
+
214
  ## Step Execution Order
215
 
216
  Each `step()` call processes in this order:
server/app.py CHANGED
@@ -64,27 +64,18 @@ def baseline_endpoint(task_name: str = "easy"):
64
  env=env,
65
  )
66
  output = result.stdout
67
- stderr = result.stderr
68
 
69
  # parse score from output
70
  score = None
71
- profit = None
72
  for line in output.splitlines():
73
  if task_name + ":" in line and "profit" in line:
74
  score_match = re.search(r"(\d+\.\d+)\s*\(profit", line)
75
- profit_match = re.search(r"profit:\s*\$([0-9.]+)", line)
76
  if score_match:
77
  score = float(score_match.group(1))
78
- if profit_match:
79
- profit = float(profit_match.group(1))
80
 
81
  return {
82
  "task_name": task_name,
83
  "score": score,
84
- "profit": profit,
85
- "stdout": output[-2000:] if len(output) > 2000 else output,
86
- "stderr": stderr[-500:] if stderr else None,
87
- "returncode": result.returncode,
88
  }
89
  except subprocess.TimeoutExpired:
90
  return {"error": "Inference timed out (20 min limit)"}
 
64
  env=env,
65
  )
66
  output = result.stdout
 
67
 
68
  # parse score from output
69
  score = None
 
70
  for line in output.splitlines():
71
  if task_name + ":" in line and "profit" in line:
72
  score_match = re.search(r"(\d+\.\d+)\s*\(profit", line)
 
73
  if score_match:
74
  score = float(score_match.group(1))
 
 
75
 
76
  return {
77
  "task_name": task_name,
78
  "score": score,
 
 
 
 
79
  }
80
  except subprocess.TimeoutExpired:
81
  return {"error": "Inference timed out (20 min limit)"}