# phi4-mini-raw / test_handler.py
# Last change: "update handler" by Yong Liu (commit 051c5a5)
import argparse
import json
import sys
import traceback

from handler import EndpointHandler
def test_inference(model_path=".", prompt=None, max_tokens=150, temperature=0.7):
    """
    Test the inference endpoint handler with a sample request.

    Args:
        model_path: Path to the model directory.
        prompt: Custom user prompt; when None, a default quantum-computing
            question is used.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature for generation.

    Returns:
        The handler's response object on success, or a ``{"error": ...}``
        dict if any exception occurred.
    """
    try:
        print(f"Initializing handler with model path: {model_path}")
        handler = EndpointHandler(model_path)

        # Only the user turn varies; build the message list once instead of
        # duplicating the system message in two branches. `is None` (not
        # truthiness) so an explicit empty-string prompt is still honored.
        user_content = (
            "Explain quantum computing in simple terms." if prompt is None else prompt
        )
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_content},
        ]

        # Sample request with OpenAI-like format.
        request = {
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95,
        }
        print("Sending request to handler...")
        print(f"Request: {json.dumps(request, indent=2)}")

        # Generate response.
        response = handler(request)

        # Print response in a readable format.
        print("\nResponse:")
        print(json.dumps(response, indent=2))
        return response
    except Exception as e:
        # Broad catch is deliberate: this is a CLI test harness, so we want a
        # readable error message plus traceback instead of a crash.
        print(f"Error during inference: {str(e)}", file=sys.stderr)
        traceback.print_exc()
        return {"error": str(e)}
if __name__ == "__main__":
    # CLI entry point: parse options and run a single test inference.
    arg_parser = argparse.ArgumentParser(description="Test Phi-4 Mini inference")
    arg_parser.add_argument(
        "--model_path", type=str, default=".", help="Path to the model directory"
    )
    arg_parser.add_argument("--prompt", type=str, help="Custom prompt to use")
    arg_parser.add_argument(
        "--max_tokens",
        type=int,
        default=150,
        help="Maximum number of tokens to generate",
    )
    arg_parser.add_argument(
        "--temperature", type=float, default=0.7, help="Temperature for generation"
    )
    cli = arg_parser.parse_args()

    test_inference(
        model_path=cli.model_path,
        prompt=cli.prompt,
        max_tokens=cli.max_tokens,
        temperature=cli.temperature,
    )