# phi4-mini-raw / example_client.py
# Author: Yong Liu
# Last commit: "update handler" (093ad9c)
import requests
import json
import os
import argparse
def query_endpoint(endpoint_url, api_token=None, prompt="Hello, how are you?",
                   system_message="You are a helpful assistant.",
                   max_tokens=256, temperature=0.7,
                   format_type="openai", timeout=30):
    """
    Query the Phi-4 Mini model at the specified HuggingFace Inference Endpoint.

    Args:
        endpoint_url: The URL of your HuggingFace Inference Endpoint
        api_token: Your HuggingFace API token (if needed)
        prompt: The user message to send to the model
        system_message: The system message to include
        max_tokens: Maximum number of tokens to generate
        temperature: Temperature for generation (0.0 to 1.0)
        format_type: Type of request format to use:
            "openai" - Standard OpenAI format
            "hf_wrapped" - HuggingFace format with OpenAI format wrapped in "inputs"
            "simple" - Simple text input in "inputs" field
        timeout: Seconds to wait for the server response (default 30) so a
            dead endpoint cannot hang the client forever.

    Returns:
        The parsed JSON response from the model, or None on any request error.

    Raises:
        ValueError: If format_type is not one of the supported values.
    """
    # Prepare headers
    headers = {
        "Content-Type": "application/json"
    }
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    # Prepare the request payload based on format_type
    if format_type == "openai":
        # Standard OpenAI chat-completions shape
        payload = {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": max_tokens,
            "temperature": temperature
        }
    elif format_type == "hf_wrapped":
        # Same OpenAI shape, nested under HuggingFace's "inputs" key
        payload = {
            "inputs": {
                "messages": [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": max_tokens,
                "temperature": temperature
            }
        }
    elif format_type == "simple":
        # Bare text prompt; system message and sampling params are not sent
        payload = {
            "inputs": prompt
        }
    else:
        # Fail fast before any network work
        raise ValueError(f"Invalid format type: {format_type}")

    # Make the request
    try:
        print(f"Request payload: {json.dumps(payload, indent=2)}")
        # json= serializes the payload for us; explicit headers still apply.
        # timeout= bounds both connect and read waits.
        response = requests.post(endpoint_url, headers=headers, json=payload,
                                 timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        # Parse and return the response
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        # BUG FIX: requests.Response is falsy for 4xx/5xx statuses, so the
        # original `and e.response` test skipped printing the error body in
        # exactly the cases where one exists. Compare against None instead.
        if getattr(e, 'response', None) is not None:
            print(f"Response content: {e.response.text}")
        return None
if __name__ == "__main__":
    # Command-line entry point: parse options, send one query, print result.
    arg_parser = argparse.ArgumentParser(description="Query a Phi-4 Mini HuggingFace Inference Endpoint")
    arg_parser.add_argument("--url", type=str, required=True, help="The endpoint URL")
    arg_parser.add_argument("--token", type=str, default=os.environ.get("HF_API_TOKEN"), help="HuggingFace API token")
    arg_parser.add_argument("--prompt", type=str, default="Explain quantum computing in simple terms.", help="User prompt")
    arg_parser.add_argument("--system", type=str, default="You are a helpful assistant.", help="System message")
    arg_parser.add_argument("--max_tokens", type=int, default=256, help="Maximum tokens to generate")
    arg_parser.add_argument("--temperature", type=float, default=0.7, help="Temperature (0.0 to 1.0)")
    arg_parser.add_argument("--format", type=str, default="openai",
                            choices=["openai", "hf_wrapped", "simple"],
                            help="Format to use for the request")
    opts = arg_parser.parse_args()

    print(f"Querying endpoint: {opts.url}")
    print(f"Prompt: {opts.prompt}")
    print(f"Format: {opts.format}")

    result = query_endpoint(
        opts.url,
        opts.token,
        opts.prompt,
        opts.system,
        opts.max_tokens,
        opts.temperature,
        opts.format
    )

    # Guard clause: a None/empty result means the request itself failed.
    if not result:
        print("Failed to get a valid response")
    else:
        print("\nResponse:")
        if "choices" in result and len(result["choices"]) > 0:
            # OpenAI-style response: show only the assistant's message text.
            print(result["choices"][0]["message"]["content"])
        else:
            # Unknown shape: dump the raw JSON for inspection.
            print(json.dumps(result, indent=2))