| | import requests |
| | import json |
| | import os |
| | import argparse |
| |
|
def query_endpoint(endpoint_url, api_token=None, prompt="Hello, how are you?",
                   system_message="You are a helpful assistant.",
                   max_tokens=256, temperature=0.7,
                   format_type="openai", timeout=60):
    """
    Query the Phi-4 Mini model at the specified HuggingFace Inference Endpoint.

    Args:
        endpoint_url: The URL of your HuggingFace Inference Endpoint
        api_token: Your HuggingFace API token (if needed)
        prompt: The user message to send to the model
        system_message: The system message to include
        max_tokens: Maximum number of tokens to generate
        temperature: Temperature for generation (0.0 to 1.0)
        format_type: Type of request format to use:
            "openai" - Standard OpenAI format
            "hf_wrapped" - HuggingFace format with OpenAI format wrapped in "inputs"
            "simple" - Simple text input in "inputs" field
        timeout: Seconds to wait for the endpoint before aborting (default 60)

    Returns:
        The parsed JSON response from the model, or None if the request failed.

    Raises:
        ValueError: If format_type is not one of the supported values.
    """
    headers = {}
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    # Build the request payload in the shape the endpoint expects.
    if format_type == "openai":
        payload = {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": max_tokens,
            "temperature": temperature
        }
    elif format_type == "hf_wrapped":
        payload = {
            "inputs": {
                "messages": [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": max_tokens,
                "temperature": temperature
            }
        }
    elif format_type == "simple":
        payload = {
            "inputs": prompt
        }
    else:
        raise ValueError(f"Invalid format type: {format_type}")

    try:
        print(f"Request payload: {json.dumps(payload, indent=2)}")
        # json= serializes the payload and sets Content-Type: application/json;
        # timeout= prevents the script from hanging forever on a stalled endpoint.
        response = requests.post(endpoint_url, headers=headers, json=payload,
                                 timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        # NOTE: must compare against None — a Response with an error status is
        # falsy (Response.__bool__ returns .ok), so a truthiness check would
        # skip printing the body exactly when it matters most.
        if getattr(e, 'response', None) is not None:
            print(f"Response content: {e.response.text}")
        return None
| |
|
if __name__ == "__main__":
    # Command-line interface: every model/request knob is exposed as a flag.
    cli = argparse.ArgumentParser(description="Query a Phi-4 Mini HuggingFace Inference Endpoint")
    cli.add_argument("--url", type=str, required=True, help="The endpoint URL")
    cli.add_argument("--token", type=str, default=os.environ.get("HF_API_TOKEN"), help="HuggingFace API token")
    cli.add_argument("--prompt", type=str, default="Explain quantum computing in simple terms.", help="User prompt")
    cli.add_argument("--system", type=str, default="You are a helpful assistant.", help="System message")
    cli.add_argument("--max_tokens", type=int, default=256, help="Maximum tokens to generate")
    cli.add_argument("--temperature", type=float, default=0.7, help="Temperature (0.0 to 1.0)")
    cli.add_argument("--format", type=str, default="openai",
                     choices=["openai", "hf_wrapped", "simple"],
                     help="Format to use for the request")

    opts = cli.parse_args()

    print(f"Querying endpoint: {opts.url}")
    print(f"Prompt: {opts.prompt}")
    print(f"Format: {opts.format}")

    result = query_endpoint(
        opts.url,
        opts.token,
        opts.prompt,
        opts.system,
        opts.max_tokens,
        opts.temperature,
        opts.format,
    )

    # Guard-first: handle the failure case, then pretty-print whatever came back.
    if not result:
        print("Failed to get a valid response")
    else:
        print("\nResponse:")
        if "choices" in result and len(result["choices"]) > 0:
            # OpenAI-style response: surface just the assistant message text.
            print(result["choices"][0]["message"]["content"])
        else:
            # Unknown shape: dump the raw JSON so the caller can inspect it.
            print(json.dumps(result, indent=2))