import argparse
import json
import os

import requests


def _build_payload(format_type, prompt, system_message, max_tokens, temperature):
    """Build the request body for the given endpoint format.

    Raises:
        ValueError: If format_type is not one of "openai", "hf_wrapped", "simple".
    """
    if format_type == "openai":
        # Standard OpenAI chat-completions format
        return {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": max_tokens,
            "temperature": temperature
        }
    if format_type == "hf_wrapped":
        # HuggingFace format: OpenAI-style body wrapped in an "inputs" key
        return {
            "inputs": {
                "messages": [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": max_tokens,
                "temperature": temperature
            }
        }
    if format_type == "simple":
        # Bare text prompt in the "inputs" field
        return {
            "inputs": prompt
        }
    raise ValueError(f"Invalid format type: {format_type}")


def query_endpoint(endpoint_url, api_token=None, prompt="Hello, how are you?",
                   system_message="You are a helpful assistant.", max_tokens=256,
                   temperature=0.7, format_type="openai", timeout=60):
    """
    Query the Phi-4 Mini model at the specified HuggingFace Inference Endpoint.

    Args:
        endpoint_url: The URL of your HuggingFace Inference Endpoint
        api_token: Your HuggingFace API token (if needed)
        prompt: The user message to send to the model
        system_message: The system message to include
        max_tokens: Maximum number of tokens to generate
        temperature: Temperature for generation (0.0 to 1.0)
        format_type: Type of request format to use:
            "openai" - Standard OpenAI format
            "hf_wrapped" - HuggingFace format with OpenAI format wrapped in "inputs"
            "simple" - Simple text input in "inputs" field
        timeout: Seconds to wait for the endpoint before aborting (new,
            backward-compatible; previously the request could hang forever)

    Returns:
        The parsed JSON response from the model, or None if the request failed.

    Raises:
        ValueError: If format_type is not a recognized value.
    """
    headers = {
        "Content-Type": "application/json"
    }
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    # May raise ValueError for an unknown format_type (deliberately not caught:
    # that is a caller error, not a transport error).
    payload = _build_payload(format_type, prompt, system_message, max_tokens, temperature)

    try:
        print(f"Request payload: {json.dumps(payload, indent=2)}")
        # json= serializes the payload and sets Content-Type; equivalent to the
        # previous data=json.dumps(payload) but idiomatic. timeout prevents an
        # unresponsive endpoint from blocking indefinitely.
        response = requests.post(endpoint_url, headers=headers, json=payload,
                                 timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        # BUGFIX: Response.__bool__ is False for 4xx/5xx statuses, so the old
        # truthy check (`and e.response`) almost never printed the error body.
        # An explicit None check actually shows the server's error message.
        if hasattr(e, 'response') and e.response is not None:
            print(f"Response content: {e.response.text}")
        return None


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Query a Phi-4 Mini HuggingFace Inference Endpoint")
    parser.add_argument("--url", type=str, required=True,
                        help="The endpoint URL")
    parser.add_argument("--token", type=str, default=os.environ.get("HF_API_TOKEN"),
                        help="HuggingFace API token")
    parser.add_argument("--prompt", type=str,
                        default="Explain quantum computing in simple terms.",
                        help="User prompt")
    parser.add_argument("--system", type=str, default="You are a helpful assistant.",
                        help="System message")
    parser.add_argument("--max_tokens", type=int, default=256,
                        help="Maximum tokens to generate")
    parser.add_argument("--temperature", type=float, default=0.7,
                        help="Temperature (0.0 to 1.0)")
    parser.add_argument("--format", type=str, default="openai",
                        choices=["openai", "hf_wrapped", "simple"],
                        help="Format to use for the request")

    args = parser.parse_args()

    print(f"Querying endpoint: {args.url}")
    print(f"Prompt: {args.prompt}")
    print(f"Format: {args.format}")

    response = query_endpoint(
        args.url,
        args.token,
        args.prompt,
        args.system,
        args.max_tokens,
        args.temperature,
        args.format
    )

    if response:
        print("\nResponse:")
        # OpenAI-style responses carry the text under choices[0].message.content;
        # anything else is dumped verbatim for inspection.
        if "choices" in response and len(response["choices"]) > 0:
            print(response["choices"][0]["message"]["content"])
        else:
            print(json.dumps(response, indent=2))
    else:
        print("Failed to get a valid response")