# providers.py
import httpx
import time
import os
from typing import Dict, Any, Optional  # NOTE(review): Optional is currently unused.
from dotenv import load_dotenv
# Populate os.environ from a local .env file (silently a no-op if the file is
# absent) so the provider classes below can read their credentials at import time.
load_dotenv()
class AzureOpenAIProvider:
    """Async chat-completions client for a single Azure OpenAI deployment.

    Configuration is read from the environment at construction time:
    AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_DEPLOYMENT, AZURE_OPENAI_API_KEY,
    and optionally AZURE_OPENAI_API_VERSION (defaults to "2025-01-01-preview").
    """

    def __init__(self) -> None:
        self.endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
        self.deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
        self.api_key = os.getenv("AZURE_OPENAI_API_KEY")
        self.api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")

    def is_available(self) -> bool:
        """Return True when endpoint, deployment, and API key are all configured."""
        return all([self.endpoint, self.deployment, self.api_key])

    async def call(self, prompt: str) -> Dict[str, Any]:
        """Send *prompt* to the deployment and return the result.

        Returns:
            On success: {"raw_answer": str, "tokens_used": int | None,
            "latency_ms": int}. When credentials are missing:
            {"error": "skipped_no_key"}. On any request/parse failure:
            {"error": "api_error: ..."} — this method never raises.
        """
        if not self.is_available():
            return {"error": "skipped_no_key"}
        # rstrip("/") so an endpoint configured with a trailing slash does not
        # yield a double slash ("...//openai/...") in the request URL.
        base = self.endpoint.rstrip("/")
        url = (
            f"{base}/openai/deployments/{self.deployment}"
            f"/chat/completions?api-version={self.api_version}"
        )
        headers = {
            "api-key": self.api_key,
            "Content-Type": "application/json"
        }
        data = {
            "messages": [
                {"role": "system", "content": "You are a neutral assistant generating concise, citation-rich answers."},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": 500,
            "temperature": 0.3
        }
        # perf_counter() is monotonic; time.time() can jump (NTP) and skew latency.
        start_time = time.perf_counter()
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(url, headers=headers, json=data, timeout=30.0)
                response.raise_for_status()
                result = response.json()
                latency_ms = int((time.perf_counter() - start_time) * 1000)
                return {
                    "raw_answer": result["choices"][0]["message"]["content"],
                    "tokens_used": result.get("usage", {}).get("total_tokens"),
                    "latency_ms": latency_ms
                }
        except Exception as e:
            # Best-effort provider: report the failure as data, never raise.
            return {"error": f"api_error: {str(e)}"}
class PerplexityProvider:
    """Async chat-completions client for the Perplexity API.

    Reads PERPLEXITY_API_KEY from the environment at construction time.
    The model is overridable via PERPLEXITY_MODEL; the default preserves
    the previous hard-coded behavior.
    """

    def __init__(self) -> None:
        self.api_key = os.getenv("PERPLEXITY_API_KEY")
        # Generalized: model no longer hard-coded in the request body.
        self.model = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-128k-online")

    def is_available(self) -> bool:
        """Return True when an API key is configured."""
        return bool(self.api_key)

    async def call(self, prompt: str) -> Dict[str, Any]:
        """Send *prompt* to Perplexity and return the result.

        Returns:
            On success: {"raw_answer": str, "tokens_used": int | None,
            "latency_ms": int}. When the key is missing:
            {"error": "skipped_no_key"}. On any request/parse failure:
            {"error": "api_error: ..."} — this method never raises.
        """
        if not self.is_available():
            return {"error": "skipped_no_key"}
        url = "https://api.perplexity.ai/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": "You are a neutral assistant generating concise, citation-rich answers."},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": 500,
            "temperature": 0.3
        }
        # perf_counter() is monotonic; time.time() can jump (NTP) and skew latency.
        start_time = time.perf_counter()
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(url, headers=headers, json=data, timeout=30.0)
                response.raise_for_status()
                result = response.json()
                latency_ms = int((time.perf_counter() - start_time) * 1000)
                return {
                    "raw_answer": result["choices"][0]["message"]["content"],
                    "tokens_used": result.get("usage", {}).get("total_tokens"),
                    "latency_ms": latency_ms
                }
        except Exception as e:
            # Best-effort provider: report the failure as data, never raise.
            return {"error": f"api_error: {str(e)}"}
# Provider registry: maps a stable provider id to a singleton instance.
# Instances are created at import time; each reads its credentials from the
# environment in __init__, so a missing key yields is_available() == False
# rather than an import error.
PROVIDERS: Dict[str, Any] = {
    "azure_openai": AzureOpenAIProvider(),
    "perplexity": PerplexityProvider()
}