Everything you need to integrate PromptRouter into your application and start saving on AI costs
Get started in 5 minutes
Complete endpoint documentation
Integration examples
PromptRouter automatically routes your prompts to the most cost-effective AI model while maintaining quality. Here's the complete flow:

💡 Key benefit: Replace one API endpoint and start saving 85-99% on AI costs immediately. No architectural changes needed.
Important: Your API token will only be shown once. Make sure to copy and save it securely!
curl -X POST https://api.prompt-router.com/v1/chat/completions \
-H "Authorization: Bearer YOUR_API_TOKEN" \
-H "Content-Type: application/json" \
-d '{
"messages": [
{"role": "user", "content": "Write a haiku about coding"}
],
"max_tokens": 100
}'

{
"id": "chatcmpl-abc123",
"object": "chat.completion",
"model": "claude-3-haiku-20240307",
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": "Code flows like water\nBugs hide in silent shadows\nDebug brings the light"
},
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": 45,
"completion_tokens": 48,
"total_tokens": 93
},
"x-promptrouter": {
"routing": {
"provider": "anthropic",
"model": "claude-3-haiku-20240307",
"reason": "Cheapest option for simple creative task",
"estimated_cost": 0.00025
},
"savings": {
"actual_cost": 0.00025,
"alternative_cost": 0.00279,
"amount_saved": 0.00254,
"savings_percentage": 91
}
}
}

You just saved 91% on that request! PromptRouter automatically selected Claude Haiku instead of GPT-4 for this creative task.
✨ Drop-in Replacement: PromptRouter provides a fully compatible /v1/chat/completions endpoint. Change 2 lines of code and start saving 30-60% instantly!
from openai import OpenAI
# Before: Direct OpenAI
# client = OpenAI(api_key="sk-...")
# After: PromptRouter (drop-in replacement!)
client = OpenAI(
base_url="https://api.prompt-router.com/v1",
api_key="pr_live_your_token_here"
)
# Same API - automatic cost optimization!
response = client.chat.completions.create(
# Omit 'model' for auto-routing (recommended)
messages=[
{"role": "user", "content": "Explain quantum computing"}
]
)
print(response.choices[0].message.content)
# Check your savings
print(f"Saved: {response.x_promptrouter.savings['amount_saved']:.4f}")

import OpenAI from 'openai';
const client = new OpenAI({
baseURL: 'https://api.prompt-router.com/v1',
apiKey: 'pr_live_your_token_here'
});
const response = await client.chat.completions.create({
messages: [{ role: 'user', content: 'Hello!' }]
});
console.log('Savings:', response['x-promptrouter'].savings);

Omit the model parameter for automatic cost optimization, or specify model="gpt-4" when needed. Routing and savings details are returned in the x-promptrouter field.

Standard OpenAI format with PromptRouter extensions:
{
"id": "chatcmpl-abc123",
"object": "chat.completion",
"model": "gpt-3.5-turbo",
"choices": [...],
"usage": {...},
"x-promptrouter": {
"routing": {
"provider": "openai",
"reason": "Cost-optimized: 86% cheaper..."
},
"savings": {
"amount_saved": 0.000193,
"savings_percentage": 86.3
},
"was_routed": true
}
}

All API requests require authentication using your PromptRouter API token.
Include your token in the Authorization header:
Authorization: Bearer YOUR_API_TOKEN

POST /v1/chat/completions
Execute a chat completion through intelligent routing (OpenAI-compatible)
{
"messages": [
{"role": "system", "content": "string (optional)"},
{"role": "user", "content": "string (required)"}
],
"model": "string (optional - omit for auto-routing)",
"max_tokens": 1000,
"temperature": 0.7
}

{
"id": "chatcmpl-abc123",
"object": "chat.completion",
"model": "gpt-3.5-turbo",
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": "string"
},
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": 45,
"completion_tokens": 48,
"total_tokens": 93
},
"x-promptrouter": {
"routing": {
"provider": "openai",
"model": "gpt-3.5-turbo",
"reason": "Cost optimized"
},
"savings": {
"actual_cost": 0.00025,
"alternative_cost": 0.00279,
"amount_saved": 0.00254,
"savings_percentage": 91
}
}
}

GET /v1/metrics
Get usage statistics and savings
days - Number of days to look back (default: 30)

{
"total_requests": 1234,
"total_tokens": 567890,
"total_spend": 12.45,
"estimated_spend_without_routing": 45.67,
"total_saved": 33.22,
"average_latency_ms": 650,
"error_rate": 0.5
}

// Using fetch
const response = await fetch('https://api.prompt-router.com/v1/chat/completions', {
method: 'POST',
headers: {
'Authorization': 'Bearer YOUR_API_TOKEN',
'Content-Type': 'application/json'
},
body: JSON.stringify({
messages: [
{ role: 'user', content: 'Explain quantum computing in simple terms' }
],
max_tokens: 500
})
});
const data = await response.json();
console.log('Response:', data.choices[0].message.content);
console.log('Saved:', data['x-promptrouter'].savings.amount_saved, 'EUR');

import requests
response = requests.post(
'https://api.prompt-router.com/v1/chat/completions',
headers={
'Authorization': 'Bearer YOUR_API_TOKEN',
'Content-Type': 'application/json'
},
json={
'messages': [
{'role': 'user', 'content': 'Explain quantum computing in simple terms'}
],
'max_tokens': 500
}
)
data = response.json()
print(f"Response: {data['choices'][0]['message']['content']}")
print(f"Saved: €{data['x-promptrouter']['savings']['amount_saved']}")

const axios = require('axios');
const response = await axios.post(
'https://api.prompt-router.com/v1/chat/completions',
{
messages: [
{ role: 'user', content: 'Explain quantum computing in simple terms' }
],
max_tokens: 500
},
{
headers: {
'Authorization': 'Bearer YOUR_API_TOKEN',
'Content-Type': 'application/json'
}
}
);
console.log('Response:', response.data.choices[0].message.content);
console.log('Saved:', response.data['x-promptrouter'].savings.amount_saved, 'EUR');

Don't specify a model - let our routing engine select the optimal one based on your prompt and constraints.
The more providers you add, the better we can optimize. Add OpenAI, Anthropic, Google, and Grok for maximum savings.
Regularly check your dashboard to see savings trends and identify optimization opportunities.
Set quality tiers and latency constraints only when necessary. Fewer constraints = more routing options = better savings.
Never expose your PromptRouter API token in client-side code. Use environment variables and keep it server-side.
Join thousands of developers optimizing their AI costs with PromptRouter