Python SDK
The official OpenAI Python SDK works seamlessly with the Assisters API. Just change the base URL and API key.
Installation
pip install openai
Quick Start
from openai import OpenAI
client = OpenAI(
    api_key="ask_your_api_key",
    base_url="https://api.assisters.dev/v1"
)

response = client.chat.completions.create(
    model="llama-3.1-8b",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]
)
print(response.choices[0].message.content)
Configuration
Environment Variables
export ASSISTERS_API_KEY="ask_your_api_key"
import os
from openai import OpenAI
client = OpenAI(
    api_key=os.environ["ASSISTERS_API_KEY"],
    base_url="https://api.assisters.dev/v1"
)
Client Options
client = OpenAI(
api_key="ask_...",
base_url="https://api.assisters.dev/v1",
timeout=30.0, # Request timeout in seconds
max_retries=3, # Automatic retries
default_headers={...}, # Custom headers
)
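For example, to attach a custom header to every request, pass it through default_headers. This is a minimal sketch; the X-Request-Source header is purely illustrative, not something the API requires:
client = OpenAI(
    api_key="ask_...",
    base_url="https://api.assisters.dev/v1",
    timeout=30.0,
    max_retries=3,
    default_headers={"X-Request-Source": "docs-example"},  # illustrative header only
)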
Chat Completions
Basic Request
response = client.chat.completions.create(
model="llama-3.1-8b",
messages=[
{"role": "user", "content": "What is machine learning?"}
],
temperature=0.7,
max_tokens=500
)
print(response.choices[0].message.content)
print(f"Tokens used: {response.usage.total_tokens}")
Streaming
stream = client.chat.completions.create(
model="llama-3.1-8b",
messages=[{"role": "user", "content": "Write a poem about Python"}],
stream=True
)
for chunk in stream:
content = chunk.choices[0].delta.content
if content:
print(content, end="", flush=True)
Multi-turn Conversation
messages = [
{"role": "system", "content": "You are a math tutor."}
]
while True:
user_input = input("You: ")
if user_input.lower() == "quit":
break
messages.append({"role": "user", "content": user_input})
response = client.chat.completions.create(
model="llama-3.1-8b",
messages=messages
)
assistant_message = response.choices[0].message.content
messages.append({"role": "assistant", "content": assistant_message})
print(f"Assistant: {assistant_message}")
Embeddings
response = client.embeddings.create(
model="e5-large-v2",
input="The quick brown fox jumps over the lazy dog"
)
embedding = response.data[0].embedding
print(f"Dimensions: {len(embedding)}") # 1024
# Batch embeddings
response = client.embeddings.create(
model="e5-large-v2",
input=["First text", "Second text", "Third text"]
)
for i, data in enumerate(response.data):
print(f"Text {i}: {len(data.embedding)} dimensions")
Moderation
response = client.moderations.create(
model="llama-guard-3",
input="Hello, how are you today?"
)
result = response.results[0]
print(f"Flagged: {result.flagged}")
print(f"Categories: {result.categories}")
print(f"Scores: {result.category_scores}")
Async Client
For high-performance applications:
import asyncio
from openai import AsyncOpenAI
client = AsyncOpenAI(
api_key="ask_...",
base_url="https://api.assisters.dev/v1"
)
async def main():
response = await client.chat.completions.create(
model="llama-3.1-8b",
messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)
asyncio.run(main())
Async Streaming
async def stream_response():
    stream = await client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": "Tell me a story"}],
        stream=True
    )
    async for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            print(content, end="")

asyncio.run(stream_response())
Concurrent Requests
async def process_batch(prompts):
    tasks = [
        client.chat.completions.create(
            model="llama-3.1-8b",
            messages=[{"role": "user", "content": prompt}]
        )
        for prompt in prompts
    ]
    responses = await asyncio.gather(*tasks)
    return [r.choices[0].message.content for r in responses]

prompts = ["Question 1", "Question 2", "Question 3"]
results = asyncio.run(process_batch(prompts))
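asyncio.gather fires every request at once, which can trip rate limits for large batches. One way to cap concurrency is an asyncio.Semaphore. A sketch assuming a limit of 5 in-flight requests (the limit is arbitrary, and both helper functions are hypothetical):
semaphore = asyncio.Semaphore(5)  # at most 5 requests in flight at a time

async def limited_completion(prompt):
    async with semaphore:
        response = await client.chat.completions.create(
            model="llama-3.1-8b",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

async def process_batch_limited(prompts):
    return await asyncio.gather(*(limited_completion(p) for p in prompts))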
Error Handling
from openai import (
    OpenAI,
    APIError,
    RateLimitError,
    AuthenticationError,
    BadRequestError
)
client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")
try:
    response = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": "Hello"}]
    )
except AuthenticationError:
    print("Invalid API key")
except RateLimitError as e:
    print(f"Rate limited. Retry after: {e.response.headers.get('Retry-After')}s")
except BadRequestError as e:
    print(f"Bad request: {e.message}")
except APIError as e:
    # Base class also covers connection and timeout errors, which carry no status code
    print(f"API error: {e.message}")
Type Hints
The SDK is fully typed:
from openai import OpenAI
from openai.types.chat import ChatCompletion, ChatCompletionMessage
client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")
def get_response(prompt: str) -> str:
    response: ChatCompletion = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": prompt}]
    )
    message: ChatCompletionMessage = response.choices[0].message
    return message.content or ""
Pydantic Models
Parse responses into Pydantic models:
from pydantic import BaseModel
from typing import List
class Entity(BaseModel):
    name: str
    type: str

class ExtractionResult(BaseModel):
    entities: List[Entity]
response = client.chat.completions.create(
model="llama-3.1-8b",
messages=[
{
"role": "system",
"content": "Extract entities as JSON: {\"entities\": [{\"name\": \"...\", \"type\": \"...\"}]}"
},
{"role": "user", "content": "John Smith works at Google in New York."}
]
)
# Parse response
import json
data = json.loads(response.choices[0].message.content)
result = ExtractionResult(**data)
print(result.entities)
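Model output is not guaranteed to be valid JSON, so the parse can fail. A small sketch that guards it; the empty-result fallback is just one possible choice:
from json import JSONDecodeError
from pydantic import ValidationError

try:
    data = json.loads(response.choices[0].message.content or "")
    result = ExtractionResult(**data)
except (JSONDecodeError, ValidationError):
    # Fall back to an empty result if the model returned malformed output
    result = ExtractionResult(entities=[])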
Framework Integration
FastAPI
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from openai import OpenAI
app = FastAPI()
client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")
@app.post("/chat")
async def chat(message: str):
    response = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": message}]
    )
    return {"response": response.choices[0].message.content}

@app.post("/chat/stream")
async def chat_stream(message: str):
    def generate():
        stream = client.chat.completions.create(
            model="llama-3.1-8b",
            messages=[{"role": "user", "content": message}],
            stream=True
        )
        for chunk in stream:
            content = chunk.choices[0].delta.content
            if content:
                yield f"data: {content}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
Django
# views.py
from django.http import JsonResponse
from openai import OpenAI
client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")
def chat_view(request):
    message = request.POST.get("message")
    response = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": message}]
    )
    return JsonResponse({
        "response": response.choices[0].message.content
    })
Best Practices
Use Environment Variables
Never hardcode API keys in your code
Enable Streaming
Use streaming for better UX in chat apps
Handle Errors
Implement proper error handling and retries
Use Async for Scale
Use AsyncOpenAI for concurrent requests (see the combined sketch below)
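A short sketch that ties these practices together: the key comes from the environment, the async client is configured with a timeout and retries, output is streamed, and errors are handled. The settings shown are illustrative defaults, not requirements, and ask is a hypothetical helper:
import asyncio
import os

from openai import AsyncOpenAI, APIError

client = AsyncOpenAI(
    api_key=os.environ["ASSISTERS_API_KEY"],  # key from the environment, never hardcoded
    base_url="https://api.assisters.dev/v1",
    timeout=30.0,
    max_retries=3,
)

async def ask(prompt: str) -> None:
    try:
        stream = await client.chat.completions.create(
            model="llama-3.1-8b",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        async for chunk in stream:
            content = chunk.choices[0].delta.content
            if content:
                print(content, end="", flush=True)
    except APIError as e:
        print(f"Request failed: {e.message}")

asyncio.run(ask("Hello!"))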