Python SDK

The official OpenAI Python SDK works seamlessly with the Assisters API. Just change the base URL and API key.

Installation

pip install openai

Quick Start

from openai import OpenAI

client = OpenAI(
    api_key="ask_your_api_key",
    base_url="https://api.assisters.dev/v1"
)

response = client.chat.completions.create(
    model="llama-3.1-8b",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]
)

print(response.choices[0].message.content)

Configuration

Environment Variables

export ASSISTERS_API_KEY="ask_your_api_key"

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.environ["ASSISTERS_API_KEY"],
    base_url="https://api.assisters.dev/v1"
)

Client Options

client = OpenAI(
    api_key="ask_...",
    base_url="https://api.assisters.dev/v1",
    timeout=30.0,           # Request timeout in seconds
    max_retries=3,          # Automatic retries
    default_headers={...},  # Custom headers
)
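
These options can also be overridden per request with with_options, a helper inherited from the upstream openai-python SDK (it only affects client-side behavior, so it works the same against the Assisters base URL):

# Override timeout and retries for a single call (illustrative values)
response = client.with_options(timeout=10.0, max_retries=5).chat.completions.create(
    model="llama-3.1-8b",
    messages=[{"role": "user", "content": "Hello!"}]
)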

Chat Completions

Basic Request

response = client.chat.completions.create(
    model="llama-3.1-8b",
    messages=[
        {"role": "user", "content": "What is machine learning?"}
    ],
    temperature=0.7,
    max_tokens=500
)

print(response.choices[0].message.content)
print(f"Tokens used: {response.usage.total_tokens}")

Streaming

stream = client.chat.completions.create(
    model="llama-3.1-8b",
    messages=[{"role": "user", "content": "Write a poem about Python"}],
    stream=True
)

for chunk in stream:
    content = chunk.choices[0].delta.content
    if content:
        print(content, end="", flush=True)

Multi-turn Conversation

messages = [
    {"role": "system", "content": "You are a math tutor."}
]

while True:
    user_input = input("You: ")
    if user_input.lower() == "quit":
        break

    messages.append({"role": "user", "content": user_input})

    response = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=messages
    )

    assistant_message = response.choices[0].message.content
    messages.append({"role": "assistant", "content": assistant_message})

    print(f"Assistant: {assistant_message}")

Embeddings

response = client.embeddings.create(
    model="e5-large-v2",
    input="The quick brown fox jumps over the lazy dog"
)

embedding = response.data[0].embedding
print(f"Dimensions: {len(embedding)}")  # 1024

# Batch embeddings
response = client.embeddings.create(
    model="e5-large-v2",
    input=["First text", "Second text", "Third text"]
)

for i, data in enumerate(response.data):
    print(f"Text {i}: {len(data.embedding)} dimensions")

Moderation

response = client.moderations.create(
    model="llama-guard-3",
    input="Hello, how are you today?"
)

result = response.results[0]
print(f"Flagged: {result.flagged}")
print(f"Categories: {result.categories}")
print(f"Scores: {result.category_scores}")

Async Client

For high-performance applications:

import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI(
    api_key="ask_...",
    base_url="https://api.assisters.dev/v1"
)

async def main():
    response = await client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.choices[0].message.content)

asyncio.run(main())

Async Streaming

async def stream_response():
    stream = await client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": "Tell me a story"}],
        stream=True
    )

    async for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            print(content, end="")

asyncio.run(stream_response())

Concurrent Requests

async def process_batch(prompts):
    tasks = [
        client.chat.completions.create(
            model="llama-3.1-8b",
            messages=[{"role": "user", "content": prompt}]
        )
        for prompt in prompts
    ]

    responses = await asyncio.gather(*tasks)
    return [r.choices[0].message.content for r in responses]

prompts = ["Question 1", "Question 2", "Question 3"]
results = asyncio.run(process_batch(prompts))

Error Handling

from openai import (
    OpenAI,
    APIError,
    RateLimitError,
    AuthenticationError,
    BadRequestError
)

client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")

try:
    response = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": "Hello"}]
    )
except AuthenticationError:
    print("Invalid API key")
except RateLimitError as e:
    print(f"Rate limited. Retry after: {e.response.headers.get('Retry-After')}s")
except BadRequestError as e:
    print(f"Bad request: {e.message}")
except APIError as e:
    # Catch-all for other API errors (connection errors have no status code)
    print(f"API error: {e.message}")

Type Hints

The SDK is fully typed:

from openai import OpenAI
from openai.types.chat import ChatCompletion, ChatCompletionMessage

client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")

def get_response(prompt: str) -> str:
    response: ChatCompletion = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": prompt}]
    )
    message: ChatCompletionMessage = response.choices[0].message
    return message.content or ""

Pydantic Models

Parse responses into Pydantic models:

from pydantic import BaseModel
from typing import List

class Entity(BaseModel):
    name: str
    type: str

class ExtractionResult(BaseModel):
    entities: List[Entity]

response = client.chat.completions.create(
    model="llama-3.1-8b",
    messages=[
        {
            "role": "system",
            "content": "Extract entities as JSON: {\"entities\": [{\"name\": \"...\", \"type\": \"...\"}]}"
        },
        {"role": "user", "content": "John Smith works at Google in New York."}
    ]
)

# Parse response
import json
data = json.loads(response.choices[0].message.content)
result = ExtractionResult(**data)
print(result.entities)

Framework Integration

FastAPI

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from openai import OpenAI

app = FastAPI()
client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")

@app.post("/chat")
async def chat(message: str):
    response = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": message}]
    )
    return {"response": response.choices[0].message.content}

@app.post("/chat/stream")
async def chat_stream(message: str):
    def generate():
        stream = client.chat.completions.create(
            model="llama-3.1-8b",
            messages=[{"role": "user", "content": message}],
            stream=True
        )
        for chunk in stream:
            content = chunk.choices[0].delta.content
            if content:
                yield f"data: {content}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")

Django

# views.py
from django.http import JsonResponse
from openai import OpenAI

client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")

def chat_view(request):
    message = request.POST.get("message")

    response = client.chat.completions.create(
        model="llama-3.1-8b",
        messages=[{"role": "user", "content": message}]
    )

    return JsonResponse({
        "response": response.choices[0].message.content
    })

Best Practices

Use Environment Variables

Never hardcode API keys in your code.

Enable Streaming

Use streaming for better UX in chat apps.

Handle Errors

Implement proper error handling and retries; a minimal retry sketch follows below.

Use Async for Scale

Use AsyncOpenAI for concurrent requests.
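
As a concrete example of the error-handling advice above, here is a minimal retry sketch with exponential backoff, layered on top of the SDK's built-in max_retries. The attempt count and wait times are illustrative choices, not values prescribed by the API:

import time
from openai import OpenAI, RateLimitError, APIConnectionError

client = OpenAI(api_key="ask_...", base_url="https://api.assisters.dev/v1")

def chat_with_backoff(messages, max_attempts=5):
    # Retry transient failures (rate limits, dropped connections) with exponential backoff
    for attempt in range(max_attempts):
        try:
            return client.chat.completions.create(
                model="llama-3.1-8b",
                messages=messages
            )
        except (RateLimitError, APIConnectionError):
            if attempt == max_attempts - 1:
                raise
            time.sleep(2 ** attempt)  # wait 1s, 2s, 4s, 8s between attempts

response = chat_with_backoff([{"role": "user", "content": "Hello!"}])
print(response.choices[0].message.content)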