← Back to Docs
Python SDK
Drop-in proxy for OpenAI. Save 40-60% on every call.
Installation
pip install agentready-sdk openai
Method 1: Drop-in Proxy (Recommended)
Just swap your base_url. Zero code changes to your existing OpenAI calls:
from openai import OpenAI
client = OpenAI(
base_url="https://agentready.cloud/v1", # ← only change
api_key="ak_...", # your AgentReady key
default_headers={
"X-Upstream-API-Key": "sk-...", # your OpenAI key
},
)
# Everything works exactly like before — but 40-60% cheaper
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": long_prompt}]
)
# → Prompt compressed automatically, saving ~50% tokens
One-liner Helper
import agentready
client = agentready.openai("ak_...", upstream_key="sk-...")
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello!"}],
)
# Async client
client = agentready.create_client("ak_...", upstream_key="sk-...", async_client=True)
response = await client.chat.completions.create(...)
Method 2: patch_openai()
Monkey-patch existing code with one function call:
from agentready import patch_openai
patch_openai(api_key="ak_...")
# All existing OpenAI code now compresses automatically
from openai import OpenAI
client = OpenAI()
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": long_prompt}]
)
Method 3: Manual Compression
import agentready
agentready.api_key = "ak_..."
result = agentready.compress(
"Your long prompt here...",
level="medium", # light, medium, aggressive
preserve_code=True,
target_model="gpt-4",
)
print(result.text) # compressed output
print(result.tokens_saved) # 1,247
print(result.reduction_percent) # 52.3
print(result.savings_usd) # 0.0374
LangChain Integration
from agentready.integrations.langchain import TokenCutCallbackHandler
from langchain_openai import ChatOpenAI
handler = TokenCutCallbackHandler(api_key="ak_...")
llm = ChatOpenAI(model="gpt-4o", callbacks=[handler])
# All prompts compressed before sending
response = llm.invoke("Your long context here...")
LlamaIndex Integration
from agentready.integrations.llamaindex import TokenCutPostprocessor
postprocessor = TokenCutPostprocessor(api_key="ak_...")
query_engine = index.as_query_engine(
node_postprocessors=[postprocessor]
)
response = query_engine.query("What is the system architecture?")
CrewAI Integration
from agentready.integrations.crewai import create_crewai_llm
from crewai import Agent, Task, Crew
llm = create_crewai_llm(
agentready_key="ak_...",
upstream_key="sk-...",
model="gpt-4o",
)
agent = Agent(
role="Researcher",
goal="Research AI trends",
backstory="Expert AI researcher.",
llm=llm,
)
task = Task(description="Research and summarize AI trends.", agent=agent)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
Streaming
Full streaming support — works exactly like OpenAI:
client = agentready.openai("ak_...", upstream_key="sk-...")
stream = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": long_prompt}],
stream=True,
)
for chunk in stream:
print(chunk.choices[0].delta.content or "", end="")
Pricing
Beta — Free unlimited usage. After beta: pay-per-token, ~60% less than direct API costs.