← Back to Docs
Python SDK
Compression SDK for OpenAI. Save 40-60% on every call.
Installation
pip install agentready-sdk openai
Method 1: Compress + Call (Recommended)
Compress your messages first, then call OpenAI directly:
import agentready
from openai import OpenAI
import os
# Step 1 — compress
result = agentready.compress(
api_key=os.environ["AGENTREADY_API_KEY"],
messages=[{"role": "user", "content": long_prompt}],
)
# Step 2 — call OpenAI directly
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
response = client.chat.completions.create(
model="gpt-4o",
messages=result["messages"],
)
# → Prompt compressed automatically, saving ~50% tokens
Async Compression
import agentready
from openai import AsyncOpenAI
import os
# Async compress + call
result = await agentready.compress_async(
api_key=os.environ["AGENTREADY_API_KEY"],
messages=[{"role": "user", "content": "Hello!"}],
)
client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
response = await client.chat.completions.create(
model="gpt-4o",
messages=result["messages"],
)
Method 2: Compress with Options
Fine-tune compression with additional options:
import agentready
from openai import OpenAI
import os
result = agentready.compress(
api_key=os.environ["AGENTREADY_API_KEY"],
messages=[{"role": "user", "content": long_prompt}],
level="medium", # light, medium, aggressive
preserve_code=True,
)
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
response = client.chat.completions.create(
model="gpt-4o",
messages=result["messages"],
)
Method 3: Manual Compression
import agentready
import os
agentready.api_key = os.environ["AGENTREADY_API_KEY"]
result = agentready.compress(
"Your long prompt here...",
level="medium", # light, medium, aggressive
preserve_code=True,
target_model="gpt-4",
)
print(result.text) # compressed output
print(result.tokens_saved) # 1,247
print(result.reduction_percent) # 52.3
print(result.savings_usd)      # 0.0374
LangChain Integration
from agentready.integrations.langchain import TokenCutCallbackHandler
from langchain_openai import ChatOpenAI
import os
handler = TokenCutCallbackHandler(api_key=os.environ["AGENTREADY_API_KEY"])
llm = ChatOpenAI(model="gpt-4o", callbacks=[handler])
# All prompts compressed before sending
response = llm.invoke("Your long context here...")
LlamaIndex Integration
from agentready.integrations.llamaindex import TokenCutPostprocessor
import os
postprocessor = TokenCutPostprocessor(api_key=os.environ["AGENTREADY_API_KEY"])
query_engine = index.as_query_engine(
node_postprocessors=[postprocessor]
)
response = query_engine.query("What is the system architecture?")
CrewAI Integration
from agentready.integrations.crewai import create_crewai_llm
from crewai import Agent, Task, Crew
import os
llm = create_crewai_llm(
agentready_key=os.environ["AGENTREADY_API_KEY"],
openai_key=os.environ["OPENAI_API_KEY"],
model="gpt-4o",
)
agent = Agent(
role="Researcher",
goal="Research AI trends",
backstory="Expert AI researcher.",
llm=llm,
)
task = Task(description="Research and summarize AI trends.", agent=agent)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
Streaming
Compress first, then stream with OpenAI as usual:
import agentready
from openai import OpenAI
import os
result = agentready.compress(
api_key=os.environ["AGENTREADY_API_KEY"],
messages=[{"role": "user", "content": long_prompt}],
)
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
stream = client.chat.completions.create(
model="gpt-4o",
messages=result["messages"],
stream=True,
)
for chunk in stream:
print(chunk.choices[0].delta.content or "", end="")
Pricing
Beta — Free unlimited usage. After beta: pay-per-token, ~60% less than direct API costs.