Skip to main content

Agent with Code Execution

Data analysis agent that uses Python code execution.

Python Implementation

import requests

# Create a data-analysis agent with the code_execution builtin tool enabled.
create_response = requests.post(
    "http://localhost/api/v1/agent/configs",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "name": "data_analyst",
        "display_name": "Data Analyst",
        "description": "Executes Python code for data analysis and visualization",
        "category": "analysis",
        "system_prompt": "You are a data analyst. Use code execution to process data, generate insights, and create visualizations. Explain your methods clearly. Use the pandas, numpy, matplotlib, and seaborn libraries. Return results in a clear, tabular format.",
        "model": "gpt-4",
        "temperature": 0.2,
        "builtin_tools": ["code_execution"],
        "tool_choice": "required",
        "max_iterations": 10,
        "tool_resources": {
            "code_execution": {
                "timeout": 60,
                "allowed_libraries": ["pandas", "numpy", "matplotlib", "seaborn", "scikit-learn"],
            }
        },
    },
)
# Fail fast on an HTTP error instead of a confusing KeyError on the next line.
create_response.raise_for_status()
agent_id = create_response.json()["id"]

# Chat with the agent.
chat_response = requests.post(
    "http://localhost/api/v1/agent/chat",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "agent_id": agent_id,
        "message": "Analyze this dataset: Generate 100 random numbers between 0 and 100, calculate mean, median, standard deviation, and create a histogram.",
    },
)
chat_response.raise_for_status()

result = chat_response.json()
print(f"Response: {result['response']}")

# Inspect the tool-execution trace, if the response includes one.
if "tool_calls" in result:
    print("\nTool execution trace:")
    for tool_call in result["tool_calls"]:
        print(f"  Tool: {tool_call['tool_name']}")
        print(f"  Parameters: {tool_call.get('parameters', {})}")
        print(f"  Status: {tool_call['status']}")
        if "result" in tool_call:
            # FIX: the original sliced tool_call['result'] as a string and then
            # called .get('stderr') on it as a dict. Handle both shapes.
            output = tool_call["result"]
            if isinstance(output, dict):
                # Assumes dict results carry 'stdout'/'stderr' keys — confirm
                # against the API's actual response schema.
                print(f"  Output: {str(output.get('stdout', ''))[:200]}...")
                print(f"  Stderr: {output.get('stderr', '')}")
            else:
                print(f"  Output: {str(output)[:200]}...")

Code Execution Examples

Basic Statistics

# Agent will execute this code.
# FIX: the original nested a triple-double-quoted f-string inside a
# triple-double-quoted outer string, which terminated the outer literal
# early and left it unclosed. The inner f-string now uses ''' quotes.
code = """
import pandas as pd
import numpy as np

# Generate sample data
np.random.seed(42)
data = pd.DataFrame({
    'values': np.random.randn(100),
    'category': np.random.choice(['A', 'B', 'C'], 100)
})

# Calculate statistics
mean = data['values'].mean()
median = data['values'].median()
std = data['values'].std()

# Format output
result = f'''
Mean: {mean:.2f}
Median: {median:.2f}
Std Dev: {std:.2f}
'''
"""

Data Visualization

code = """
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load data
data = pd.DataFrame({
'month': range(1, 13),
'sales': np.random.randint(5000, 20000, 12)
})

# Create histogram
plt.figure(figsize=(10, 6))
plt.hist(data['sales'], bins=30, edgecolor='skyblue')
plt.xlabel('Sales')
plt.ylabel('Frequency')
plt.title('Monthly Sales Distribution')

# Save plot
plt.savefig('/tmp/sales_histogram.png')

# Return as base64
import base64
with open('/tmp/sales_histogram.png', 'rb') as f:
plot_data = base64.b64encode(f.read())
"""

Data Analysis

code = """
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np

# Sample data
data = pd.DataFrame({
'x': np.random.randn(100),
'y': np.random.randn(100) * 2 + 1
})

# Fit model
model = LinearRegression()
X = data[['x']]
y = data['y']
model.fit(X, y)

# Make prediction
predictions = model.predict(np.array([[1.5]]))
print(f"Predicted value: {predictions[0]:.2f}")
"""

Tool Configuration

Timeout Settings

{
"tool_resources": {
"code_execution": {
"timeout": 30
}
}
}

Sets a 30-second code-execution timeout. (The inline `#` comment was removed because JSON does not allow comments.)

Allowed Libraries

{
"tool_resources": {
"code_execution": {
"allowed_libraries": [
"pandas",
"numpy",
"matplotlib",
"seaborn",
"scikit-learn"
]
}
}
}

Best Practices

Code Quality

  1. Use Libraries: Leverage pandas, numpy for efficiency
  2. Error Handling: Include try-except blocks in code
  3. Clear Output: Print results with clear formatting
  4. Comment Code: Add comments explaining analysis steps
  5. Return Data: Format results as structured data (JSON, CSV)

Security Considerations

  1. Sandboxed File System: Code runs in an isolated environment; file access is limited to sandboxed temporary storage (e.g. /tmp)
  2. No Network Access: No internet access by default
  3. Restricted Libraries: Only allow safe, approved libraries
  4. Memory Limits: Code has memory and time constraints
  5. Sanitize Inputs: Validate user inputs before executing

Performance

  1. Optimize Libraries: Use vectorized operations in pandas/numpy
  2. Limit Data Size: Process samples or aggregates for large datasets
  3. Complexity Awareness: Avoid O(n²) operations
  4. Timeout Management: Set appropriate timeouts for code execution

Error Handling

Timeout Error

# Shape of the error payload returned when execution exceeds the configured
# timeout ("timeout" under tool_resources.code_execution).
result = {
"tool_name": "code_execution",
"status": "error",
"error": "Execution failed",
"details": {
"error_type": "timeout",
"message": "Code execution exceeded timeout of 60 seconds"
}
}

Handle by:

  • Adjusting timeout in agent config
  • Breaking long tasks into smaller steps
  • Using more efficient code

Code Execution Failed

# Shape of the error payload returned when the submitted code fails to
# parse; "line_number" points at the offending line in the submitted code.
result = {
"tool_name": "code_execution",
"status": "error",
"error": "Syntax error in code",
"details": {
"error_type": "syntax_error",
"message": "Unexpected EOF while parsing",
"line_number": 42
}
}

Handle by:

  • Pre-validating code syntax
  • Checking for common errors
  • Providing helpful error messages to LLM

Library Import Errors

# Shape of the error payload returned when the code imports a library that
# is not on the agent's allowed_libraries list.
result = {
"tool_name": "code_execution",
"status": "error",
"error": "Import failed",
"details": {
"error_type": "import_error",
"message": "Library 'tensorflow' is not allowed",
"library": "tensorflow"
}
}

Handle by:

  • Checking allowed libraries list
  • Providing clear error messages
  • Suggesting alternatives

Use Cases

Exploratory Data Analysis

query = "Explore the sales data and identify trends, patterns, and insights"

code = """
import pandas as pd
import numpy as np

# Sample data
data = pd.read_csv('sales.csv')

# Basic statistics
print(data.describe())

# Trend analysis
data['date'] = pd.to_datetime(data['date'])
monthly_sales = data.groupby(data['date'].dt.to_period('M'))['sales'].sum()
print(monthly_sales.head(12))
"""

Customer Churn Analysis

query = "Analyze customer data and identify churn risk factors"

code = """
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load data
data = pd.read_csv('customers.csv')

# Features
features = ['purchase_count', 'avg_order_value', 'days_since_last_purchase']
X = data[features]
y = data['churn']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict
predictions = model.predict(X_test)
"""

A/B Testing Analysis

query = "Compare the performance of two different features or products"

code = """
import pandas as pd
from scipy import stats

# Load data
data = pd.read_csv('experiment_results.csv')

# Group by variant
variant_a = data[data['variant'] == 'A']['conversion_rate']
variant_b = data[data['variant'] == 'B']['conversion_rate']

# Perform t-test
t_stat, p_value = stats.ttest_ind(variant_a['conversion_rate'], variant_b['conversion_rate'])

print(f"Variant A: {variant_a['conversion_rate']:.2%}")
print(f"Variant B: {variant_b['conversion_rate']:.2%}")
print(f"P-value: {p_value:.4f}")
print(f"Significant: {p_value < 0.05}")
"""

Advanced Patterns

Data Visualization Pipeline

code = """
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load data
data = pd.read_csv('metrics.csv')

# Create multi-panel visualization
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Plot 1: Sales over time
axes[0, 0].plot(data['date'], data['sales'])
axes[0, 0].set_title('Sales Over Time')
axes[0, 0].set_ylabel('Sales')

# Plot 2: Category distribution
category_counts = data['category'].value_counts()
axes[0, 1].bar(category_counts.index, category_counts.values)
axes[0, 1].set_title('Sales by Category')
axes[0, 1].set_ylabel('Count')

plt.tight_layout()
plt.savefig('/tmp/sales_dashboard.png')
"""

Automated Reporting

code = """
import pandas as pd
import matplotlib.pyplot as plt

# Generate daily report
data = pd.read_csv('sales_data.csv')

# Daily summary
daily_summary = data.groupby('date').agg({
'sales': 'sum',
'orders': 'count',
'unique_customers': 'nunique'
})

# Create visualizations
fig, axes = plt.subplots(2, 1, figsize=(12, 8))

# Line chart
axes[0, 0].plot(daily_summary.index, daily_summary['sales'])
axes[0, 0].set_title('Daily Sales')
axes[0, 0].set_ylabel('Sales')

# Bar chart
axes[1, 0].bar(daily_summary['unique_customers'])
axes[1, 0].set_title('Unique Customers')
axes[1, 0].set_ylabel('Count')

plt.tight_layout()
plt.savefig('/tmp/daily_report.png')
"""

Troubleshooting

Code Execution Timeout

Problem: Code takes longer than allowed timeout

Solutions:

  • Break large analysis into smaller steps
  • Use more efficient libraries and algorithms
  • Reduce data size by sampling
  • Increase timeout in agent configuration

Memory Issues

Problem: Code execution fails with memory error

Solutions:

  • Use data chunking for large datasets
  • Process data in batches
  • Reduce memory usage in code
  • Use more memory-efficient data types

Library Import Failures

Problem: Required library not available or import fails

Solutions:

  • Verify library is in allowed list
  • Check Python environment and dependencies
  • Use alternative libraries if needed
  • Provide clear error messages about which library is missing

Next Steps