Upload Documents Example
Complete examples for uploading documents to RAG collections.
Basic Upload
import requests
def upload_document(file_path, collection_name, description=None):
    """Upload a single file to a RAG collection.

    Args:
        file_path: Path of the local file to send.
        collection_name: Target collection, sent as the ``collection_name``
            form field.
        description: Optional free-text description; omitted from the request
            when empty or ``None``.

    Returns:
        The decoded JSON body of the upload response.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        requests.exceptions.RequestException: On connection problems, timeout,
            or a non-2xx HTTP status.
    """
    data = {"collection_name": collection_name}
    if description:
        data["description"] = description

    with open(file_path, "rb") as f:
        response = requests.post(
            "http://localhost/api/v1/rag/upload",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            files={"file": f},
            data=data,
            # Without a timeout requests waits forever on a stalled server.
            timeout=60,
        )

    # Fail loudly on 4xx/5xx instead of handing an error payload to the caller.
    response.raise_for_status()
    return response.json()
# Send the PDF user manual to the "documentation" collection and show the reply
result = upload_document(
    file_path="user_manual.pdf",
    collection_name="documentation",
    description="Complete user manual for product v2.0",
)
print(f"Document ID: {result['document_id']}")
print(f"Status: {result['status']}")
Upload with Custom Metadata
import requests
import json
def upload_with_metadata(
    file_path,
    collection_name,
    metadata,
    description="Policy document with metadata",
):
    """Upload a file together with custom, JSON-encoded metadata.

    Args:
        file_path: Path of the local file to send.
        collection_name: Target collection, sent as ``collection_name``.
        metadata: JSON-serializable object; sent as a JSON string in the
            ``metadata`` form field.
        description: Text for the ``description`` form field. Defaults to the
            value this example previously hard-coded.

    Returns:
        The decoded JSON body of the upload response.

    Raises:
        TypeError: If ``metadata`` is not JSON-serializable.
        OSError: If ``file_path`` cannot be opened.
        requests.exceptions.RequestException: On connection problems, timeout,
            or a non-2xx HTTP status.
    """
    # Multipart form fields are plain strings, so the metadata travels as JSON.
    form_fields = {
        "collection_name": collection_name,
        "description": description,
        "metadata": json.dumps(metadata),
    }

    with open(file_path, "rb") as f:
        response = requests.post(
            "http://localhost/api/v1/rag/upload",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            files={"file": f},
            data=form_fields,
            # Without a timeout requests waits forever on a stalled server.
            timeout=60,
        )

    # Fail loudly on 4xx/5xx instead of handing an error payload to the caller.
    response.raise_for_status()
    return response.json()
# Describe the policy document, then upload it with that metadata attached
policy_metadata = {
    "category": "legal",
    "type": "policy",
    "version": "2.1",
    "effective_date": "2024-01-01",
    "department": "compliance",
    "language": "en",
}
result = upload_with_metadata("privacy_policy.pdf", "policies", policy_metadata)
print(f"Uploaded: {result['document_id']}")
Upload Multiple Files
import requests
import os
def upload_directory(directory, collection_name):
    """Upload every supported file found directly inside ``directory``.

    Sub-directories and files with an unsupported extension are skipped.
    Each file is attempted independently, so one failure does not stop the
    rest of the batch. Progress and a final summary are printed.

    Args:
        directory: Folder to scan (not recursive).
        collection_name: Target collection, sent as ``collection_name``.

    Returns:
        A ``(uploaded, failed)`` tuple. ``uploaded`` is the list of document
        IDs returned by the API; ``failed`` is a list of
        ``(filename, error_message)`` pairs.

    Raises:
        OSError: If ``directory`` itself cannot be listed.
    """
    supported_extensions = {".pdf", ".txt", ".md", ".docx", ".json"}
    uploaded = []
    failed = []

    # os.listdir() order is arbitrary; sort for reproducible processing/logs.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue

        ext = os.path.splitext(filename)[1].lower()
        if ext not in supported_extensions:
            print(f"Skipping {filename} (unsupported format)")
            continue

        try:
            with open(file_path, "rb") as f:
                response = requests.post(
                    "http://localhost/api/v1/rag/upload",
                    headers={"Authorization": "Bearer YOUR_API_KEY"},
                    files={"file": f},
                    data={
                        "collection_name": collection_name,
                        "description": f"File: {filename}",
                    },
                    timeout=60,
                )
            # Turn 4xx/5xx into a descriptive error rather than a confusing
            # KeyError on 'document_id' further down.
            response.raise_for_status()
            document_id = response.json()["document_id"]
        except Exception as e:
            # Deliberately broad: this is the per-file boundary of a
            # best-effort batch, and every failure is recorded and reported.
            failed.append((filename, str(e)))
            print(f"Failed: {filename} - {e}")
        else:
            uploaded.append(document_id)
            print(f"Uploaded: {filename} -> {document_id}")

    print("\nSummary:")
    print(f"Uploaded: {len(uploaded)}")
    print(f"Failed: {len(failed)}")
    return uploaded, failed
# Push every supported file under ./docs into the "knowledge_base" collection
upload_directory(directory="./docs", collection_name="knowledge_base")
Upload from URL
import requests
from urllib.parse import urlparse
def upload_from_url(url, collection_name, description=None):
    """Download a document from ``url`` and upload it to a RAG collection.

    Args:
        url: Location of the document to fetch.
        collection_name: Target collection, sent as ``collection_name``.
        description: Optional description; omitted from the request when
            empty or ``None``.

    Returns:
        The decoded JSON body of the upload response on success, or
        ``{"error": "<message>"}`` if the download or the upload fails.
    """
    # URL paths always use "/", so posixpath is the right tool on every OS.
    # Imported locally because this snippet does not import it at the top.
    import posixpath

    try:
        # Download file
        download_response = requests.get(url, timeout=30)
        download_response.raise_for_status()

        # Get filename from URL, falling back when the path ends in "/"
        filename = posixpath.basename(urlparse(url).path)
        if not filename:
            filename = "downloaded_document.pdf"

        # Upload to RAG
        data = {"collection_name": collection_name}
        if description:
            data["description"] = description

        upload_response = requests.post(
            "http://localhost/api/v1/rag/upload",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            files={"file": (filename, download_response.content)},
            data=data,
            # Without a timeout requests waits forever on a stalled server.
            timeout=60,
        )
        # Report a rejected upload through the same {"error": ...} channel
        # as a failed download.
        upload_response.raise_for_status()
        return upload_response.json()
    except Exception as e:
        # Deliberately broad: this function's contract is to report every
        # failure as an {"error": ...} dict instead of raising.
        return {"error": str(e)}
# Example: fetch a remote PDF and store it in the "documentation" collection
result = upload_from_url(
    url="https://example.com/document.pdf",
    collection_name="documentation",
    description="Downloaded from external source",
)
# A failed download or upload is reported through the "error" key
if "error" not in result:
    print(f"Uploaded: {result['document_id']}")
else:
    print(f"Error: {result['error']}")
Upload with Progress Tracking
import requests
from tqdm import tqdm
def upload_with_progress(file_path, collection_name):
    """Upload a file while showing a tqdm progress bar.

    Args:
        file_path: Path of the local file to send.
        collection_name: Target collection, sent as ``collection_name``.

    Returns:
        The decoded JSON body of the upload response.

    Raises:
        OSError: If ``file_path`` cannot be read.
        requests.exceptions.RequestException: On connection problems or
            timeout.
    """
    # Imported locally because this snippet does not import them at the top.
    import os

    from tqdm.utils import CallbackIOWrapper

    file_size = os.path.getsize(file_path)
    display_name = os.path.basename(file_path)

    with open(file_path, "rb") as f, tqdm(
        total=file_size,
        unit="B",
        unit_scale=True,
        desc=display_name,
    ) as pbar:
        # Advance the bar by the number of bytes returned from every read().
        # NOTE(review): requests appears to read the whole file while building
        # the multipart body, so the bar tracks reading the file rather than
        # network transfer; true streaming progress would need a streaming
        # multipart encoder - confirm before relying on it.
        tracked_file = CallbackIOWrapper(pbar.update, f, "read")
        response = requests.post(
            "http://localhost/api/v1/rag/upload",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            # Pass the filename explicitly so it does not depend on the wrapper.
            files={"file": (display_name, tracked_file)},
            data={"collection_name": collection_name},
            timeout=120,
        )

    return response.json()
# Upload a large PDF with a progress bar, then print the status the API reported
result = upload_with_progress(
    file_path="large_document.pdf",
    collection_name="docs",
)
print(f"Status: {result['status']}")
Upload with Retry Logic
import requests
import time
def upload_with_retry(file_path, collection_name, max_retries=3, retry_delay=5):
    """Upload a file, retrying when the request fails.

    Args:
        file_path: Path of the local file to send.
        collection_name: Target collection, sent as ``collection_name``.
        max_retries: Total number of attempts; must be at least 1.
        retry_delay: Seconds to wait between attempts.

    Returns:
        The decoded JSON body of the first successful upload response.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        OSError: If ``file_path`` cannot be opened.
        requests.exceptions.RequestException: If the last attempt fails, or
            immediately for a client error that a retry cannot fix.
    """
    # Previously a value <= 0 skipped the loop and silently returned None.
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(1, max_retries + 1):
        try:
            # Re-open on every attempt so the upload always starts at byte 0.
            with open(file_path, "rb") as f:
                response = requests.post(
                    "http://localhost/api/v1/rag/upload",
                    headers={"Authorization": "Bearer YOUR_API_KEY"},
                    files={"file": f},
                    data={"collection_name": collection_name},
                    timeout=60,
                )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            status = getattr(e.response, "status_code", None)
            # 4xx means the request itself is wrong (bad key, bad file, ...);
            # repeating it cannot help. 408 and 429 are transient, so retry.
            is_permanent = (
                status is not None
                and 400 <= status < 500
                and status not in (408, 429)
            )
            if is_permanent or attempt == max_retries:
                raise
            print(f"Attempt {attempt} failed. Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
# Example: attempt the upload with the default retry policy and report the outcome
try:
    result = upload_with_retry(
        file_path="document.pdf",
        collection_name="documentation",
    )
    print(f"Uploaded: {result['document_id']}")
except Exception as exc:
    print(f"Failed after retries: {exc}")
JavaScript Upload Example
/**
 * Small client for the RAG document upload endpoint.
 */
class DocumentUploader {
  /**
   * @param {string} apiKey - Key sent as a Bearer token with every request.
   * @param {string} [baseUrl='http://localhost/api/v1'] - API root, without
   *   a trailing slash.
   */
  constructor(apiKey, baseUrl = 'http://localhost/api/v1') {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
  }

  /**
   * Upload one file to a collection.
   *
   * @param {File|Blob} file - File to send as the `file` form field.
   * @param {string} collectionName - Target collection.
   * @param {{description?: string, metadata?: object}} [options] - Optional
   *   description and metadata; metadata is sent as a JSON string.
   * @returns {Promise<object>} Parsed JSON body of the response.
   * @throws {Error} When the server answers with a non-2xx status.
   */
  async upload(file, collectionName, options = {}) {
    const formData = new FormData();
    formData.append('file', file);
    formData.append('collection_name', collectionName);
    if (options.description) {
      formData.append('description', options.description);
    }
    if (options.metadata) {
      formData.append('metadata', JSON.stringify(options.metadata));
    }

    const response = await fetch(`${this.baseUrl}/rag/upload`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${this.apiKey}`
      },
      body: formData
    });

    if (!response.ok) {
      let error;
      try {
        error = await response.json();
      } catch (parseError) {
        // The error body is not JSON (e.g. an HTML page from a proxy):
        // report the HTTP status instead of a misleading SyntaxError.
        throw new Error(`Upload failed (HTTP ${response.status})`);
      }
      throw new Error((error && error.message) || 'Upload failed');
    }
    return await response.json();
  }

  /**
   * Upload several files one after another, collecting per-file outcomes
   * instead of stopping at the first failure.
   *
   * @param {Iterable<File>} files - Files to upload; must expose `length`.
   * @param {string} collectionName - Target collection.
   * @param {(completed: number, total: number) => void} [onProgress] -
   *   Called after each file, whether it succeeded or failed.
   * @param {{description?: string, metadata?: object}} [options] - Options
   *   forwarded unchanged to every `upload` call.
   * @returns {Promise<Array<object>>} One entry per file:
   *   `{ success: true, file, result }` or `{ success: false, file, error }`.
   */
  async uploadMultiple(files, collectionName, onProgress, options = {}) {
    const results = [];
    let completed = 0;
    for (const file of files) {
      try {
        const result = await this.upload(file, collectionName, options);
        results.push({ success: true, file: file.name, result });
      } catch (error) {
        results.push({ success: false, file: file.name, error: error.message });
      }
      completed++;
      if (onProgress) onProgress(completed, files.length);
    }
    return results;
  }
}
// Usage: create one uploader and reuse it for every request
const uploader = new DocumentUploader('YOUR_API_KEY');

// Single file upload: send the first file picked in the <input id="file-input">
const fileInput = document.getElementById('file-input');
const result = await uploader.upload(fileInput.files[0], 'documentation');
console.log('Uploaded:', result.document_id);

// Multiple files upload: send every selected file, logging progress after each
const files = [...fileInput.files];
const reportProgress = (completed, total) => {
  console.log(`Progress: ${completed}/${total}`);
};
const results = await uploader.uploadMultiple(files, 'knowledge_base', reportProgress);

// One line per file: the new document ID on success, the error message otherwise
for (const r of results) {
  console.log(
    r.success
      ? `${r.file}: ${r.result.document_id}`
      : `${r.file}: Failed - ${r.error}`
  );
}
cURL Examples
Upload a file
# Multipart upload of document.pdf into the "documentation" collection
curl --request POST http://localhost/api/v1/rag/upload \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --form "file=@document.pdf" \
  --form "collection_name=documentation" \
  --form "description=Product documentation"
Upload with metadata
# Multipart upload of policy.pdf; the metadata field carries a JSON string
curl --request POST http://localhost/api/v1/rag/upload \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --form "file=@policy.pdf" \
  --form "collection_name=policies" \
  --form "metadata={\"category\":\"legal\",\"version\":\"2.0\"}"
Best Practices
- Verify file format - Only upload supported file types
- Check file size - Large files may time out
- Add metadata - Makes searching and filtering easier
- Use descriptive names - Helps identify documents later
- Handle errors - Implement retry logic for failed uploads
- Track uploads - Log document IDs for reference
Next Steps
- Bulk Upload Example - Upload multiple files efficiently
- Search Example - Search uploaded documents
- RAG Chat Example - Combine search with AI chat