Skip to main content

Upload Documents Example

Complete examples for uploading documents to RAG collections.

Basic Upload

import requests

def upload_document(file_path, collection_name, description=None, timeout=60):
    """Upload one file to a RAG collection and return the decoded JSON reply.

    Args:
        file_path: Path of the file to send; it is opened in binary mode.
        collection_name: Value sent as the ``collection_name`` form field.
        description: Optional text sent as the ``description`` form field.
            The field is omitted when this is falsy.
        timeout: Seconds passed to ``requests.post`` so a stalled server
            cannot block the caller indefinitely.

    Returns:
        The response body parsed as JSON.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        requests.exceptions.HTTPError: If the server replies with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection errors or
            timeouts.
    """
    data = {"collection_name": collection_name}
    if description:
        data["description"] = description

    with open(file_path, "rb") as f:
        response = requests.post(
            "http://localhost/api/v1/rag/upload",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            files={"file": f},
            data=data,
            timeout=timeout,
        )

    # Fail here with the HTTP status instead of letting the caller hit a
    # KeyError on a missing "document_id" in an error payload.
    response.raise_for_status()
    return response.json()

# Send the PDF manual to the "documentation" collection
result = upload_document(
    file_path="user_manual.pdf",
    collection_name="documentation",
    description="Complete user manual for product v2.0",
)

# Show the two fields read from the upload response
for line in (
    f"Document ID: {result['document_id']}",
    f"Status: {result['status']}",
):
    print(line)

Upload with Custom Metadata

import requests
import json

def upload_with_metadata(
    file_path,
    collection_name,
    metadata,
    description="Policy document with metadata",
    timeout=60,
):
    """Upload a file together with JSON-encoded custom metadata.

    Args:
        file_path: Path of the file to send; it is opened in binary mode.
        collection_name: Value sent as the ``collection_name`` form field.
        metadata: JSON-serialisable object; it is encoded with
            ``json.dumps`` and sent as the ``metadata`` form field.
        description: Text sent as the ``description`` form field. Defaults
            to the value this example previously hard-coded.
        timeout: Seconds passed to ``requests.post``.

    Returns:
        The response body parsed as JSON.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        TypeError: If ``metadata`` is not JSON-serialisable.
        requests.exceptions.HTTPError: If the server replies with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection errors or
            timeouts.
    """
    with open(file_path, "rb") as f:
        response = requests.post(
            "http://localhost/api/v1/rag/upload",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            files={"file": f},
            data={
                "collection_name": collection_name,
                "description": description,
                # Multipart form fields are flat strings, so the metadata
                # object is shipped as a JSON document.
                "metadata": json.dumps(metadata),
            },
            timeout=timeout,
        )

    # Surface HTTP errors here rather than as a KeyError in the caller.
    response.raise_for_status()
    return response.json()

# Custom metadata attached to the privacy policy upload
policy_metadata = dict(
    category="legal",
    type="policy",
    version="2.1",
    effective_date="2024-01-01",
    department="compliance",
    language="en",
)

result = upload_with_metadata("privacy_policy.pdf", "policies", policy_metadata)

print(f"Uploaded: {result['document_id']}")

Upload Multiple Files

import requests
import os

def upload_directory(directory, collection_name):
    """Upload every supported file found directly inside ``directory``.

    Sub-directories are not descended into, and files whose extension is not
    one of ``.pdf``, ``.txt``, ``.md``, ``.docx`` or ``.json`` are skipped
    with a message. A failure on one file is recorded and the batch continues.

    Args:
        directory: Path of the directory to scan.
        collection_name: Value sent as the ``collection_name`` form field
            for every file.

    Returns:
        A ``(uploaded, failed)`` tuple. ``uploaded`` is a list of the
        ``document_id`` values returned by the server; ``failed`` is a list
        of ``(filename, error_message)`` tuples.

    Raises:
        OSError: If ``directory`` itself cannot be listed.
    """
    supported_extensions = {".pdf", ".txt", ".md", ".docx", ".json"}
    uploaded = []
    failed = []

    # os.listdir() order is arbitrary; sorting makes runs reproducible.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)

        if not os.path.isfile(file_path):
            continue

        ext = os.path.splitext(filename)[1].lower()
        if ext not in supported_extensions:
            print(f"Skipping {filename} (unsupported format)")
            continue

        try:
            with open(file_path, "rb") as f:
                response = requests.post(
                    "http://localhost/api/v1/rag/upload",
                    headers={"Authorization": "Bearer YOUR_API_KEY"},
                    files={"file": f},
                    data={
                        "collection_name": collection_name,
                        "description": f"File: {filename}",
                    },
                    timeout=60,
                )

            # Without this an HTTP error is only noticed as a KeyError on
            # "document_id", which makes for a useless failure message.
            response.raise_for_status()
            document_id = response.json()["document_id"]
        except Exception as e:
            # Deliberately broad: one bad file must not abort the batch.
            failed.append((filename, str(e)))
            print(f"Failed: {filename} - {e}")
        else:
            uploaded.append(document_id)
            print(f"Uploaded: {filename} -> {document_id}")

    print("\nSummary:")
    print(f"Uploaded: {len(uploaded)}")
    print(f"Failed: {len(failed)}")

    return uploaded, failed

# Push every supported file from ./docs into the "knowledge_base" collection
upload_directory(directory="./docs", collection_name="knowledge_base")

Upload from URL

import requests
from urllib.parse import urlparse

def upload_from_url(url, collection_name, description=None):
    """Download a document from ``url`` and upload it to a RAG collection.

    The whole download is held in memory before being re-sent.

    Args:
        url: Address of the document to fetch.
        collection_name: Value sent as the ``collection_name`` form field.
        description: Optional text sent as the ``description`` form field.
            The field is omitted when this is falsy.

    Returns:
        The upload response parsed as JSON on success. If the download or
        the upload fails (connection error, timeout, 4xx/5xx status, or a
        body that is not valid JSON), a dict of the form
        ``{"error": "<message>"}`` is returned instead of raising.
    """
    try:
        # Download file
        download_response = requests.get(url, timeout=30)
        download_response.raise_for_status()

        # URL paths always use "/" as the separator, so take the last
        # segment directly; this also avoids needing the os module here.
        filename = urlparse(url).path.rsplit("/", 1)[-1]
        if not filename:
            filename = "downloaded_document.pdf"

        data = {"collection_name": collection_name}
        if description:
            data["description"] = description

        # Upload to RAG
        upload_response = requests.post(
            "http://localhost/api/v1/rag/upload",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            files={"file": (filename, download_response.content)},
            data=data,
            timeout=60,
        )
        # Report HTTP failures through the error dict rather than handing
        # the caller an error payload that lacks "document_id".
        upload_response.raise_for_status()

        return upload_response.json()

    except (requests.exceptions.RequestException, ValueError) as e:
        # Only request/JSON failures are converted; programming errors
        # (e.g. a NameError) are no longer silently swallowed.
        return {"error": str(e)}

# Example: fetch a remote PDF and store it in the "documentation" collection
result = upload_from_url(
    url="https://example.com/document.pdf",
    collection_name="documentation",
    description="Downloaded from external source",
)

# A result without an "error" key is treated as a successful upload
if "error" not in result:
    print(f"Uploaded: {result['document_id']}")
else:
    print(f"Error: {result['error']}")

Upload with Progress Tracking

import requests
from tqdm import tqdm

def upload_with_progress(file_path, collection_name):
    """Upload a file while showing a tqdm progress bar.

    NOTE: plain ``requests`` reads the file into memory to build the
    multipart body, so the bar reflects that read rather than bytes sent on
    the wire. Progress of a truly streamed upload needs a streaming
    multipart encoder such as the one in ``requests-toolbelt``.

    Args:
        file_path: Path of the file to send; it is opened in binary mode.
        collection_name: Value sent as the ``collection_name`` form field.

    Returns:
        The response body parsed as JSON.

    Raises:
        OSError: If ``file_path`` cannot be read.
        requests.exceptions.HTTPError: If the server replies with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection errors or
            timeouts.
    """
    # Imported locally so the function works wherever it is pasted.
    import os

    from tqdm.utils import CallbackIOWrapper

    file_size = os.path.getsize(file_path)
    display_name = os.path.basename(file_path)

    with open(file_path, "rb") as f:
        with tqdm(
            total=file_size,
            unit="B",
            unit_scale=True,
            desc=display_name,
        ) as pbar:
            # Every read() on the wrapper advances the bar by the number of
            # bytes returned. Previously the callback was defined but never
            # connected to anything, so the bar stayed at zero.
            tracked_file = CallbackIOWrapper(pbar.update, f, "read")

            response = requests.post(
                "http://localhost/api/v1/rag/upload",
                headers={"Authorization": "Bearer YOUR_API_KEY"},
                files={"file": (display_name, tracked_file)},
                data={"collection_name": collection_name},
                timeout=120,
            )

    # Surface HTTP errors here rather than as a KeyError in the caller.
    response.raise_for_status()
    return response.json()

# Upload a large PDF to the "docs" collection and report the returned status
result = upload_with_progress(
    file_path="large_document.pdf",
    collection_name="docs",
)
print(f"Status: {result['status']}")

Upload with Retry Logic

import requests
import time

def upload_with_retry(file_path, collection_name, max_retries=3, retry_delay=5):
    """Upload a file, retrying when the request fails.

    Args:
        file_path: Path of the file to send; it is reopened on every attempt.
        collection_name: Value sent as the ``collection_name`` form field.
        max_retries: Total number of attempts to make. Must be at least 1.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        The response body parsed as JSON.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        OSError: If ``file_path`` cannot be opened (not retried).
        requests.exceptions.RequestException: The error from the final
            attempt, once every attempt has failed.
    """
    # With zero attempts the loop body would never run and the function
    # would silently return None; reject that up front.
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            with open(file_path, "rb") as f:
                response = requests.post(
                    "http://localhost/api/v1/rag/upload",
                    headers={"Authorization": "Bearer YOUR_API_KEY"},
                    files={"file": f},
                    data={"collection_name": collection_name},
                    timeout=60,
                )

            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException:
            # Last attempt: let the caller see the real error.
            if attempt == max_retries - 1:
                raise
            print(
                f"Attempt {attempt + 1} failed. "
                f"Retrying in {retry_delay} seconds..."
            )
            time.sleep(retry_delay)

# Example: any error left after the retries is reported, not raised
try:
    result = upload_with_retry(
        file_path="document.pdf",
        collection_name="documentation",
    )
    document_id = result['document_id']
    print(f"Uploaded: {document_id}")
except Exception as e:
    print(f"Failed after retries: {e}")

JavaScript Upload Example

/**
 * Small client for uploading documents to RAG collections.
 */
class DocumentUploader {
  /**
   * @param {string} apiKey - Sent as a Bearer token with every upload.
   * @param {string} [baseUrl='http://localhost/api/v1'] - API root; the
   *   upload endpoint is `${baseUrl}/rag/upload`.
   */
  constructor(apiKey, baseUrl = 'http://localhost/api/v1') {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
  }

  /**
   * Upload a single file.
   *
   * @param {File|Blob} file - Appended as the `file` form field.
   * @param {string} collectionName - Sent as `collection_name`.
   * @param {{description?: string, metadata?: object}} [options] -
   *   `description` is sent as-is; `metadata` is JSON-stringified.
   * @returns {Promise<object>} The parsed JSON response body.
   * @throws {Error} When the response status is not OK. The message is the
   *   server's `message` field when available, otherwise 'Upload failed';
   *   the HTTP status code is attached as `error.status`.
   */
  async upload(file, collectionName, options = {}) {
    const formData = new FormData();
    formData.append('file', file);
    formData.append('collection_name', collectionName);

    if (options.description) {
      formData.append('description', options.description);
    }

    if (options.metadata) {
      formData.append('metadata', JSON.stringify(options.metadata));
    }

    const response = await fetch(`${this.baseUrl}/rag/upload`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${this.apiKey}`
      },
      body: formData
    });

    if (!response.ok) {
      let message = 'Upload failed';
      try {
        const error = await response.json();
        if (error && error.message) {
          message = error.message;
        }
      } catch (_) {
        // Error bodies are not guaranteed to be JSON; a parse failure must
        // not replace the real HTTP failure, so keep the fallback message.
      }
      const failure = new Error(message);
      failure.status = response.status;
      throw failure;
    }

    return await response.json();
  }

  /**
   * Upload files one after another, collecting a result per file.
   *
   * A failed upload does not stop the remaining ones.
   *
   * @param {Array<File>} files - Files to upload, in order.
   * @param {string} collectionName - Sent as `collection_name` for each file.
   * @param {(completed: number, total: number) => void} [onProgress] -
   *   Called after every file, whether it succeeded or failed.
   * @returns {Promise<Array<object>>} One entry per file:
   *   `{ success: true, file, result }` or `{ success: false, file, error }`.
   */
  async uploadMultiple(files, collectionName, onProgress) {
    const results = [];
    let completed = 0;

    for (const file of files) {
      try {
        const result = await this.upload(file, collectionName);
        results.push({ success: true, file: file.name, result });
      } catch (error) {
        results.push({ success: false, file: file.name, error: error.message });
      }

      completed++;
      if (onProgress) onProgress(completed, files.length);
    }

    return results;
  }
}

// Usage: one uploader instance serves every request below
const uploader = new DocumentUploader('YOUR_API_KEY');

// Single file upload: send the first file chosen in the #file-input element
const fileInput = document.getElementById('file-input');
const result = await uploader.upload(fileInput.files[0], 'documentation');
console.log('Uploaded:', result.document_id);

// Multiple files upload: send every chosen file, logging progress as we go
const files = Array.from(fileInput.files);
const logProgress = (completed, total) => {
  console.log(`Progress: ${completed}/${total}`);
};
const results = await uploader.uploadMultiple(files, 'knowledge_base', logProgress);

// Print one line per file: its document id, or why it failed
for (const outcome of results) {
  const line = outcome.success
    ? `${outcome.file}: ${outcome.result.document_id}`
    : `${outcome.file}: Failed - ${outcome.error}`;
  console.log(line);
}

cURL Examples

Upload a file

# Multipart upload of document.pdf with a collection name and a description
curl --request POST http://localhost/api/v1/rag/upload \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --form "file=@document.pdf" \
  --form "collection_name=documentation" \
  --form "description=Product documentation"

Upload with metadata

# Multipart upload of policy.pdf; the metadata field carries a JSON document
curl --request POST http://localhost/api/v1/rag/upload \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --form "file=@policy.pdf" \
  --form "collection_name=policies" \
  --form 'metadata={"category":"legal","version":"2.0"}'

Best Practices

  1. Verify file format - Only upload supported file types
  2. Check file size - Large files may time out
  3. Add metadata - Makes searching and filtering easier
  4. Use descriptive names - Helps identify documents later
  5. Handle errors - Implement retry logic for failed uploads
  6. Track uploads - Log document IDs for reference

Next Steps