Managing Documents
List, view, update, and delete documents in your RAG collections.
List Documents in Collection
Using cURL
# List the documents in the "documentation" collection.
curl --request GET \
  --header "Authorization: Bearer YOUR_API_KEY" \
  http://localhost/api/v1/rag/collections/documentation/documents
Using Python
import requests
# Fetch the document listing for the "documentation" collection.
response = requests.get(
    "http://localhost/api/v1/rag/collections/documentation/documents",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    timeout=10,  # without a timeout, requests can wait on the server forever
)
# Surface 4xx/5xx responses as an exception instead of failing later with a
# KeyError when the error payload has no "documents" key.
response.raise_for_status()

documents = response.json()["documents"]
for doc in documents:
    print(f"ID: {doc['id']}")
    print(f"Name: {doc['name']}")
    print(f"Chunks: {doc['chunk_count']}")
    print(f"Uploaded: {doc['created_at']}")
    print("-" * 40)
Using JavaScript
// Request the document listing for the "documentation" collection.
const response = await fetch(
  'http://localhost/api/v1/rag/collections/documentation/documents',
  {
    headers: {
      'Authorization': 'Bearer YOUR_API_KEY'
    }
  }
);
// fetch() only rejects on network failure, so HTTP error statuses must be
// checked explicitly before reading the body.
if (!response.ok) {
  throw new Error(`Failed to list documents: HTTP ${response.status}`);
}
const data = await response.json();
data.documents.forEach(doc => {
  console.log(`ID: ${doc.id}`);
  console.log(`Name: ${doc.name}`);
  console.log(`Chunks: ${doc.chunk_count}\n`);
});
List with Pagination
def list_all_documents(collection_name, page_size=50):
    """Return every document in a collection, following pagination.

    Requests consecutive pages (starting at page 1) until a page comes back
    with fewer than ``page_size`` documents.

    Args:
        collection_name: Name of the collection to list.
        page_size: Number of documents requested per page; must be >= 1.

    Returns:
        A list with the document entries of all pages, in the order received.

    Raises:
        ValueError: If ``page_size`` is less than 1.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    if page_size < 1:
        # A non-positive page size can never satisfy the "short page" stop
        # condition below, so the loop would never terminate.
        raise ValueError("page_size must be at least 1")

    all_documents = []
    page = 1
    while True:
        response = requests.get(
            f"http://localhost/api/v1/rag/collections/{collection_name}/documents",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            params={
                "page": page,
                "page_size": page_size,
            },
            timeout=10,
        )
        # Stop on HTTP errors rather than hitting a KeyError on the error body.
        response.raise_for_status()
        documents = response.json()["documents"]
        all_documents.extend(documents)
        # A short (or empty) page means there is nothing left to fetch.
        if len(documents) < page_size:
            break
        page += 1
    return all_documents
# Example usage: collect every page and report the total.
all_docs = list_all_documents("documentation")
print(f"Total documents: {len(all_docs)}")
View Document Details
# Fetch the details of document doc_abc123.
curl --request GET \
  --header "Authorization: Bearer YOUR_API_KEY" \
  http://localhost/api/v1/rag/documents/doc_abc123
# Fetch a single document by its ID.
response = requests.get(
    "http://localhost/api/v1/rag/documents/doc_abc123",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    timeout=10,  # avoid hanging indefinitely on an unresponsive server
)
# An error payload has none of the fields read below, so fail on 4xx/5xx
# here instead of with a KeyError.
response.raise_for_status()

doc = response.json()
print(f"ID: {doc['id']}")
print(f"Name: {doc['name']}")
print(f"Collection: {doc['collection_name']}")
# Description and metadata are read with .get() and fall back to a default.
print(f"Description: {doc.get('description', 'N/A')}")
print(f"Chunks: {doc['chunk_count']}")
print(f"Metadata: {doc.get('metadata', {})}")
print(f"Uploaded: {doc['created_at']}")
Delete a Document
Using cURL
# Delete document doc_abc123.
curl --request DELETE \
  --header "Authorization: Bearer YOUR_API_KEY" \
  http://localhost/api/v1/rag/documents/doc_abc123
Using Python
# Delete a single document by its ID.
response = requests.delete(
    "http://localhost/api/v1/rag/documents/doc_abc123",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    timeout=10,  # avoid hanging indefinitely on an unresponsive server
)
# A failed delete (e.g. unknown ID) returns an error payload without a
# "message" key; raise on 4xx/5xx instead of failing with a KeyError.
response.raise_for_status()

result = response.json()
print(f"Status: {result['message']}")
Using JavaScript
// Delete a single document by its ID.
const response = await fetch(
  'http://localhost/api/v1/rag/documents/doc_abc123',
  {
    method: 'DELETE',
    headers: {
      'Authorization': 'Bearer YOUR_API_KEY'
    }
  }
);
// fetch() resolves even for 4xx/5xx responses, so check the status before
// reporting success.
if (!response.ok) {
  throw new Error(`Failed to delete document: HTTP ${response.status}`);
}
const result = await response.json();
console.log(`Status: ${result.message}`);
Delete Multiple Documents
def delete_documents(document_ids):
    """Delete several documents, continuing past individual failures.

    Args:
        document_ids: Iterable of document IDs to delete.

    Returns:
        A dict with two keys: ``"deleted"``, the list of IDs whose delete
        request succeeded, and ``"failed"``, a list of ``(doc_id, error)``
        tuples where ``error`` is the exception text.
    """
    deleted = []
    failed = []
    for doc_id in document_ids:
        try:
            response = requests.delete(
                f"http://localhost/api/v1/rag/documents/{doc_id}",
                headers={"Authorization": "Bearer YOUR_API_KEY"},
                timeout=10,
            )
            # requests does not raise on 4xx/5xx by itself; without this a
            # "not found" or "forbidden" response would be counted as deleted.
            response.raise_for_status()
        except requests.RequestException as e:
            failed.append((doc_id, str(e)))
            print(f"Failed: {doc_id} - {e}")
        else:
            deleted.append(doc_id)
            print(f"Deleted: {doc_id}")
    return {"deleted": deleted, "failed": failed}
# Example usage
target_ids = ["doc_1", "doc_2", "doc_3"]
outcome = delete_documents(target_ids)
deleted_total = len(outcome["deleted"])
failed_total = len(outcome["failed"])
print(f"\nDeleted: {deleted_total}")
print(f"Failed: {failed_total}")
Delete Documents by Filter
def delete_by_filter(collection_name, filter_key, filter_value):
    """Delete the documents whose metadata has ``filter_key == filter_value``.

    Only the documents returned by a single listing request are examined.
    NOTE(review): if the API paginates the listing by default, later pages
    are not considered -- confirm against the server's behavior.

    Args:
        collection_name: Collection to search.
        filter_key: Metadata key to compare.
        filter_value: Value the metadata entry must equal.

    Returns:
        The number of documents that were actually deleted.

    Raises:
        requests.HTTPError: If the listing request returns a 4xx/5xx status.
    """
    auth_headers = {"Authorization": "Bearer YOUR_API_KEY"}

    # Get all documents
    response = requests.get(
        f"http://localhost/api/v1/rag/collections/{collection_name}/documents",
        headers=auth_headers,
        timeout=10,
    )
    response.raise_for_status()
    documents = response.json()["documents"]

    # Find matching documents. "or {}" also covers a metadata value of None,
    # which .get("metadata", {}) would pass through and crash on.
    to_delete = [
        doc["id"]
        for doc in documents
        if (doc.get("metadata") or {}).get(filter_key) == filter_value
    ]

    # Delete matching documents, counting only confirmed successes.
    deleted = 0
    for doc_id in to_delete:
        try:
            delete_response = requests.delete(
                f"http://localhost/api/v1/rag/documents/{doc_id}",
                headers=auth_headers,
                timeout=10,
            )
            delete_response.raise_for_status()
        except requests.RequestException as e:
            print(f"Failed: {doc_id} - {e}")
        else:
            deleted += 1
            print(f"Deleted: {doc_id}")
    return deleted


# Delete all documents with category "test"
deleted_count = delete_by_filter("documentation", "category", "test")
print(f"Deleted {deleted_count} documents")
Update Document Metadata
# Replace the description and metadata of an existing document.
response = requests.put(
    "http://localhost/api/v1/rag/documents/doc_abc123",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "description": "Updated description",
        "metadata": {
            "category": "updated",
            "version": "2.1",
        },
    },
    timeout=10,  # avoid hanging indefinitely on an unresponsive server
)
# Raise on 4xx/5xx so a rejected update is not reported via a KeyError on
# the missing "message" key.
response.raise_for_status()
print(f"Updated: {response.json()['message']}")
Document Statistics
def get_collection_stats(collection_name):
    """Print and return summary statistics for a collection.

    Args:
        collection_name: Collection to summarize.

    Returns:
        A dict with the keys ``"total_documents"``, ``"total_chunks"``,
        ``"avg_chunks_per_document"`` and ``"types"`` (a mapping from
        extension label such as ``".pdf"`` to document count).

    Raises:
        requests.HTTPError: If the listing request returns a 4xx/5xx status.
    """
    response = requests.get(
        f"http://localhost/api/v1/rag/collections/{collection_name}/documents",
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        timeout=10,
    )
    response.raise_for_status()
    documents = response.json()["documents"]

    total_docs = len(documents)
    total_chunks = sum(doc["chunk_count"] for doc in documents)
    # An empty collection would otherwise divide by zero.
    avg_chunks = total_chunks / total_docs if total_docs else 0.0

    print(f"Collection: {collection_name}")
    print(f"Total Documents: {total_docs}")
    print(f"Total Chunks: {total_chunks}")
    print(f"Avg Chunks per Document: {avg_chunks:.1f}")

    # Document types
    types = {}
    for doc in documents:
        # rpartition reports whether a dot exists at all, so a name such as
        # "README" is not mistaken for an extension.
        _, dot, suffix = doc["name"].rpartition(".")
        label = f".{suffix}" if dot else "(no extension)"
        types[label] = types.get(label, 0) + 1

    print("\nDocument Types:")
    for label, count in sorted(types.items()):
        print(f" {label}: {count}")

    # Return the figures so callers can use them, not just read the printout.
    return {
        "total_documents": total_docs,
        "total_chunks": total_chunks,
        "avg_chunks_per_document": avg_chunks,
        "types": types,
    }


stats = get_collection_stats("documentation")
Search Within Collection
def find_documents_by_name(collection_name, search_term):
    """Return the documents whose name contains ``search_term``.

    The comparison is a case-insensitive substring match performed on the
    client against the listing returned by the API.

    Args:
        collection_name: Collection to search.
        search_term: Text to look for inside each document name.

    Returns:
        A list of the matching document entries.

    Raises:
        requests.HTTPError: If the listing request returns a 4xx/5xx status.
    """
    response = requests.get(
        f"http://localhost/api/v1/rag/collections/{collection_name}/documents",
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        timeout=10,
    )
    response.raise_for_status()
    documents = response.json()["documents"]

    # Lower-case the search term once instead of once per document.
    needle = search_term.lower()
    return [doc for doc in documents if needle in doc["name"].lower()]


matches = find_documents_by_name("documentation", "api")
for doc in matches:
    print(f"ID: {doc['id']}")
    print(f"Name: {doc['name']}")
    print(f"Chunks: {doc['chunk_count']}\n")
Bulk Operations
Bulk Delete with Confirmation
def bulk_delete_with_confirmation(collection_name, document_ids):
    """Delete a batch of documents after an interactive yes/no prompt.

    Prints a preview of up to five documents (names are looked up in the
    collection listing; IDs missing from the listing are left out of the
    preview), asks for confirmation on stdin, then deletes each ID and
    prints how many deletions succeeded.

    Args:
        collection_name: Collection used for the preview lookup.
        document_ids: Sequence of document IDs to delete.

    Returns:
        None. Progress and results are printed.

    Raises:
        requests.HTTPError: If the preview listing returns a 4xx/5xx status.
    """
    if not document_ids:
        # Nothing to preview or confirm; skip the network calls and prompt.
        print("No documents to delete.")
        return

    auth_headers = {"Authorization": "Bearer YOUR_API_KEY"}
    print(f"About to delete {len(document_ids)} documents from '{collection_name}'")

    # Show sample of documents to delete
    response = requests.get(
        f"http://localhost/api/v1/rag/collections/{collection_name}/documents",
        headers=auth_headers,
        timeout=10,
    )
    response.raise_for_status()
    all_docs = {doc["id"]: doc for doc in response.json()["documents"]}

    print("\nDocuments to delete:")
    for doc_id in document_ids[:5]:
        if doc_id in all_docs:
            print(f" - {all_docs[doc_id]['name']} ({doc_id})")
    if len(document_ids) > 5:
        print(f" ... and {len(document_ids) - 5} more")

    confirm = input("\nProceed with deletion? (yes/no): ")
    if confirm.lower() != "yes":
        print("Cancelled.")
        return

    deleted = 0
    for doc_id in document_ids:
        try:
            delete_response = requests.delete(
                f"http://localhost/api/v1/rag/documents/{doc_id}",
                headers=auth_headers,
                timeout=10,
            )
            # Count a document only when the server confirms the delete;
            # requests does not raise on 4xx/5xx unless asked to.
            delete_response.raise_for_status()
        except requests.RequestException as e:
            print(f"Failed to delete {doc_id}: {e}")
        else:
            deleted += 1
    print(f"\nDeleted {deleted} documents.")
# Example
doc_ids = ["doc_1", "doc_2", "doc_3"]
bulk_delete_with_confirmation("documentation", doc_ids)
Best Practices
Document Organization
- Use descriptive filenames
- Add relevant metadata
- Group related documents in collections
- Update metadata when content changes
Deletion Safety
- Always list documents before deletion
- Use confirmation prompts for bulk operations
- Keep backups of important documents
- Track deletion logs
Metadata Management
- Use consistent metadata structure
- Include version information
- Track document owners
- Add expiration dates for temporary docs
Troubleshooting
Document Not Found
Problem: {"error": "Document not found"}
Solution:
- Verify document ID is correct
- Check that the document exists using the list endpoint
- Confirm collection name is correct
Delete Fails
Problem: Delete request fails or times out
Solution:
- Check document ID format
- Verify API key has delete permissions
- Retry the request with an explicit timeout (for example, `timeout=10` in Python)
- Check server logs for errors
List Returns Empty
Problem: List documents returns no results
Solution:
- Verify collection name is correct
- Check collection has uploaded documents
- Confirm API key has read permissions
- Check pagination parameters
Next Steps
- Upload Documents - Add new documents
- Search Documents - Find relevant content
- Examples - More management examples