# Embeddings

Convert text into vector representations for semantic search and similarity matching.

## Endpoint

```
POST /api/v1/embeddings
```
## Basic Request

### Using cURL

```bash
curl -X POST http://localhost/api/v1/embeddings \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "text-embedding-ada-002",
    "input": "The quick brown fox jumps over the lazy dog."
  }'
```
### Using Python

```python
import requests

response = requests.post(
    "http://localhost/api/v1/embeddings",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "model": "text-embedding-ada-002",
        "input": "The quick brown fox jumps over the lazy dog."
    }
)

data = response.json()
embedding = data["data"][0]["embedding"]
print(f"Embedding dimensions: {len(embedding)}")
print(f"First 5 values: {embedding[:5]}")
```
### Using JavaScript

```javascript
const response = await fetch('http://localhost/api/v1/embeddings', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    model: 'text-embedding-ada-002',
    input: 'The quick brown fox jumps over the lazy dog.'
  })
});

const data = await response.json();
const embedding = data.data[0].embedding;
console.log(`Embedding dimensions: ${embedding.length}`);
console.log(`First 5 values: ${embedding.slice(0, 5)}`);
```
## Request Parameters

| Parameter | Type | Required | Description | Default |
|---|---|---|---|---|
| `model` | string | Yes | Embedding model to use | - |
| `input` | string or array | Yes | Text(s) to embed | - |
| `encoding_format` | string | No | Output format for the embedding values | `float` |
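The `input` field accepts either a single string or an array of strings; sending an array embeds several texts in one request. A minimal sketch of setting `encoding_format` explicitly (only `float` is documented here; OpenAI-compatible servers often also accept `base64`, but treat that as an assumption to verify):

```python
import requests

response = requests.post(
    "http://localhost/api/v1/embeddings",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "model": "text-embedding-ada-002",
        "input": ["First text", "Second text"],
        "encoding_format": "float"  # "base64" may also work on OpenAI-compatible servers
    }
)
response.raise_for_status()
```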
## Response Format

```json
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "embedding": [
        0.0023,
        -0.0235,
        0.0527,
        ...
      ],
      "index": 0
    }
  ],
  "model": "text-embedding-ada-002",
  "usage": {
    "prompt_tokens": 8,
    "total_tokens": 8
  }
}
```
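When the request contains an array of inputs, each item in `data` carries an `index` pointing back to the corresponding input, so you can restore the pairing even if results are processed out of order. A small sketch, continuing from the Python request above:

```python
inputs = ["The cat sat on the mat.", "The dog ran around the park."]
data = response.json()["data"]

# Map each result back to its input via the "index" field
by_input = {item["index"]: item["embedding"] for item in data}
for i, text in enumerate(inputs):
    print(f"{text!r} -> {len(by_input[i])} dimensions")
```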
## Examples

### Single Text

```python
response = requests.post(
    "http://localhost/api/v1/embeddings",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "model": "text-embedding-ada-002",
        "input": "Hello, world!"
    }
)

embedding = response.json()["data"][0]["embedding"]
print(f"Generated {len(embedding)}-dimension vector")
```
### Multiple Texts

```python
response = requests.post(
    "http://localhost/api/v1/embeddings",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "model": "text-embedding-ada-002",
        "input": [
            "The cat sat on the mat.",
            "The dog ran around the park."
        ]
    }
)

embeddings = response.json()["data"]
for item in embeddings:
    print(f"Index {item['index']}: {len(item['embedding'])} dimensions")
```
### Large Documents

```python
text = "Your long document text here..."

# For long texts, consider chunking. This splits on raw characters;
# see "Chunking Long Text" below for word-aware chunking.
chunk_size = 500  # characters per chunk
chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]

# Generate embeddings for each chunk
embeddings = []
for i, chunk in enumerate(chunks):
    response = requests.post(
        "http://localhost/api/v1/embeddings",
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        json={
            "model": "text-embedding-ada-002",
            "input": chunk
        }
    )
    embedding = response.json()["data"][0]["embedding"]
    embeddings.append({"index": i, "embedding": embedding})
    print(f"Embedded chunk {i+1}/{len(chunks)}")

print(f"Total embeddings: {len(embeddings)}")
```
## Use Cases
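The examples in this section call a `get_embedding` helper, which is not part of the API itself. A minimal sketch of such a helper, wrapping the endpoint documented above:

```python
import requests

def get_embedding(text, model="text-embedding-ada-002"):
    """Embed a single string and return its vector (hypothetical helper)."""
    response = requests.post(
        "http://localhost/api/v1/embeddings",
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        json={"model": model, "input": text},
    )
    response.raise_for_status()
    return response.json()["data"][0]["embedding"]
```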
### Semantic Search

Create embeddings for documents, then search:

```python
import numpy as np

# Store documents with embeddings
documents = [
    {"text": "Python is a programming language.", "embedding": get_embedding("Python is a programming language.")},
    {"text": "JavaScript is used for web development.", "embedding": get_embedding("JavaScript is used for web development.")}
]

# Embed the search query
query = "What is Python?"
query_embedding = get_embedding(query)

# Calculate cosine similarity (simplified)
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Rank documents by similarity to the query
results = []
for doc in documents:
    similarity = cosine_similarity(query_embedding, doc["embedding"])
    results.append({"text": doc["text"], "similarity": similarity})

results.sort(key=lambda x: x["similarity"], reverse=True)
print(f"Most relevant: {results[0]['text']}")
```
### Text Classification

Compare a text's embedding against a reference embedding for each category (reusing `cosine_similarity` from above):

```python
# Create reference embeddings for categories
categories = {
    "technical": get_embedding("Technical documentation and manuals"),
    "marketing": get_embedding("Marketing and promotional content"),
    "support": get_embedding("Customer support and FAQs")
}

# Classify new text by picking the most similar category
text = "How do I configure the API?"
text_embedding = get_embedding(text)

best_category = None
best_similarity = -1
for category, category_embedding in categories.items():
    similarity = cosine_similarity(text_embedding, category_embedding)
    if similarity > best_similarity:
        best_similarity = similarity
        best_category = category

print(f"Category: {best_category} (similarity: {best_similarity:.2f})")
```
### Clustering

Group related texts by clustering their embeddings (requires scikit-learn):

```python
from sklearn.cluster import KMeans

# Get embeddings for multiple texts
texts = ["Python", "JavaScript", "Go", "Rust", "Ruby", "Java"]
embeddings = [get_embedding(text) for text in texts]

# Cluster embeddings
kmeans = KMeans(n_clusters=3, random_state=0)
clusters = kmeans.fit_predict(embeddings)

for text, cluster in zip(texts, clusters):
    print(f"{text}: Cluster {cluster}")
```
## Best Practices

### Chunking Long Text

```python
def chunk_text(text, max_tokens=800):
    """Split text into chunks of at most max_tokens words (words approximate tokens)."""
    # Simple word-based chunking
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0
    for word in words:
        current_chunk.append(word)
        current_length += 1
        if current_length >= max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

text = "Your long document..."
chunks = chunk_text(text)

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i+1}/{len(chunks)}...")
    embedding = get_embedding(chunk)
    # Store embedding with chunk metadata
```
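Hard chunk boundaries can split a topic mid-thought, which hurts retrieval quality. A common refinement is to overlap consecutive chunks; a minimal sketch (the function name and defaults are illustrative):

```python
def chunk_text_overlap(text, max_tokens=800, overlap=100):
    """Word-based chunking where consecutive chunks share `overlap` words."""
    # overlap must be smaller than max_tokens, or the window would not advance
    words = text.split()
    step = max_tokens - overlap
    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), step)]
```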
### Batch Processing

```python
import time

def batch_embed(texts, batch_size=10, delay=1.0):
    """Process embeddings in batches to avoid rate limits."""
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        response = requests.post(
            "http://localhost/api/v1/embeddings",
            headers={"Authorization": "Bearer YOUR_API_KEY"},
            json={
                "model": "text-embedding-ada-002",
                "input": batch
            }
        )
        all_embeddings.extend(response.json()["data"])
        print(f"Processed batch {i//batch_size + 1}/{(len(texts)-1)//batch_size + 1}")
        if i + batch_size < len(texts):
            time.sleep(delay)
    return all_embeddings

texts = ["Text 1", "Text 2", "Text 3", ...]
embeddings = batch_embed(texts)
```
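The sketch above assumes every request succeeds. In practice, rate-limited or transient failures are worth retrying with backoff; a minimal sketch of a retry wrapper (the helper name and defaults are illustrative):

```python
import time
import requests

def post_with_retry(url, retries=3, backoff=2.0, **kwargs):
    """POST with simple exponential backoff on HTTP errors (e.g. 429)."""
    for attempt in range(retries):
        response = requests.post(url, **kwargs)
        if response.status_code < 400:
            return response
        if attempt < retries - 1:
            time.sleep(backoff * (2 ** attempt))
    response.raise_for_status()  # surface the final error
```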
### Caching Embeddings

```python
import hashlib
import json
import os

class EmbeddingCache:
    def __init__(self, cache_file="embedding_cache.json"):
        self.cache_file = cache_file
        self.cache = self._load_cache()

    def _load_cache(self):
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'r') as f:
                return json.load(f)
        return {}

    def _save_cache(self):
        with open(self.cache_file, 'w') as f:
            json.dump(self.cache, f)

    def _get_key(self, text):
        return hashlib.md5(text.encode()).hexdigest()

    def get(self, text):
        key = self._get_key(text)
        return self.cache.get(key)

    def set(self, text, embedding):
        key = self._get_key(text)
        self.cache[key] = embedding
        self._save_cache()

# Usage
cache = EmbeddingCache()
text = "Python is great!"

# Check cache first, fall back to the API
cached = cache.get(text)
if cached:
    embedding = cached
    print("From cache")
else:
    embedding = get_embedding(text)
    cache.set(text, embedding)
    print("From API")
```
## Integration with RAG

Use embeddings to build a knowledge base. The sketch below assumes placeholder helpers (`load_documents`, `store_in_vector_db`, `search_vector_db`) backed by a vector store of your choice:

```python
# 1. Upload and embed documents
documents = load_documents()  # placeholder: load your corpus
for doc in documents:
    embedding = get_embedding(doc["content"])
    store_in_vector_db({  # placeholder: insert into your vector store
        "id": doc["id"],
        "content": doc["content"],
        "embedding": embedding,
        "metadata": doc["metadata"]
    })

# 2. Search
query = "How to use the API?"
query_embedding = get_embedding(query)
results = search_vector_db(query_embedding, top_k=5)  # placeholder: similarity search

# 3. Use results in chat completion
context = "\n\n".join([r["content"] for r in results])
response = requests.post(
    "http://localhost/api/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_API_KEY"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "system",
                "content": f"Answer using this context:\n\n{context}"
            },
            {
                "role": "user",
                "content": query
            }
        ]
    }
)
```
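The answer can then be read from the chat completion response, assuming the standard OpenAI-compatible response shape:

```python
answer = response.json()["choices"][0]["message"]["content"]
print(answer)
```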
## Next Steps

- RAG Documentation - Build a full knowledge base with Enclava
- Integrations - Use embeddings with your frameworks
- Chatbots - Create chatbots with built-in RAG