- Introduced new skills tools: `skills_categories`, `skills_list`, and `skill_view` in `model_tools.py`, allowing for better organization and access to skill-related functionalities. - Updated `toolsets.py` to include a new `skills` toolset, providing a dedicated space for skill tools. - Enhanced `batch_runner.py` to recognize and validate skills tools during batch processing. - Added comprehensive tool definitions for skills tools, ensuring compatibility with OpenAI's expected format. - Created new shell script `test_skills_kimi.sh` for testing skills tool functionality with Kimi K2.5. - Added example skill files demonstrating the structure and usage of skills within the Hermes-Agent framework, including `SKILL.md` for example and audiocraft skills. - Improved documentation for skills tools and their integration into the existing tool framework, ensuring clarity for future development and usage.
15 KiB
15 KiB
Qdrant Advanced Usage Guide
Distributed Deployment
Cluster Setup
Qdrant uses Raft consensus for distributed coordination.
# docker-compose.yml for 3-node cluster
version: '3.8'
services:
qdrant-node-1:
image: qdrant/qdrant:latest
ports:
- "6333:6333"
- "6334:6334"
- "6335:6335"
volumes:
- ./node1_storage:/qdrant/storage
environment:
- QDRANT__CLUSTER__ENABLED=true
- QDRANT__CLUSTER__P2P__PORT=6335
- QDRANT__SERVICE__HTTP_PORT=6333
- QDRANT__SERVICE__GRPC_PORT=6334
qdrant-node-2:
image: qdrant/qdrant:latest
ports:
- "6343:6333"
- "6344:6334"
- "6345:6335"
volumes:
- ./node2_storage:/qdrant/storage
environment:
- QDRANT__CLUSTER__ENABLED=true
- QDRANT__CLUSTER__P2P__PORT=6335
- QDRANT__CLUSTER__BOOTSTRAP=http://qdrant-node-1:6335
depends_on:
- qdrant-node-1
qdrant-node-3:
image: qdrant/qdrant:latest
ports:
- "6353:6333"
- "6354:6334"
- "6355:6335"
volumes:
- ./node3_storage:/qdrant/storage
environment:
- QDRANT__CLUSTER__ENABLED=true
- QDRANT__CLUSTER__P2P__PORT=6335
- QDRANT__CLUSTER__BOOTSTRAP=http://qdrant-node-1:6335
depends_on:
- qdrant-node-1
Sharding Configuration
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, ShardingMethod
client = QdrantClient(host="localhost", port=6333)
# Create sharded collection
client.create_collection(
collection_name="large_collection",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
shard_number=6, # Number of shards
replication_factor=2, # Replicas per shard
write_consistency_factor=1 # Required acks for write
)
# Check cluster status
cluster_info = client.get_cluster_info()
print(f"Peers: {cluster_info.peers}")
print(f"Raft state: {cluster_info.raft_info}")
Replication and Consistency
from qdrant_client.models import WriteOrdering
# Strong consistency write
client.upsert(
collection_name="critical_data",
points=points,
ordering=WriteOrdering.STRONG # Serialize writes through the permanent shard leader
)
# Eventual consistency (faster)
client.upsert(
collection_name="logs",
points=points,
ordering=WriteOrdering.WEAK # Return after primary ack
)
# Read from specific shard
results = client.search(
collection_name="documents",
query_vector=query,
consistency="majority" # Read from majority of replicas
)
Hybrid Search
Dense + Sparse Vectors
Combine semantic (dense) and keyword (sparse) search:
from qdrant_client.models import (
VectorParams, SparseVectorParams, SparseIndexParams,
Distance, PointStruct, SparseVector, Prefetch, Query
)
# Create hybrid collection
client.create_collection(
collection_name="hybrid",
vectors_config={
"dense": VectorParams(size=384, distance=Distance.COSINE)
},
sparse_vectors_config={
"sparse": SparseVectorParams(
index=SparseIndexParams(on_disk=False)
)
}
)
# Insert with both vector types
def encode_sparse(text: str) -> SparseVector:
    """Simple BM25-like sparse encoding of *text*.

    Tokenizes by lowercased whitespace split, maps each token to a
    bucket index in [0, 30000), and uses the token's term frequency
    as the value. (Use a real vocabulary / BM25 weights in production.)

    NOTE: the token->index mapping must be deterministic across
    processes, because sparse vectors are persisted in Qdrant and
    queried later from other processes. Python's builtin ``hash()``
    for str is salted per interpreter run (PYTHONHASHSEED), so it
    must NOT be used here; ``zlib.crc32`` is stable everywhere.
    """
    import zlib
    from collections import Counter

    counts = Counter(text.lower().split())
    # Merge term frequencies of tokens whose buckets collide, so the
    # resulting vector never contains duplicate indices.
    buckets: dict[int, float] = {}
    for token, freq in counts.items():
        idx = zlib.crc32(token.encode("utf-8")) % 30000
        buckets[idx] = buckets.get(idx, 0) + freq
    return SparseVector(indices=list(buckets.keys()), values=list(buckets.values()))
client.upsert(
collection_name="hybrid",
points=[
PointStruct(
id=1,
vector={
"dense": dense_encoder.encode("Python programming").tolist(),
"sparse": encode_sparse("Python programming language code")
},
payload={"text": "Python programming language code"}
)
]
)
# Hybrid search with Reciprocal Rank Fusion (RRF)
from qdrant_client.models import FusionQuery
results = client.query_points(
collection_name="hybrid",
prefetch=[
Prefetch(query=dense_query, using="dense", limit=20),
Prefetch(query=sparse_query, using="sparse", limit=20)
],
query=FusionQuery(fusion="rrf"), # Combine results
limit=10
)
Multi-Stage Search
from qdrant_client.models import Prefetch, Query
# Two-stage retrieval: coarse then fine
results = client.query_points(
collection_name="documents",
prefetch=[
Prefetch(
query=query_vector,
limit=100, # Broad first stage
params={"quantization": {"rescore": False}} # Fast, approximate
)
],
query=Query(nearest=query_vector),
limit=10,
params={"quantization": {"rescore": True}} # Accurate reranking
)
Recommendations
Item-to-Item Recommendations
# Find similar items
recommendations = client.recommend(
collection_name="products",
positive=[1, 2, 3], # IDs user liked
negative=[4], # IDs user disliked
limit=10
)
# With filtering
recommendations = client.recommend(
collection_name="products",
positive=[1, 2],
query_filter={
"must": [
{"key": "category", "match": {"value": "electronics"}},
{"key": "in_stock", "match": {"value": True}}
]
},
limit=10
)
Lookup from Another Collection
from qdrant_client.models import RecommendStrategy, LookupLocation
# Recommend using vectors from another collection
results = client.recommend(
collection_name="products",
positive=[
LookupLocation(
collection_name="user_history",
id="user_123"
)
],
strategy=RecommendStrategy.AVERAGE_VECTOR,
limit=10
)
Advanced Filtering
Nested Payload Filtering
from qdrant_client.models import Filter, FieldCondition, MatchValue, NestedCondition
# Filter on nested objects
results = client.search(
collection_name="documents",
query_vector=query,
query_filter=Filter(
must=[
NestedCondition(
key="metadata",
filter=Filter(
must=[
FieldCondition(
key="author.name",
match=MatchValue(value="John")
)
]
)
)
]
),
limit=10
)
Geo Filtering
from qdrant_client.models import FieldCondition, GeoRadius, GeoPoint
# Find within radius
results = client.search(
collection_name="locations",
query_vector=query,
query_filter=Filter(
must=[
FieldCondition(
key="location",
geo_radius=GeoRadius(
center=GeoPoint(lat=40.7128, lon=-74.0060),
radius=5000 # meters
)
)
]
),
limit=10
)
# Geo bounding box
from qdrant_client.models import GeoBoundingBox
results = client.search(
collection_name="locations",
query_vector=query,
query_filter=Filter(
must=[
FieldCondition(
key="location",
geo_bounding_box=GeoBoundingBox(
top_left=GeoPoint(lat=40.8, lon=-74.1),
bottom_right=GeoPoint(lat=40.6, lon=-73.9)
)
)
]
),
limit=10
)
Full-Text Search
from qdrant_client.models import TextIndexParams, TokenizerType
# Create text index
client.create_payload_index(
collection_name="documents",
field_name="content",
field_schema=TextIndexParams(
type="text",
tokenizer=TokenizerType.WORD,
min_token_len=2,
max_token_len=15,
lowercase=True
)
)
# Full-text filter
from qdrant_client.models import MatchText
results = client.search(
collection_name="documents",
query_vector=query,
query_filter=Filter(
must=[
FieldCondition(
key="content",
match=MatchText(text="machine learning")
)
]
),
limit=10
)
Quantization Strategies
Scalar Quantization (INT8)
from qdrant_client.models import ScalarQuantization, ScalarQuantizationConfig, ScalarType
# ~4x memory reduction, minimal accuracy loss
client.create_collection(
collection_name="scalar_quantized",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
quantization_config=ScalarQuantization(
scalar=ScalarQuantizationConfig(
type=ScalarType.INT8,
quantile=0.99, # Clip extreme values
always_ram=True # Keep quantized vectors in RAM
)
)
)
Product Quantization
from qdrant_client.models import ProductQuantization, ProductQuantizationConfig, CompressionRatio
# ~16x memory reduction, some accuracy loss
client.create_collection(
collection_name="product_quantized",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
quantization_config=ProductQuantization(
product=ProductQuantizationConfig(
compression=CompressionRatio.X16,
always_ram=True
)
)
)
Binary Quantization
from qdrant_client.models import BinaryQuantization, BinaryQuantizationConfig
# ~32x memory reduction, requires oversampling
client.create_collection(
collection_name="binary_quantized",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
quantization_config=BinaryQuantization(
binary=BinaryQuantizationConfig(always_ram=True)
)
)
# Search with oversampling
results = client.search(
collection_name="binary_quantized",
query_vector=query,
search_params={
"quantization": {
"rescore": True,
"oversampling": 2.0 # Retrieve 2x candidates, rescore
}
},
limit=10
)
Snapshots and Backups
Create Snapshot
# Create collection snapshot
snapshot_info = client.create_snapshot(collection_name="documents")
print(f"Snapshot: {snapshot_info.name}")
# List snapshots
snapshots = client.list_snapshots(collection_name="documents")
for s in snapshots:
print(f"{s.name}: {s.size} bytes")
# Full storage snapshot
full_snapshot = client.create_full_snapshot()
Restore from Snapshot
# Download snapshot
client.download_snapshot(
collection_name="documents",
snapshot_name="documents-2024-01-01.snapshot",
target_path="./backup/"
)
# Restore (via REST API)
import requests
response = requests.put(
"http://localhost:6333/collections/documents/snapshots/recover",
json={"location": "file:///backup/documents-2024-01-01.snapshot"}
)
Collection Aliases
# Create alias
client.update_collection_aliases(
change_aliases_operations=[
{"create_alias": {"alias_name": "production", "collection_name": "documents_v2"}}
]
)
# Blue-green deployment
# 1. Create new collection with updates
client.create_collection(collection_name="documents_v3", ...)
# 2. Populate new collection
client.upsert(collection_name="documents_v3", points=new_points)
# 3. Atomic switch
client.update_collection_aliases(
change_aliases_operations=[
{"delete_alias": {"alias_name": "production"}},
{"create_alias": {"alias_name": "production", "collection_name": "documents_v3"}}
]
)
# Search via alias
results = client.search(collection_name="production", query_vector=query, limit=10)
Scroll and Iteration
Scroll Through All Points
# Paginated iteration
offset = None
all_points = []
while True:
results, offset = client.scroll(
collection_name="documents",
limit=100,
offset=offset,
with_payload=True,
with_vectors=False
)
all_points.extend(results)
if offset is None:
break
print(f"Total points: {len(all_points)}")
Filtered Scroll
# Scroll with filter
results, _ = client.scroll(
collection_name="documents",
scroll_filter=Filter(
must=[
FieldCondition(key="status", match=MatchValue(value="active"))
]
),
limit=1000
)
Async Client
import asyncio
from qdrant_client import AsyncQdrantClient
async def main():
    """Create an example collection, upsert points, and run one search.

    Demonstrates the async client: every operation is awaited and the
    client reuses a single connection pool. Relies on module-level
    ``points`` and ``query`` being defined.
    """
    aclient = AsyncQdrantClient(host="localhost", port=6333)
    # Async operations mirror the sync API one-to-one.
    await aclient.create_collection(
        collection_name="async_docs",
        vectors_config=VectorParams(size=384, distance=Distance.COSINE)
    )
    await aclient.upsert(
        collection_name="async_docs",
        points=points
    )
    hits = await aclient.search(
        collection_name="async_docs",
        query_vector=query,
        limit=10
    )
    return hits
results = asyncio.run(main())
gRPC Client
from qdrant_client import QdrantClient
# Prefer gRPC for better performance
client = QdrantClient(
host="localhost",
port=6333,
grpc_port=6334,
prefer_grpc=True # Use gRPC when available
)
# gRPC-only client
from qdrant_client import QdrantClient
client = QdrantClient(
host="localhost",
grpc_port=6334,
prefer_grpc=True,
https=False
)
Multitenancy
Payload-Based Isolation
# Single collection, filter by tenant
client.upsert(
collection_name="multi_tenant",
points=[
PointStruct(
id=1,
vector=embedding,
payload={"tenant_id": "tenant_a", "text": "..."}
)
]
)
# Search within tenant
results = client.search(
collection_name="multi_tenant",
query_vector=query,
query_filter=Filter(
must=[FieldCondition(key="tenant_id", match=MatchValue(value="tenant_a"))]
),
limit=10
)
Collection-Per-Tenant
# Create tenant collection
def create_tenant_collection(tenant_id: str):
    """Provision a dedicated collection for one tenant (hard isolation)."""
    tenant_collection = f"tenant_{tenant_id}"
    vector_config = VectorParams(size=384, distance=Distance.COSINE)
    client.create_collection(
        collection_name=tenant_collection,
        vectors_config=vector_config
    )
# Search tenant collection
def search_tenant(tenant_id: str, query_vector: list, limit: int = 10):
    """Run a similarity search scoped to a single tenant's collection."""
    tenant_collection = f"tenant_{tenant_id}"
    return client.search(
        collection_name=tenant_collection,
        query_vector=query_vector,
        limit=limit
    )
Performance Monitoring
Collection Statistics
# Collection info
info = client.get_collection("documents")
print(f"Points: {info.points_count}")
print(f"Indexed vectors: {info.indexed_vectors_count}")
print(f"Segments: {len(info.segments)}")
print(f"Status: {info.status}")
# Detailed segment info
for i, segment in enumerate(info.segments):
print(f"Segment {i}: {segment}")
Telemetry
# Get telemetry data
telemetry = client.get_telemetry()
print(f"Collections: {telemetry.collections}")
print(f"Operations: {telemetry.operations}")