# Snapshot of revision b36cb8b (file size: 4,897 bytes).
"""
Vector Database Service implementation for Qdrant
"""
from typing import Any, Dict, List, Optional

from qdrant_client import QdrantClient
from qdrant_client.models import (
    Distance,
    PointStruct,
    Record,
    ScoredPoint,
    VectorParams,
)
class VectorDatabaseClient:
    """Client for interacting with a single collection in a Qdrant vector database.

    The instance keeps one ``QdrantClient`` connection together with the
    name of the collection every method operates on and the vector size
    used when that collection has to be created.
    """

    def __init__(self, url: str, api_key: str, collection_name: str, embedding_size: int):
        """Initialize Qdrant client and collection settings.

        Args:
            url: Qdrant server URL
            api_key: API key for Qdrant
            collection_name: Name of the collection to use
            embedding_size: Size of embedding vectors
        """
        self.client = QdrantClient(url=url, api_key=api_key)
        self.collection_name = collection_name
        self.embedding_size = embedding_size

    def ensure_collection_exists(self):
        """Ensure the collection exists, create it if it doesn't.

        A new collection is created with ``embedding_size`` dimensions and
        cosine distance. The outcome is reported on stdout.
        """
        # Reuse list_collections() so the "which collections exist" logic
        # lives in exactly one place.
        if self.collection_name not in self.list_collections():
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=self.embedding_size,
                    distance=Distance.COSINE,
                ),
            )
            print(f"✅ Collection '{self.collection_name}' created.")
        else:
            print(f"ℹ️ Collection '{self.collection_name}' already exists.")

    def add_embedding(self, id: str, embedding: List[float], filename: str, metadata: Optional[str] = None) -> str:
        """Add an embedding to the collection.

        Args:
            id: Unique ID for the point
            embedding: Vector embedding
            filename: Original filename, stored under the ``filename`` payload key
            metadata: Optional metadata as JSON string, stored under the
                ``metadata`` payload key; omitted when empty or None

        Returns:
            ID of the added point
        """
        payload: Dict[str, Any] = {"filename": filename}
        if metadata:
            payload["metadata"] = metadata
        # Same upsert as the custom-payload variant; delegate instead of
        # duplicating the PointStruct construction.
        return self.add_embedding_with_payload(id, embedding, payload)

    def add_embedding_with_payload(self, id: str, embedding: List[float], payload: Dict[str, Any]) -> str:
        """Add an embedding with a custom payload.

        The point is upserted, so an existing point with the same ID is
        overwritten.

        Args:
            id: Unique ID for the point
            embedding: Vector embedding
            payload: Dictionary of metadata to store

        Returns:
            ID of the added point
        """
        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                PointStruct(
                    id=id,
                    vector=embedding,
                    payload=payload,
                )
            ],
        )
        return id

    def search_by_embedding(self, embedding: List[float], limit: int = 5) -> List[ScoredPoint]:
        """Search for similar vectors.

        Args:
            embedding: Query vector
            limit: Maximum number of results

        Returns:
            List of search results as returned by the Qdrant client
        """
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=embedding,
            limit=limit,
        )

    def search_by_id(self, id: str, limit: int = 1) -> List[ScoredPoint]:
        """Search for similar vectors using an existing vector as query.

        Args:
            id: ID of the existing vector to use as query
            limit: Maximum number of results

        Returns:
            List of search results; empty when the ID is unknown or the
            stored point carries no vector
        """
        # Qdrant leaves vectors out of retrieved records unless asked, so
        # they must be requested explicitly to have something to search with.
        records = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[id],
            with_vectors=True,
        )
        if not records or records[0].vector is None:
            return []
        return self.search_by_embedding(records[0].vector, limit)

    def delete_embedding(self, id: str) -> bool:
        """Delete an embedding from the collection.

        The existence check and the delete are two separate requests, so
        the result is not atomic with respect to concurrent writers.

        Args:
            id: ID of the embedding to delete

        Returns:
            True if deleted, False if not found
        """
        # Only the presence of the point matters here; skip payload and
        # vector to keep the lookup cheap.
        existing = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[id],
            with_payload=False,
            with_vectors=False,
        )
        if not existing:
            return False
        self.client.delete(
            collection_name=self.collection_name,
            points_selector=[id],
        )
        return True

    def list_collections(self) -> List[str]:
        """List all collections in the database.

        Returns:
            List of collection names
        """
        return [c.name for c in self.client.get_collections().collections]