File size: 4,897 Bytes
b36cb8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
Vector Database Service implementation for Qdrant
"""

from typing import List, Dict, Any, Optional

from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance, Record, ScoredPoint



class VectorDatabaseClient:
    """Client for interacting with a Qdrant vector database.

    Holds one ``QdrantClient`` connection together with the name of the
    collection every operation targets and the vector size used when that
    collection has to be created.
    """

    def __init__(self, url: str, api_key: str, collection_name: str, embedding_size: int) -> None:
        """Initialize Qdrant client and collection settings.

        Args:
            url: Qdrant server URL
            api_key: API key for Qdrant
            collection_name: Name of the collection to use
            embedding_size: Size of embedding vectors
        """
        self.client = QdrantClient(url=url, api_key=api_key)
        self.collection_name = collection_name
        self.embedding_size = embedding_size

    def ensure_collection_exists(self) -> None:
        """Ensure the collection exists, creating it if it doesn't.

        A newly created collection uses ``embedding_size`` as vector size and
        cosine distance. The outcome is reported on stdout.
        """
        if self.collection_name in self.list_collections():
            print(f"ℹ️ Collection '{self.collection_name}' already exists.")
            return

        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.embedding_size,
                distance=Distance.COSINE,
            ),
        )
        print(f"✅ Collection '{self.collection_name}' created.")

    def add_embedding(self, id: str, embedding: List[float], filename: str, metadata: Optional[str] = None) -> str:
        """Add an embedding to the collection.

        The stored payload always contains ``filename``; ``metadata`` is added
        only when it is a non-empty value.

        Args:
            id: Unique ID for the point
            embedding: Vector embedding
            filename: Original filename
            metadata: Optional metadata as JSON string

        Returns:
            ID of the added point
        """
        payload: Dict[str, Any] = {"filename": filename}
        if metadata:
            payload["metadata"] = metadata

        # Single upsert code path shared with the custom-payload variant.
        return self.add_embedding_with_payload(id, embedding, payload)

    def add_embedding_with_payload(self, id: str, embedding: List[float], payload: Dict[str, Any]) -> str:
        """Add an embedding with a custom payload.

        Uses an upsert, so an existing point with the same ID is overwritten.

        Args:
            id: Unique ID for the point
            embedding: Vector embedding
            payload: Dictionary of metadata to store

        Returns:
            ID of the added point
        """
        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                PointStruct(
                    id=id,
                    vector=embedding,
                    payload=payload,
                )
            ],
        )
        return id

    def search_by_embedding(self, embedding: List[float], limit: int = 5) -> "List[ScoredPoint]":
        """Search for similar vectors.

        Args:
            embedding: Query vector
            limit: Maximum number of results

        Returns:
            List of search results as returned by the Qdrant client
        """
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=embedding,
            limit=limit,
        )

    def search_by_id(self, id: str, limit: int = 1) -> "List[ScoredPoint]":
        """Search for similar vectors using an existing vector as query.

        Note that the query point lives in the searched collection, so it is
        typically part of the results itself.

        Args:
            id: ID of the existing vector to use as query
            limit: Maximum number of results

        Returns:
            List of search results; empty if no point with ``id`` exists or
            the point has no stored vector
        """
        # retrieve() omits vectors unless asked for them explicitly; without
        # with_vectors=True the record's ``vector`` attribute would be None.
        records = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[id],
            with_payload=False,
            with_vectors=True,
        )

        if not records or records[0].vector is None:
            return []

        return self.search_by_embedding(records[0].vector, limit)

    def delete_embedding(self, id: str) -> bool:
        """Delete an embedding from the collection.

        Args:
            id: ID of the embedding to delete

        Returns:
            True if deleted, False if not found
        """
        # The delete call itself does not tell us whether the point existed,
        # so look it up first to honour the documented return value.
        existing = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[id],
            with_payload=False,
            with_vectors=False,
        )
        if not existing:
            return False

        self.client.delete(
            collection_name=self.collection_name,
            points_selector=[id],
        )
        return True

    def list_collections(self) -> List[str]:
        """List all collections in the database.

        Returns:
            List of collection names
        """
        return [c.name for c in self.client.get_collections().collections]