"""
Embedding Service for generating image embeddings
"""

import os
from typing import List, Dict, Any
from PIL import Image
import io
import numpy as np
import torch
from transformers import CLIPProcessor, CLIPModel


class ImageEmbeddingModel:
    """Class for generating embeddings from images using CLIP"""
    
    def __init__(self, model_name: str = "openai/clip-vit-base-patch32"):
        """Initialize the CLIP model
        
        Args:
            model_name: Name of the CLIP model to use
        """
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = CLIPModel.from_pretrained(model_name).to(self.device)
        self.processor = CLIPProcessor.from_pretrained(model_name)
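        # Hugging Face models load in eval mode by default; calling eval()
        # here just makes the inference-only intent explicit.
        self.model.eval()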
    
    def generate_embedding(self, image_data: bytes) -> List[float]:
        """Generate embedding for an image from binary data
        
        Args:
            image_data: Binary image data
            
        Returns:
            Image embedding as a list of floats
        """
        # Load image from binary data
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        return self.generate_embedding_from_pil(image)
    
    def generate_embedding_from_pil(self, image: Image.Image) -> List[float]:
        """Generate embedding for a PIL Image
        
        Args:
            image: PIL Image object
            
        Returns:
            Image embedding as a list of floats
        """
        # Process image for CLIP
        inputs = self.processor(images=image, return_tensors="pt").to(self.device)
        
        # Generate embedding
        with torch.no_grad():
            image_features = self.model.get_image_features(**inputs)
            
        # L2-normalize so cosine similarity between embeddings reduces to a
        # plain dot product, then convert to a Python list
        image_embedding = image_features.cpu().numpy()[0]
        normalized_embedding = image_embedding / np.linalg.norm(image_embedding)
        return normalized_embedding.tolist()
    
    def get_embeddings_from_folder(self, folder_path: str) -> Dict[str, Any]:
        """Generate embeddings for all images in a folder
        
        Args:
            folder_path: Path to folder containing images
            
        Returns:
            Dictionary mapping each image filename to a result dict that
            contains either the embedding (status "success") or an error
            message (status "failed"); if the folder does not exist, a
            single "error" entry is returned instead
        """
        results = {}
        image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp'}
        
        # Check if folder exists
        if not os.path.exists(folder_path):
            return {"error": f"Folder {folder_path} does not exist"}
        
        # Process each image file; failures are recorded per file so one
        # unreadable image does not abort the whole folder
        for filename in os.listdir(folder_path):
            if os.path.splitext(filename)[1].lower() in image_extensions:
                try:
                    file_path = os.path.join(folder_path, filename)
                    with open(file_path, 'rb') as f:
                        image_data = f.read()
                    
                    embedding = self.generate_embedding(image_data)
                    results[filename] = {
                        "embedding": embedding,
                        "status": "success"
                    }
                except Exception as e:
                    results[filename] = {
                        "error": str(e),
                        "status": "failed"
                    }
        
        return results
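

if __name__ == "__main__":
    # Minimal usage sketch, not part of the service API; the default image
    # path below is a placeholder. For the ViT-B/32 checkpoint the image
    # embedding has 512 dimensions.
    import sys

    model = ImageEmbeddingModel()
    path = sys.argv[1] if len(sys.argv) > 1 else "example.jpg"
    with open(path, "rb") as f:
        vector = model.generate_embedding(f.read())
    print(f"{path}: {len(vector)}-dimensional embedding")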