Spaces:

anmoldograpsl
/

test_space

Sleeping

App Files Files Community

anmoldograpsl committed on Oct 14, 2024

Commit

0e3adc8

verified ·

1 Parent(s): 10271df

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -58

app.py CHANGED Viewed

@@ -1,60 +1,42 @@
 import os
-import base64
-import torch
-import gradio as gr
 from PIL import Image
-from transformers import AutoProcessor, AutoModelForCausalLM
-from peft import get_peft_model, LoraConfig, TaskType
-from huggingface_hub import login
-# Step 1: Log in to Hugging Face
-hf_token = os.getenv("HF_TOKEN")
-login(token=hf_token)
-# Step 2: Load the private model and processor
-model_name = "anushettypsl/paligemma_vqav2"  # Replace with the actual model link
-processor = AutoProcessor.from_pretrained(model_name)
-base_model = AutoModelForCausalLM.from_pretrained(model_name)
-# Step 3: Set up PEFT configuration (if needed)
-lora_config = LoraConfig(
-    r=16,  # Rank
-    lora_alpha=32,  # Scaling factor
-    lora_dropout=0.1,  # Dropout
-    task_type=TaskType.CAUSAL_LM,  # Adjust according to your model's task
-)
-# Step 4: Get the PEFT model
-peft_model = get_peft_model(base_model, lora_config)
-# Step 5: Define the prediction function
-def predict(image_base64, prompt):
-    # Decode the base64 image
-    image_data = base64.b64decode(image_base64)
-    image = Image.open(io.BytesIO(image_data))
-    # Process the image
-    inputs = processor( text=prompt,images=image, return_tensors="pt")
-    # Generate output using the model
-    with torch.no_grad():
-        output = peft_model.generate(**inputs)
-    # Decode the output to text
-    generated_text = processor.decode(output[0], skip_special_tokens=True)
-    return generated_text
-# Step 6: Create the Gradio interface
-interface = gr.Interface(
-    fn=predict,
-    inputs=[
-        gr.Textbox(label="Image (Base64)", placeholder="Enter base64 encoded image here...", lines=10),  # Base64 input for image
-        gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")  # Prompt input
-    ],
-    outputs="text",  # Text output
-    title="Image and Prompt to Text Model",
-    description="Enter a base64 encoded image and a prompt to generate a descriptive text."
-)
-# Step 7: Launch the Gradio app
-interface.launch()

+from huggingface_hub import login
 import os
+from peft import PeftModel, PeftConfig
+from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
 from PIL import Image
+import requests
+import torch
+import io
+import base64
+import cv2
+access_token = os.environ["HF_TOKEN"]
+login(token=access_token)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+dtype = torch.bfloat16
+config = PeftConfig.from_pretrained("anushettypsl/paligemma_vqav2")
+# base_model = AutoModelForCausalLM.from_pretrained("google/paligemma-3b-pt-448")
+base_model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma-3b-pt-448")
+model = PeftModel.from_pretrained(base_model, "anushettypsl/paligemma_vqav2", device_map=device)
+processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-448", device_map=device)
+model.to(device)
+image = cv2.imread('/content/15_BC_G2_6358_40x_2_jpg.rf.97595fa4965f66ad45be8fd055331933.jpg')
+# Convert the image to base64 encoding
+image_bytes = cv2.imencode('.jpg', image)[1]
+base64_string = base64.b64encode(image_bytes).decode('utf-8')
+input_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+model_inputs = processor(
+    text=input_text, images=input_image, return_tensors="pt").to(device)
+input_len = model_inputs["input_ids"].shape[-1]
+model.to(device)
+with torch.inference_mode():
+    generation = model.generate(
+        **model_inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+    decoded = processor.decode(generation, skip_special_tokens=True)
+    print(decoded)