# Aduc-sdr's picture
# Update engineers/deformes3D.py
# 1444940 verified
# engineers/deformes3D.py
#
# Copyright (C) 2025 Carlos Rodrigues dos Santos
#
# Version: 1.5.1
#
# This version keeps the FLUX-based keyframe generator available (see
# `_generate_single_keyframe`), but the FLUX step inside the main loop is
# currently commented out: every keyframe is produced by the LTX-based
# "enrichment" step, and each LTX keyframe is used as the base image for the
# next iteration.
from PIL import Image, ImageOps
import os
import time
import logging
import gradio as gr
import yaml
import torch
import numpy as np
from managers.flux_kontext_manager import flux_kontext_singleton
from engineers.deformes2D_thinker import deformes2d_thinker_singleton
from aduc_types import LatentConditioningItem
from managers.ltx_manager import ltx_manager_singleton
from managers.vae_manager import vae_manager_singleton
from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
logger = logging.getLogger(__name__)
class Deformes3DEngine:
    """
    ADUC Specialist for static image (keyframe) generation.

    Keyframes are currently produced through the LTX latent pipeline: context
    images are VAE-encoded into conditioning latents, a latent fragment is
    generated from them, and the last slice of that fragment is upscaled,
    decoded and saved as a PNG in the workspace. The FLUX helper is kept on the
    instance and remains usable through `_generate_single_keyframe`.
    """

    # Weight of the first conditioning image and the amount every following
    # image loses. With at most 1 + _MAX_GENERAL_REFERENCES images the weight
    # never drops below ~0.3.
    _INITIAL_CONDITIONING_WEIGHT = 0.6
    _CONDITIONING_WEIGHT_STEP = 0.1
    # Maximum number of general reference images added to the LTX context.
    _MAX_GENERAL_REFERENCES = 3

    def __init__(self, workspace_dir):
        """
        Args:
            workspace_dir: Directory where generated keyframe images are saved.
        """
        self.workspace_dir = workspace_dir
        self.image_generation_helper = flux_kontext_singleton
        logger.info("3D Engine (Image Specialist) ready to receive orders from the Maestro.")

    def _generate_single_keyframe(
        self,
        prompt: str,
        reference_images: "list[Image.Image]",
        output_filename: str,
        width: int,
        height: int,
        callback: callable = None,
    ) -> str:
        """
        Low-level function that generates a single image using the FLUX helper
        (`self.image_generation_helper`) and saves it in the workspace.

        Args:
            prompt: Text prompt passed to the image helper.
            reference_images: Reference images passed to the image helper.
            output_filename: File name (relative to the workspace directory).
            width: Requested image width.
            height: Requested image height.
            callback: Optional progress callback forwarded to the helper.

        Returns:
            The path of the saved image.
        """
        logger.info(f"Generating keyframe '{output_filename}' with prompt: '{prompt}'")
        generated_image = self.image_generation_helper.generate_image(
            reference_images=reference_images,
            prompt=prompt,
            width=width,
            height=height,
            seed=int(time.time()),  # wall-clock seed: a different seed every second
            callback=callback,
        )
        final_path = os.path.join(self.workspace_dir, output_filename)
        generated_image.save(final_path)
        logger.info(f"Keyframe successfully saved to: {final_path}")
        return final_path

    def generate_keyframes_from_storyboard(
        self,
        storyboard: list,
        initial_ref_path: str,
        global_prompt: str,
        keyframe_resolution: int,
        general_ref_paths: list,
        progress_callback_factory: callable = None,
    ):
        """
        Orchestrates the generation of all keyframes.

        One keyframe is generated per consecutive pair of storyboard entries
        (current scene, future scene), so a storyboard with N entries yields
        N - 1 keyframes. Each generated keyframe becomes the base image for
        the next one.

        Args:
            storyboard: Ordered scene descriptions.
            initial_ref_path: Path of the image used as base for the first keyframe.
            global_prompt: Global prompt forwarded to the prompt generator.
            keyframe_resolution: Side length of the (square) keyframes.
            general_ref_paths: Paths of general reference images; `None` is
                treated as "no general references".
            progress_callback_factory: Optional callable invoked as
                `factory(scene_index, total)` once per keyframe.

        Returns:
            List with the paths of the generated LTX keyframes, in order.
        """
        # Tolerate a missing reference list instead of failing while iterating it.
        general_ref_paths = general_ref_paths or []

        current_base_image_path = initial_ref_path
        previous_prompt = "N/A (initial reference image)"
        final_keyframes_gallery = []
        width, height = keyframe_resolution, keyframe_resolution
        target_resolution_tuple = (width, height)
        # An empty storyboard yields zero keyframes (not -1).
        num_keyframes_to_generate = max(0, len(storyboard) - 1)
        # Generation parameters are the same for every keyframe.
        ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}

        logger.info(f"IMAGE SPECIALIST: Received order to generate {num_keyframes_to_generate} keyframes (LTX versions).")

        scene_pairs = zip(storyboard, storyboard[1:])
        for scene_index, (current_scene, future_scene) in enumerate(scene_pairs, start=1):
            if progress_callback_factory:
                # The resulting callback was only consumed by the FLUX step,
                # which is disabled; the factory is still invoked so that any
                # side effects it has stay unchanged.
                progress_callback_factory(scene_index, num_keyframes_to_generate)

            logger.info(f"--> Generating Keyframe {scene_index}/{num_keyframes_to_generate}...")

            # --- STEP A: Build the image prompt ---
            logger.info(" - Step A: Generating with keyframe...")
            img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
                global_prompt=global_prompt,
                scene_history=previous_prompt,
                current_scene_desc=current_scene,
                future_scene_desc=future_scene,
                last_image_path=current_base_image_path,
                fixed_ref_paths=general_ref_paths,
            )
            # NOTE: FLUX generation is currently disabled. To re-enable it,
            # open the base and general reference images and call
            # `self._generate_single_keyframe` with `img_prompt` and an output
            # name such as f"keyframe_{scene_index}_flux.png".

            # --- STEP B: LTX keyframe generation ---
            ltx_context_paths = self._select_ltx_context_paths(current_base_image_path, general_ref_paths)
            logger.info(f" - LTX Context Order (Reversed): {[os.path.basename(p) for p in ltx_context_paths]}")
            ltx_conditioning_items = self._build_ltx_conditioning_items(ltx_context_paths, target_resolution_tuple)

            generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
                height=height,
                width=width,
                conditioning_items_data=ltx_conditioning_items,
                motion_prompt=img_prompt,
                video_total_frames=48,
                video_fps=24,
                **ltx_base_params,
            )

            # Keep only the last entry along the third axis (presumably the
            # temporal/frame axis of the latent video - TODO confirm).
            final_latent = generated_latents[:, :, -1:, :, :]
            upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
            enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent)

            ltx_keyframe_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index}_ltx.png")
            self.save_image_from_tensor(enriched_pixel_tensor, ltx_keyframe_path)
            final_keyframes_gallery.append(ltx_keyframe_path)

            # The LTX keyframe (not a FLUX one) is the base for the next iteration.
            current_base_image_path = ltx_keyframe_path
            previous_prompt = img_prompt

        logger.info("IMAGE SPECIALIST: Generation of all keyframe versions (LTX) complete.")
        return final_keyframes_gallery

    # --- HELPER FUNCTIONS ---

    def _select_ltx_context_paths(self, base_image_path: str, general_ref_paths: list) -> list:
        """
        Returns the image paths used as LTX context, in conditioning order.

        The context is the base image followed by up to
        `_MAX_GENERAL_REFERENCES` general references (the base image itself is
        skipped if it also appears among them); the result is that list
        reversed, so the base image comes last.
        """
        extra_refs = [p for p in (general_ref_paths or []) if p != base_image_path]
        context_paths = [base_image_path] + extra_refs[: self._MAX_GENERAL_REFERENCES]
        context_paths.reverse()
        return context_paths

    def _build_ltx_conditioning_items(self, context_paths: list, target_resolution: tuple) -> list:
        """
        Encodes every context image into a `LatentConditioningItem`.

        All items are attached to frame index 0. The first image gets
        `_INITIAL_CONDITIONING_WEIGHT` and each following image gets
        `_CONDITIONING_WEIGHT_STEP` less than the previous one.
        """
        conditioning_items = []
        weight = self._INITIAL_CONDITIONING_WEIGHT
        for path in context_paths:
            # `convert` returns an independent image, so the file can be closed right away.
            with Image.open(path) as source_image:
                rgb_image = source_image.convert("RGB")
            fitted_image = self._preprocess_image_for_latent_conversion(rgb_image, target_resolution)
            pixel_tensor = self._pil_to_pixel_tensor(fitted_image)
            latent_tensor = vae_manager_singleton.encode(pixel_tensor)
            conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
            weight -= self._CONDITIONING_WEIGHT_STEP
        return conditioning_items

    def _preprocess_image_for_latent_conversion(self, image: "Image.Image", target_resolution: tuple) -> "Image.Image":
        """
        Resizes and fits an image to the target resolution for VAE encoding.

        The image is returned untouched when it already has the target size;
        otherwise `ImageOps.fit` with LANCZOS resampling is applied.
        """
        if image.size == target_resolution:
            return image
        return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)

    def _pil_to_pixel_tensor(self, pil_image: "Image.Image") -> torch.Tensor:
        """
        Helper to convert PIL to the 5D pixel tensor the VAE expects.

        The image is read as an (H, W, C) array, so it must have a channel
        axis (e.g. RGB). The result has shape (1, C, 1, H, W), dtype float32,
        with pixel values mapped from [0, 255] to [-1, 1].
        """
        image_np = np.array(pil_image).astype(np.float32) / 255.0
        # (H, W, C) -> (C, H, W) -> (1, C, H, W) -> (1, C, 1, H, W)
        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
        return (tensor * 2.0) - 1.0

    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """
        Helper to save a 1-frame pixel tensor as an image.

        Args:
            pixel_tensor: Tensor shaped (1, C, 1, H, W) with values nominally
                in [-1, 1]; values outside that range are clamped.
            path: Destination file path.
        """
        # `detach` lets tensors that still carry autograd history be converted to numpy.
        tensor_chw = pixel_tensor.detach().squeeze(0).squeeze(1)
        tensor_hwc = tensor_chw.permute(1, 2, 0)
        # Map [-1, 1] to [0, 1] before scaling to 8-bit.
        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
        image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
        Image.fromarray(image_np).save(path)
# --- Singleton Instantiation ---
# Build the module-level engine from config.yaml. Any failure (missing file,
# malformed YAML, missing keys, constructor error) is logged with its traceback
# and leaves the singleton as None so that importing this module never raises.
try:
    # Decode explicitly as UTF-8 instead of relying on the platform default encoding.
    with open("config.yaml", "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    WORKSPACE_DIR = config['application']['workspace_dir']
    deformes3d_engine_singleton = Deformes3DEngine(workspace_dir=WORKSPACE_DIR)
except Exception as e:
    logger.error(f"Could not initialize Deformes3DEngine: {e}", exc_info=True)
    deformes3d_engine_singleton = None