Aduc-sdr-cinematic-video

Runtime error

App Files Files Community

Aduc-sdr-cinematic-video / engineers /deformes3D.py

Aduc-sdr

Update engineers/deformes3D.py

1444940 verified 3 months ago

raw

history blame contribute delete

8.28 kB

	# engineers/deformes3D.py
	#
	# Copyright (C) 2025 Carlos Rodrigues dos Santos
	#
	# Version: 1.5.1
	#
	# This version maintains the core FLUX-based keyframe generation and adds the
	# LTX-based "enrichment" as a secondary, experimental step for each keyframe,
	# allowing for direct comparison without altering the primary workflow.

	from PIL import Image, ImageOps
	import os
	import time
	import logging
	import gradio as gr
	import yaml
	import torch
	import numpy as np

	from managers.flux_kontext_manager import flux_kontext_singleton
	from engineers.deformes2D_thinker import deformes2d_thinker_singleton
	from aduc_types import LatentConditioningItem
	from managers.ltx_manager import ltx_manager_singleton
	from managers.vae_manager import vae_manager_singleton
	from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton

	logger = logging.getLogger(__name__)

	class Deformes3DEngine:
	"""
	ADUC Specialist for static image (keyframe) generation.
	"""
	def __init__(self, workspace_dir):
	self.workspace_dir = workspace_dir
	self.image_generation_helper = flux_kontext_singleton
	logger.info("3D Engine (Image Specialist) ready to receive orders from the Maestro.")

	def _generate_single_keyframe(self, prompt: str, reference_images: list[Image.Image], output_filename: str, width: int, height: int, callback: callable = None) -> str:
	"""
	Low-level function that generates a single image using the LTX helper.
	"""
	logger.info(f"Generating keyframe '{output_filename}' with prompt: '{prompt}'")
	generated_image = self.image_generation_helper.generate_image(
	reference_images=reference_images, prompt=prompt, width=width,
	height=height, seed=int(time.time()), callback=callback
	)
	final_path = os.path.join(self.workspace_dir, output_filename)
	generated_image.save(final_path)
	logger.info(f"Keyframe successfully saved to: {final_path}")
	return final_path

	def generate_keyframes_from_storyboard(self, storyboard: list, initial_ref_path: str, global_prompt: str, keyframe_resolution: int, general_ref_paths: list, progress_callback_factory: callable = None):
	"""
	Orchestrates the generation of all keyframes.
	"""
	current_base_image_path = initial_ref_path
	previous_prompt = "N/A (initial reference image)"
	final_keyframes_gallery = [] #[current_base_image_path]
	width, height = keyframe_resolution, keyframe_resolution
	target_resolution_tuple = (width, height)

	num_keyframes_to_generate = len(storyboard) - 1
	logger.info(f"IMAGE SPECIALIST: Received order to generate {num_keyframes_to_generate} keyframes (LTX versions).")

	for i in range(num_keyframes_to_generate):
	scene_index = i + 1
	current_scene = storyboard[i]
	future_scene = storyboard[i+1]
	progress_callback_flux = progress_callback_factory(scene_index, num_keyframes_to_generate) if progress_callback_factory else None

	logger.info(f"--> Generating Keyframe {scene_index}/{num_keyframes_to_generate}...")

	# --- STEP A: Generate with FLUX (Primary Method) ---
	logger.info(f" - Step A: Generating with keyframe...")

	img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
	global_prompt=global_prompt, scene_history=previous_prompt,
	current_scene_desc=current_scene, future_scene_desc=future_scene,
	last_image_path=current_base_image_path, fixed_ref_paths=general_ref_paths
	)

	#flux_ref_paths = list(set([current_base_image_path] + general_ref_paths))
	#flux_ref_images = [Image.open(p) for p in flux_ref_paths]

	#flux_keyframe_path = self._generate_single_keyframe(
	# prompt=img_prompt, reference_images=flux_ref_images,
	# output_filename=f"keyframe_{scene_index}_flux.png", width=width, height=height,
	# callback=progress_callback_flux
	#)
	#final_keyframes_gallery.append(flux_keyframe_path)

	# --- STEP B: LTX Enrichment Experiment ---
	#logger.info(f" - Step B: Generating enrichment with LTX...")

	ltx_context_paths = []
	context_paths = []
	context_paths = [current_base_image_path] + [p for p in general_ref_paths if p != current_base_image_path][:3]

	ltx_context_paths = list(reversed(context_paths))
	logger.info(f" - LTX Context Order (Reversed): {[os.path.basename(p) for p in ltx_context_paths]}")

	ltx_conditioning_items = []

	weight = 0.6
	for idx, path in enumerate(ltx_context_paths):
	img_pil = Image.open(path).convert("RGB")
	img_processed = self._preprocess_image_for_latent_conversion(img_pil, target_resolution_tuple)
	pixel_tensor = self._pil_to_pixel_tensor(img_processed)
	latent_tensor = vae_manager_singleton.encode(pixel_tensor)

	ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))

	if idx >= 0:
	weight -= 0.1

	ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
	generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
	height=height, width=width,
	conditioning_items_data=ltx_conditioning_items,
	motion_prompt=img_prompt,
	video_total_frames=48,
	video_fps=24,
	**ltx_base_params
	)

	final_latent = generated_latents[:, :, -1:, :, :]
	upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
	enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent)

	ltx_keyframe_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index}_ltx.png")
	self.save_image_from_tensor(enriched_pixel_tensor, ltx_keyframe_path)
	final_keyframes_gallery.append(ltx_keyframe_path)

	# Use the FLUX keyframe as the base for the next iteration to maintain the primary narrative path
	current_base_image_path = ltx_keyframe_path #flux_keyframe_path
	previous_prompt = img_prompt

	logger.info(f"IMAGE SPECIALIST: Generation of all keyframe versions (LTX) complete.")
	return final_keyframes_gallery

	# --- HELPER FUNCTIONS ---

	def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
	"""Resizes and fits an image to the target resolution for VAE encoding."""
	if image.size != target_resolution:
	return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
	return image

	def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
	"""Helper to convert PIL to the 5D pixel tensor the VAE expects."""
	image_np = np.array(pil_image).astype(np.float32) / 255.0
	tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
	return (tensor * 2.0) - 1.0

	def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
	"""Helper to save a 1-frame pixel tensor as an image."""
	tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
	tensor_hwc = tensor_chw.permute(1, 2, 0)
	tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
	image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
	Image.fromarray(image_np).save(path)

	# --- Singleton Instantiation ---
	try:
	with open("config.yaml", 'r') as f:
	config = yaml.safe_load(f)
	WORKSPACE_DIR = config['application']['workspace_dir']
	deformes3d_engine_singleton = Deformes3DEngine(workspace_dir=WORKSPACE_DIR)
	except Exception as e:
	logger.error(f"Could not initialize Deformes3DEngine: {e}", exc_info=True)
	deformes3d_engine_singleton = None