# app.py (safe CPU startup for HF Spaces)
import os
import io
import numpy as np
import torch
from PIL import Image, ImageDraw
import gradio as gr
# Import the CPU-patched class you added earlier
from depth_anything_3.api import DepthAnything3
# ---------------------------
# Configuration
# ---------------------------
# Keep the same model path you used earlier (default is the one in your logs)
MODEL_DIR = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")
# Lower processing resolution to make CPU inference feasible.
# Increase if you want better quality but expect it to be much slower.
PROCESS_RES = int(os.environ.get("DA3_PROCESS_RES", "384"))
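# Optional: cap the number of PyTorch CPU threads. torch.set_num_threads is a
# standard PyTorch call; DA3_NUM_THREADS is an extra env var sketched in here,
# so the cap is opt-in and unset by default.
_num_threads = os.environ.get("DA3_NUM_THREADS")
if _num_threads:
    torch.set_num_threads(int(_num_threads))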
# ---------------------------
# Model loading (CPU)
# ---------------------------
print(f"🔄 Loading DepthAnything3 from '{MODEL_DIR}' on CPU (this may take a moment)...")
# Uses the PyTorchModelHubMixin.from_pretrained you have in the class
model = DepthAnything3.from_pretrained(MODEL_DIR)
model.to(torch.device("cpu"))
model.eval()
print("✅ Model ready on CPU")
# ---------------------------
# Inference helper
# ---------------------------
def _normalize_depth_to_uint8(depth: np.ndarray) -> np.ndarray:
"""Normalize a depth map (H,W) to uint8 grayscale for display."""
if depth is None:
return None
# convert to float
d = depth.astype(np.float32)
# clip NaNs / infs
d = np.nan_to_num(d, nan=0.0, posinf=0.0, neginf=0.0)
# Normalize robustly: use 1st and 99th percentiles to avoid outliers
vmin = np.percentile(d, 1.0)
vmax = np.percentile(d, 99.0)
if vmax - vmin < 1e-6:
vmax = vmin + 1.0
d = (d - vmin) / (vmax - vmin)
d = np.clip(d, 0.0, 1.0)
img = (d * 255.0).astype(np.uint8)
return img
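# Quick sanity check for the helper (illustrative values only):
#   _normalize_depth_to_uint8(np.linspace(0.0, 5.0, 64).reshape(8, 8))
# should yield an (8, 8) uint8 array spanning roughly 0..255 after the
# 1st/99th-percentile clipping above.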
def run_depth(single_img: Image.Image, process_res: int = PROCESS_RES):
"""
Run single-image depth inference with the patched DepthAnything3 API.
Returns a grayscale PIL image visualizing depth.
"""
if single_img is None:
return None
    try:
        # Use the API's inference function; we pass a list with a single image
        # (DepthAnything3 accepts PIL images directly).
        # Keep other args minimal to avoid heavy processing; inference_mode
        # skips autograd bookkeeping, which saves memory on CPU.
        with torch.inference_mode():
            pred = model.inference(
                [single_img],
                process_res=process_res,
                process_res_method="upper_bound_resize",
                export_format="mini_npz",  # minimal export
            )
    except Exception as e:
        # If inference raises, return a helpful message image
        msg = f"Inference error: {e}"
        print(msg)
        # Make a small image with the error text drawn on it
        err_img = Image.new("RGB", (640, 120), color=(255, 255, 255))
        ImageDraw.Draw(err_img).text((10, 10), msg, fill=(0, 0, 0))
        return err_img
    # Extract depth from the Prediction object - handle a few possible shapes / attrs
    depth_map = None
    # First try attribute .depth (common pattern in your code)
    if hasattr(pred, "depth"):
        depth_map = pred.depth
    elif isinstance(pred, dict) and "depth" in pred:
        depth_map = pred["depth"]
    elif hasattr(pred, "predictions") and len(pred.predictions) > 0:
        # fallback: some wrappers store a list of per-image predictions
        depth_map = pred.predictions[0].depth if hasattr(pred.predictions[0], "depth") else None
    # depth_map might be (N,H,W) or (H,W)
    if depth_map is None:
        # fallback: show processed_images if available (visual sanity check)
        try:
            if hasattr(pred, "processed_images"):
                imgs = pred.processed_images
                if isinstance(imgs, np.ndarray) and imgs.shape[0] > 0:
                    # return the first processed image; assume float [0,1] unless already uint8
                    first = imgs[0]
                    if first.dtype != np.uint8:
                        first = (np.clip(first, 0.0, 1.0) * 255).astype(np.uint8)
                    return Image.fromarray(first)
        except Exception:
            pass
        # nothing usable
        print("No depth found in prediction; returning empty image.")
        return Image.new("RGB", (640, 480), color=(255, 255, 255))
    # Convert torch tensors to numpy before any shape handling
    if isinstance(depth_map, torch.Tensor):
        depth_map = depth_map.detach().cpu().numpy()
    # If depth_map is batched, take the first entry
    if isinstance(depth_map, (list, tuple)):
        depth_map = depth_map[0]
        if isinstance(depth_map, torch.Tensor):
            depth_map = depth_map.detach().cpu().numpy()
    if isinstance(depth_map, np.ndarray) and depth_map.ndim == 3 and depth_map.shape[0] == 1:
        # shape (1,H,W) -> (H,W)
        depth_map = depth_map[0]
    # Now depth_map should be (H,W)
    if depth_map.ndim == 3 and depth_map.shape[0] == 3:
        # if somehow channel-first 3-channel, average down to a single channel
        depth_map = depth_map.mean(axis=0)
    depth_uint8 = _normalize_depth_to_uint8(depth_map)
    if depth_uint8 is None:
        return Image.new("RGB", (640, 480), color=(255, 255, 255))
    # Return grayscale PIL image
    depth_img = Image.fromarray(depth_uint8, mode="L")
    return depth_img
# ---------------------------
# Gradio interface
# ---------------------------
title = "Depth Anything 3 — CPU (single-image)"
description = (
"CPU-only minimal interface. Upload a single image and get a quick depth visualization.\n"
"This Space is intentionally lightweight to allow CPU startup. For better quality/multiview features you need GPU or the full app."
)
# Make the Gradio Interface the top-level `app` variable so HF Spaces detects it
app = gr.Interface(
    fn=run_depth,
    inputs=[
        gr.Image(type="pil", label="Upload image"),
        gr.Slider(minimum=128, maximum=1024, step=64, value=PROCESS_RES, label="Process resolution (smaller = faster)"),
    ],
    outputs=gr.Image(label="Predicted depth (grayscale)"),
    title=title,
    description=description,
)
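# Queueing helps when CPU inference takes tens of seconds per request; .queue()
# is a standard Gradio method, and the max_size below is just a guess for a
# single-worker CPU Space.
app.queue(max_size=8)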
# For local running
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860)