#!/usr/bin/env python3
"""
Skeleton Data Augmentation for ST-GCN Fall Detection

This module provides augmentation strategies for skeleton sequence data to improve
model generalization and robustness. All augmentations preserve the spatial-temporal
structure required by ST-GCN while introducing controlled variations.

Input Format: (C, T, V, M) where
    C = 3 channels (x, y, confidence)
    T = 60 frames (temporal window)
    V = 17 keypoints (COCO skeleton)
    M = 1 person (max persons tracked)

Augmentation Strategies:
    1. Horizontal Flip: Mirror skeleton across vertical axis with keypoint swapping
    2. Gaussian Noise: Add random noise to x,y coordinates (preserves confidence)
    3. Temporal Crop: Random crop + resize to simulate variable fall speeds

Reference: Issue #34 - ST-GCN Training Dataset Creation
"""
import numpy as np
from typing import Tuple, Optional
# COCO 17-keypoint left/right pairs for horizontal flip
# Format: (left_index, right_index)
COCO_LEFT_RIGHT_PAIRS = [
    (1, 2),    # left_eye <-> right_eye
    (3, 4),    # left_ear <-> right_ear
    (5, 6),    # left_shoulder <-> right_shoulder
    (7, 8),    # left_elbow <-> right_elbow
    (9, 10),   # left_wrist <-> right_wrist
    (11, 12),  # left_hip <-> right_hip
    (13, 14),  # left_knee <-> right_knee
    (15, 16),  # left_ankle <-> right_ankle
]

def augment_skeleton(data: np.ndarray, prob: float = 0.5) -> np.ndarray:
    """
    Apply random augmentations to skeleton sequence data.

    This function applies three augmentation strategies, each independently with
    probability `prob`:
      1. Horizontal flip with keypoint swapping
      2. Gaussian noise injection to x,y coordinates
      3. Temporal crop and resize

    Mathematical Formulations:
    -------------------------
    1. Horizontal Flip:
        x' = -x
        For each (left, right) keypoint pair: swap(left, right)
    2. Gaussian Noise:
        x' = x + N(0, sigma^2)
        y' = y + N(0, sigma^2)
        where sigma = 0.01 by default
    3. Temporal Crop & Resize:
        T_crop ~ Uniform(0.8 * T, 1.0 * T)
        start_frame ~ Uniform(0, T - T_crop)
        cropped = data[:, start:start+T_crop, :, :]
        resized = interpolate(cropped, T)

    Args:
        data: Skeleton data with shape (C, T, V, M) where
            C = 3 (x, y, confidence)
            T = 60 (number of frames)
            V = 17 (number of keypoints)
            M = 1 (number of persons)
        prob: Probability of applying each augmentation (default: 0.5)

    Returns:
        augmented_data: Augmented skeleton data with same shape (C, T, V, M)

    Example:
        >>> data = np.random.rand(3, 60, 17, 1)
        >>> augmented = augment_skeleton(data, prob=0.5)
        >>> augmented.shape
        (3, 60, 17, 1)
    """
    C, T, V, M = data.shape
    assert C == 3, f"Expected 3 channels (x, y, conf), got {C}"
    assert V == 17, f"Expected 17 COCO keypoints, got {V}"
    assert M == 1, f"Expected max 1 person, got {M}"

    # Create a copy to avoid modifying original data
    augmented_data = data.copy()

    # 1. Horizontal Flip (flip x-coordinate + swap left/right keypoints)
    if np.random.rand() < prob:
        augmented_data = _horizontal_flip(augmented_data)

    # 2. Random Noise Injection (add Gaussian noise to x,y only)
    if np.random.rand() < prob:
        augmented_data = _add_gaussian_noise(augmented_data)

    # 3. Temporal Crop and Resize (crop 0.8-1.0 of length, resize back)
    if np.random.rand() < prob:
        augmented_data = _temporal_crop_resize(augmented_data)

    return augmented_data
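
# A minimal batch-level usage sketch (hypothetical helper, not part of the module
# API; the functions above operate on single samples): applying augment_skeleton
# independently to each sample of an already normalized batch (N, C, T, V, M).
def _augment_batch(batch: np.ndarray, prob: float = 0.5) -> np.ndarray:
    """batch: (N, C, T, V, M) normalized skeleton sequences."""
    return np.stack([augment_skeleton(sample, prob=prob) for sample in batch], axis=0)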

def _horizontal_flip(data: np.ndarray) -> np.ndarray:
    """
    Horizontally flip skeleton by negating x-coordinate and swapping left/right keypoints.

    Mathematical Formulation:
        x' = -x
        y' = y
        conf' = conf
        For each (left_idx, right_idx) pair: swap keypoints

    Args:
        data: Skeleton data (C, T, V, M)

    Returns:
        flipped_data: Horizontally flipped data (C, T, V, M)
    """
    flipped_data = data.copy()

    # Flip x-coordinate (channel 0)
    flipped_data[0] = -flipped_data[0]

    # Swap left/right keypoint pairs
    for left_idx, right_idx in COCO_LEFT_RIGHT_PAIRS:
        # Swap all channels (x, y, conf) for the keypoint pair
        temp = flipped_data[:, :, left_idx, :].copy()
        flipped_data[:, :, left_idx, :] = flipped_data[:, :, right_idx, :]
        flipped_data[:, :, right_idx, :] = temp

    return flipped_data
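
# An equivalent vectorized variant (a sketch; the module itself uses the loop above):
# precompute a permutation of the 17 COCO keypoint indices from COCO_LEFT_RIGHT_PAIRS,
# then perform the left/right swap with a single fancy-indexing operation.
_FLIP_INDEX = np.arange(17)
for _left, _right in COCO_LEFT_RIGHT_PAIRS:
    _FLIP_INDEX[_left], _FLIP_INDEX[_right] = _right, _left


def _horizontal_flip_vectorized(data: np.ndarray) -> np.ndarray:
    """Same result as _horizontal_flip, via a precomputed index permutation."""
    flipped = data[:, :, _FLIP_INDEX, :]  # fancy indexing returns a new, swapped array
    flipped[0] = -flipped[0]              # negate x-coordinate
    return flipped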

def _add_gaussian_noise(data: np.ndarray, std: float = 0.01) -> np.ndarray:
    """
    Add Gaussian noise to x,y coordinates (preserves confidence channel).

    Mathematical Formulation:
        x' = x + N(0, sigma^2)
        y' = y + N(0, sigma^2)
        conf' = conf (unchanged)
        where sigma = 0.01 (default)

    The noise magnitude is calibrated for normalized coordinates in range [-0.5, 0.5].
    With std=0.01, 99.7% of noise values fall within [-0.03, 0.03] (3-sigma rule).

    Args:
        data: Skeleton data (C, T, V, M)
        std: Standard deviation of Gaussian noise (default: 0.01)

    Returns:
        noisy_data: Data with Gaussian noise added to x,y coordinates
    """
    C, T, V, M = data.shape
    noisy_data = data.copy()

    # Generate Gaussian noise for x,y channels only (not confidence)
    noise_shape = (2, T, V, M)  # Only x,y channels
    noise = np.random.normal(0, std, noise_shape).astype(data.dtype)

    # Add noise to x,y channels (0, 1), leave confidence channel (2) unchanged
    noisy_data[:2] += noise

    return noisy_data

def _temporal_crop_resize(data: np.ndarray, crop_ratio_range: Tuple[float, float] = (0.8, 1.0)) -> np.ndarray:
    """
    Randomly crop temporal sequence and resize back to original length.

    This augmentation simulates variable fall speeds by rescaling the temporal
    dimension. With a crop ratio of 0.8 and T=60, a 48-frame window is stretched
    back to 60 frames, so the motion plays back about 25% slower; a ratio of 1.0
    leaves the timing unchanged.

    Mathematical Formulation:
        T_crop ~ Uniform(crop_min * T, crop_max * T)
        start ~ Uniform(0, T - T_crop)
        cropped = data[:, start:start+T_crop, :, :]
        resized = interpolate(cropped, T) using linear interpolation

    Args:
        data: Skeleton data (C, T, V, M)
        crop_ratio_range: (min_ratio, max_ratio) for crop length (default: (0.8, 1.0))

    Returns:
        resized_data: Temporally augmented data with original shape (C, T, V, M)
    """
    C, T, V, M = data.shape
    min_ratio, max_ratio = crop_ratio_range

    # Sample random crop ratio
    crop_ratio = np.random.uniform(min_ratio, max_ratio)
    crop_length = int(T * crop_ratio)
    crop_length = max(1, crop_length)  # Ensure at least 1 frame

    # Sample random start position
    max_start = max(0, T - crop_length)
    start_frame = np.random.randint(0, max_start + 1) if max_start > 0 else 0

    # Extract cropped window
    cropped = data[:, start_frame:start_frame + crop_length, :, :]

    # Resize back to original temporal length using linear interpolation
    resized_data = _temporal_interpolate(cropped, T)

    return resized_data

def _temporal_interpolate(data: np.ndarray, target_length: int) -> np.ndarray:
    """
    Interpolate temporal dimension to target length using linear interpolation.

    This function performs 1D linear interpolation along the temporal axis (axis=1)
    for each channel, keypoint, and person independently.

    Args:
        data: Skeleton data (C, T, V, M)
        target_length: Target number of frames

    Returns:
        interpolated_data: Data with temporal dimension resized to target_length
    """
    C, T_src, V, M = data.shape
    if T_src == target_length:
        return data

    # Create source and target time indices
    src_indices = np.linspace(0, T_src - 1, T_src)
    target_indices = np.linspace(0, T_src - 1, target_length)

    # Interpolate each channel, keypoint, person combination
    interpolated_data = np.zeros((C, target_length, V, M), dtype=data.dtype)
    for c in range(C):
        for v in range(V):
            for m in range(M):
                interpolated_data[c, :, v, m] = np.interp(
                    target_indices,
                    src_indices,
                    data[c, :, v, m]
                )

    return interpolated_data
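
# A vectorized alternative (a sketch with the same semantics as _temporal_interpolate):
# compute fractional source positions once and blend the two neighbouring frames,
# avoiding the per-channel/keypoint/person Python loops.
def _temporal_interpolate_vectorized(data: np.ndarray, target_length: int) -> np.ndarray:
    """Linear interpolation along the temporal axis using fancy indexing."""
    C, T_src, V, M = data.shape
    if T_src == target_length:
        return data
    pos = np.linspace(0, T_src - 1, target_length)  # fractional source frame positions
    lo = np.floor(pos).astype(int)                  # lower neighbouring frame index
    hi = np.minimum(lo + 1, T_src - 1)              # upper neighbouring frame index
    w = (pos - lo).reshape(1, -1, 1, 1)             # blend weight per target frame
    blended = (1.0 - w) * data[:, lo] + w * data[:, hi]
    return blended.astype(data.dtype)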

def _normalize_by_hip_center(data: np.ndarray) -> np.ndarray:
    """
    Normalize skeleton by hip center position and skeleton size (ST-GCN standard).

    This is the recommended normalization method for skeleton-based action recognition,
    following the ST-GCN paper and NTU RGB+D dataset preprocessing.

    Algorithm:
    ----------
    1. Calculate hip center from left_hip (11) and right_hip (12)
    2. If hips have low confidence (<0.3), fall back to shoulder center
    3. Center all keypoints by subtracting hip center
    4. Calculate skeleton size as average shoulder-to-hip distance
    5. Scale all coordinates by skeleton size

    COCO Keypoints Used:
        - 5: left_shoulder
        - 6: right_shoulder
        - 11: left_hip
        - 12: right_hip

    Args:
        data: Skeleton data (C, T, V, M) with C=3 (x, y, conf)

    Returns:
        normalized_data: (C, T, V, M) centered at hip, scaled by skeleton size
            - x,y channels: relative to hip center, scaled by skeleton size
            - conf channel: unchanged

    Example:
        >>> scale = np.array([3840, 2160, 1]).reshape(3, 1, 1, 1)
        >>> data = np.random.rand(3, 60, 17, 1) * scale
        >>> normalized = _normalize_by_hip_center(data)
        >>> # Hip center is now at (0, 0)
        >>> hip_center_x = (normalized[0, :, 11, :] + normalized[0, :, 12, :]) / 2
        >>> np.allclose(hip_center_x, 0.0, atol=1e-6)
        True
    """
    C, T, V, M = data.shape
    normalized_data = data.copy()

    # Extract hip keypoints (COCO: 11=left_hip, 12=right_hip)
    left_hip_xy = data[:2, :, 11:12, :]      # (2, T, 1, M)
    right_hip_xy = data[:2, :, 12:13, :]     # (2, T, 1, M)
    left_hip_conf = data[2:3, :, 11:12, :]   # (1, T, 1, M)
    right_hip_conf = data[2:3, :, 12:13, :]  # (1, T, 1, M)

    # Calculate average hip confidence across all frames
    left_hip_conf_mean = np.mean(left_hip_conf)
    right_hip_conf_mean = np.mean(right_hip_conf)

    # Determine center point (hip or shoulder fallback)
    if left_hip_conf_mean >= 0.3 and right_hip_conf_mean >= 0.3:
        # Normal case: use hip center
        center_point = (left_hip_xy + right_hip_xy) / 2.0  # (2, T, 1, M)

        # Calculate skeleton size from shoulder-to-hip distance
        left_shoulder_xy = data[:2, :, 5:6, :]   # (2, T, 1, M)
        right_shoulder_xy = data[:2, :, 6:7, :]  # (2, T, 1, M)

        # Left torso distance: ||left_shoulder - left_hip||
        left_torso = left_shoulder_xy - left_hip_xy                  # (2, T, 1, M)
        left_torso_dist = np.sqrt(np.sum(left_torso ** 2, axis=0))   # (T, 1, M)

        # Right torso distance: ||right_shoulder - right_hip||
        right_torso = right_shoulder_xy - right_hip_xy                # (2, T, 1, M)
        right_torso_dist = np.sqrt(np.sum(right_torso ** 2, axis=0))  # (T, 1, M)

        # Average skeleton size across frames and left/right
        skeleton_size = np.mean([left_torso_dist, right_torso_dist])  # scalar
    else:
        # Fallback: use shoulder center if hips not detected
        left_shoulder_xy = data[:2, :, 5:6, :]
        right_shoulder_xy = data[:2, :, 6:7, :]
        center_point = (left_shoulder_xy + right_shoulder_xy) / 2.0  # (2, T, 1, M)

        # Use shoulder width as skeleton size estimate
        shoulder_vector = right_shoulder_xy - left_shoulder_xy          # (2, T, 1, M)
        shoulder_width = np.sqrt(np.sum(shoulder_vector ** 2, axis=0))  # (T, 1, M)
        skeleton_size = np.mean(shoulder_width) * 2.0  # Approximate torso height

    # Prevent division by zero
    skeleton_size = max(skeleton_size, 1e-6)

    # Normalize x,y channels: center and scale
    normalized_data[:2] = (normalized_data[:2] - center_point) / skeleton_size

    # Confidence channel unchanged
    return normalized_data

def _normalize_by_image_center(
    data: np.ndarray,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Legacy normalization by image center (for comparison only).

    This method is NOT recommended for ST-GCN training as it:
    - Includes absolute position information
    - Varies with camera angle
    - Does not normalize body size

    Use this only for comparing with old implementations or specific use cases
    where absolute position in frame matters.

    Args:
        data: Skeleton data (C, T, V, M)
        img_width: Image width in pixels (default: 3840 for AI Hub 4K)
        img_height: Image height in pixels (default: 2160 for AI Hub 4K)

    Returns:
        normalized_data: (C, T, V, M) with x,y in [-0.5, 0.5]
    """
    C, T, V, M = data.shape
    normalized_data = data.copy()

    # Normalize x-coordinate (channel 0): [0, img_width] -> [-0.5, 0.5]
    normalized_data[0] = (normalized_data[0] / img_width) - 0.5

    # Normalize y-coordinate (channel 1): [0, img_height] -> [-0.5, 0.5]
    normalized_data[1] = (normalized_data[1] / img_height) - 0.5

    # Confidence channel (2) remains unchanged in [0, 1]
    return normalized_data

def normalize_skeleton(
    data: np.ndarray,
    method: str = 'hip_center',
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Normalize skeleton coordinates using the ST-GCN standard method.

    This normalization removes absolute position information and makes the model
    focus on relative pose patterns, which is critical for fall detection across
    different camera angles (AI Hub 8-camera setup).

    Methods:
    --------
    1. 'hip_center' (default, ST-GCN standard):
        - Center: Hip center (average of left_hip and right_hip)
        - Scale: Skeleton size (shoulder-to-hip distance)
        - Fallback: Shoulder center if hips not detected
        - Reference: ST-GCN (Yan et al., AAAI 2018), NTU RGB+D normalization
    2. 'image_center' (legacy, not recommended):
        - Center: Image center
        - Scale: Image dimensions
        - Use only for comparison with old implementations

    Mathematical Formulations (hip_center):
    ----------------------------------------
    Step 1: Calculate hip center
        hip_center = (left_hip + right_hip) / 2   # COCO keypoints 11, 12
    Step 2: Center all keypoints
        x' = x - hip_center_x
        y' = y - hip_center_y
    Step 3: Scale by skeleton size (shoulder-to-hip distance)
        skeleton_size = mean(||shoulder - hip||) over left and right
        x'' = x' / skeleton_size
        y'' = y' / skeleton_size

    Advantages of hip_center normalization:
    - Camera angle invariant (critical for 8-camera AI Hub dataset)
    - Absolute position independent (person can be anywhere in frame)
    - Body size normalized (tall/short people comparable)
    - Matches ST-GCN paper and most skeleton action recognition works

    Args:
        data: Skeleton data with shape (C, T, V, M) where
            C = 3 (x in pixels, y in pixels, confidence)
            T = number of frames
            V = 17 (COCO keypoints)
            M = 1 (max persons)
        method: Normalization method - 'hip_center' (default) or 'image_center'
        img_width: Image width for image_center method (default: 3840 for AI Hub 4K)
        img_height: Image height for image_center method (default: 2160 for AI Hub 4K)

    Returns:
        normalized_data: Normalized skeleton data with shape (C, T, V, M)
            For hip_center: relative coordinates centered at hip, scaled by skeleton size
            For image_center: x,y in [-0.5, 0.5], conf in [0, 1]

    Example:
        >>> # ST-GCN standard normalization
        >>> scale = np.array([3840, 2160, 1]).reshape(3, 1, 1, 1)
        >>> data = np.random.rand(3, 60, 17, 1) * scale
        >>> normalized = normalize_skeleton(data, method='hip_center')
        >>> # Hip is now at origin (0, 0); coordinates scaled by skeleton size
        >>> # Legacy image center normalization (x lands in [-0.5, 0.5])
        >>> normalized_legacy = normalize_skeleton(data, method='image_center')
        >>> bool((normalized_legacy[0] >= -0.5).all() and (normalized_legacy[0] <= 0.5).all())
        True
    """
    C, T, V, M = data.shape
    assert C == 3, f"Expected 3 channels (x, y, conf), got {C}"
    assert V == 17, f"Expected 17 COCO keypoints, got {V}"

    if method == 'hip_center':
        return _normalize_by_hip_center(data)
    elif method == 'image_center':
        return _normalize_by_image_center(data, img_width, img_height)
    else:
        raise ValueError(
            f"Unknown normalization method: '{method}'. "
            f"Use 'hip_center' (ST-GCN standard) or 'image_center' (legacy)."
        )
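
# A minimal preprocessing sketch (illustrative only; the function name and the
# `training` flag are hypothetical): raw pixel keypoints are normalized first,
# and the random augmentations are applied only to training samples.
def _example_preprocess(raw_sample: np.ndarray, training: bool = True) -> np.ndarray:
    """raw_sample: (3, T, 17, 1) skeleton in pixel coordinates."""
    sample = normalize_skeleton(raw_sample, method='hip_center')
    if training:
        sample = augment_skeleton(sample, prob=0.5)
    return sample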

def denormalize_skeleton(
    data: np.ndarray,
    method: str = 'hip_center',
    hip_center: Optional[np.ndarray] = None,
    skeleton_size: Optional[float] = None,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Denormalize skeleton coordinates back to the original space.

    NOTE: For the hip_center method, denormalization requires storing the original
    hip_center and skeleton_size values during normalization. This function is
    primarily for visualization purposes.

    For most ST-GCN training workflows, you don't need denormalization since:
    - Training works directly on normalized coordinates
    - Model predictions are classification labels (not coordinates)

    Methods:
    --------
    1. 'hip_center': Requires hip_center and skeleton_size parameters
    2. 'image_center': Only requires img_width and img_height

    Args:
        data: Normalized skeleton data (C, T, V, M)
        method: Denormalization method - 'hip_center' or 'image_center'
        hip_center: Original hip center position (2, T, 1, M) - required for hip_center method
        skeleton_size: Original skeleton size (scalar) - required for hip_center method
        img_width: Image width for image_center method (default: 3840)
        img_height: Image height for image_center method (default: 2160)

    Returns:
        denormalized_data: Skeleton data in original coordinate space

    Example:
        >>> # Hip center denormalization needs the original hip_center and
        >>> # skeleton_size, which must be stored during normalization.
        >>> scale = np.array([3840, 2160, 1]).reshape(3, 1, 1, 1)
        >>> data_original = np.random.rand(3, 60, 17, 1) * scale
        >>> # Image center denormalization (simpler)
        >>> normalized = normalize_skeleton(data_original, method='image_center')
        >>> denormalized = denormalize_skeleton(normalized, method='image_center')
        >>> np.allclose(data_original[:2], denormalized[:2], atol=1.0)  # Within 1 pixel
        True
    """
    C, T, V, M = data.shape
    assert C == 3, f"Expected 3 channels (x, y, conf), got {C}"

    if method == 'hip_center':
        if hip_center is None or skeleton_size is None:
            raise ValueError(
                "hip_center denormalization requires 'hip_center' and 'skeleton_size' parameters. "
                "These values must be saved during normalization. "
                "For visualization without original values, consider using method='image_center'."
            )
        return _denormalize_by_hip_center(data, hip_center, skeleton_size)
    elif method == 'image_center':
        return _denormalize_by_image_center(data, img_width, img_height)
    else:
        raise ValueError(
            f"Unknown denormalization method: '{method}'. "
            f"Use 'hip_center' or 'image_center'."
        )
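
# A sketch of how the values needed by the 'hip_center' branch above could be captured
# at normalization time (hypothetical helper, not part of the module API). It assumes
# both hips are detected; the low-confidence shoulder fallback is omitted for brevity.
def _normalize_with_saved_params(data: np.ndarray):
    """Return (normalized, hip_center, skeleton_size) for later denormalization."""
    hip_center = (data[:2, :, 11:12, :] + data[:2, :, 12:13, :]) / 2.0  # (2, T, 1, M)
    torso = data[:2, :, 5:7, :] - data[:2, :, 11:13, :]                 # shoulders minus hips
    skeleton_size = max(float(np.mean(np.sqrt(np.sum(torso ** 2, axis=0)))), 1e-6)
    return _normalize_by_hip_center(data), hip_center, skeleton_size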

def _denormalize_by_hip_center(
    data: np.ndarray,
    hip_center: np.ndarray,
    skeleton_size: float
) -> np.ndarray:
    """
    Reverse hip center normalization.

    Args:
        data: Normalized skeleton data (C, T, V, M)
        hip_center: Original hip center (2, T, 1, M), or any shape broadcastable to it
            (e.g., (2, 1, 1, 1) for a constant center)
        skeleton_size: Original skeleton size (scalar)

    Returns:
        denormalized_data: (C, T, V, M) in original pixel coordinates
    """
    C, T, V, M = data.shape
    denormalized_data = data.copy()

    # Reverse scaling and centering: x_original = x_normalized * skeleton_size + hip_center
    denormalized_data[:2] = denormalized_data[:2] * skeleton_size + hip_center

    # Confidence channel unchanged
    return denormalized_data

def _denormalize_by_image_center(
    data: np.ndarray,
    img_width: int = 3840,
    img_height: int = 2160
) -> np.ndarray:
    """
    Reverse image center normalization.

    Args:
        data: Normalized skeleton data (C, T, V, M) with x,y in [-0.5, 0.5]
        img_width: Image width in pixels (default: 3840)
        img_height: Image height in pixels (default: 2160)

    Returns:
        denormalized_data: (C, T, V, M) with x,y in pixel coordinates
    """
    C, T, V, M = data.shape
    denormalized_data = data.copy()

    # Denormalize x-coordinate: [-0.5, 0.5] -> [0, img_width]
    denormalized_data[0] = (denormalized_data[0] + 0.5) * img_width

    # Denormalize y-coordinate: [-0.5, 0.5] -> [0, img_height]
    denormalized_data[1] = (denormalized_data[1] + 0.5) * img_height

    # Confidence channel remains unchanged
    return denormalized_data

def test_augmentation():
    """
    Test augmentation functions and demonstrate their effects.

    This function creates synthetic skeleton data and applies each augmentation
    to verify correctness and visualize the transformations.
    """
    print("Skeleton Data Augmentation Test")
    print("=" * 80)

    # Create synthetic skeleton data (C, T, V, M)
    C, T, V, M = 3, 60, 17, 1
    np.random.seed(42)

    # Generate synthetic data in pixel coordinates
    data = np.random.rand(C, T, V, M)
    data[0] *= 1920  # x in [0, 1920]
    data[1] *= 1080  # y in [0, 1080]
    data[2] = np.random.uniform(0.5, 1.0, (T, V, M))  # confidence in [0.5, 1.0]

    print(f"\nOriginal data shape: {data.shape}")
    print(f"Original x range: [{data[0].min():.2f}, {data[0].max():.2f}] pixels")
    print(f"Original y range: [{data[1].min():.2f}, {data[1].max():.2f}] pixels")
    print(f"Original confidence range: [{data[2].min():.3f}, {data[2].max():.3f}]")
    # Test 1: Normalization (image_center round trip; hip_center denormalization
    # would require the stored hip center and skeleton size)
    print("\n" + "-" * 80)
    print("Test 1: Normalization")
    print("-" * 80)
    normalized = normalize_skeleton(data, method='image_center', img_width=1920, img_height=1080)
    print(f"Normalized x range: [{normalized[0].min():.3f}, {normalized[0].max():.3f}]")
    print(f"Normalized y range: [{normalized[1].min():.3f}, {normalized[1].max():.3f}]")
    print(f"Normalized confidence range: [{normalized[2].min():.3f}, {normalized[2].max():.3f}]")

    # Verify denormalization
    denormalized = denormalize_skeleton(normalized, method='image_center', img_width=1920, img_height=1080)
    reconstruction_error = np.abs(data - denormalized).max()
    print(f"Denormalization reconstruction error: {reconstruction_error:.6f} pixels")
    # Test 2: Horizontal Flip
    print("\n" + "-" * 80)
    print("Test 2: Horizontal Flip")
    print("-" * 80)
    np.random.seed(42)
    flipped = augment_skeleton(normalized, prob=1.0)  # Force all augmentations
    print(f"Original x (frame 0, keypoint 0): {normalized[0, 0, 0, 0]:.3f}")
    print(f"After augmentation x: {flipped[0, 0, 0, 0]:.3f}")
    print(f"X-coordinate sign flipped: {np.sign(normalized[0].mean()) != np.sign(flipped[0].mean())}")

    # Test 3: Check left/right keypoint swapping
    print("\n" + "-" * 80)
    print("Test 3: Keypoint Pair Swapping (Horizontal Flip)")
    print("-" * 80)
    # Create data with distinctive values for left/right pairs
    test_data = np.zeros((3, 60, 17, 1))
    test_data[0, :, 5, 0] = 100   # left_shoulder x = 100
    test_data[0, :, 6, 0] = -100  # right_shoulder x = -100
    flipped_test = _horizontal_flip(test_data)
    print(f"Original left_shoulder (idx 5) x: {test_data[0, 0, 5, 0]:.1f}")
    print(f"Original right_shoulder (idx 6) x: {test_data[0, 0, 6, 0]:.1f}")
    print(f"Flipped left_shoulder (idx 5) x: {flipped_test[0, 0, 5, 0]:.1f}")
    print(f"Flipped right_shoulder (idx 6) x: {flipped_test[0, 0, 6, 0]:.1f}")
    print(f"Swap successful: {flipped_test[0, 0, 5, 0] == 100 and flipped_test[0, 0, 6, 0] == -100}")

    # Test 4: Gaussian Noise
    print("\n" + "-" * 80)
    print("Test 4: Gaussian Noise")
    print("-" * 80)
    np.random.seed(42)
    noisy = _add_gaussian_noise(normalized, std=0.01)
    noise_magnitude = np.abs(noisy[:2] - normalized[:2]).max()
    confidence_unchanged = np.allclose(noisy[2], normalized[2])
    print(f"Max noise magnitude (x,y): {noise_magnitude:.4f}")
    print(f"Confidence channel unchanged: {confidence_unchanged}")

    # Test 5: Temporal Crop and Resize
    print("\n" + "-" * 80)
    print("Test 5: Temporal Crop and Resize")
    print("-" * 80)
    np.random.seed(42)
    cropped = _temporal_crop_resize(normalized, crop_ratio_range=(0.8, 1.0))
    print(f"Original temporal length: {normalized.shape[1]}")
    print(f"Cropped temporal length: {cropped.shape[1]}")
    print(f"Shape preserved: {cropped.shape == normalized.shape}")

    # Test 6: Full Augmentation Pipeline
    print("\n" + "-" * 80)
    print("Test 6: Full Augmentation Pipeline")
    print("-" * 80)
    np.random.seed(42)
    augmented = augment_skeleton(normalized, prob=0.5)
    print(f"Augmented shape: {augmented.shape}")
    print(f"Augmented x range: [{augmented[0].min():.3f}, {augmented[0].max():.3f}]")
    print(f"Augmented y range: [{augmented[1].min():.3f}, {augmented[1].max():.3f}]")
    print(f"Augmented confidence range: [{augmented[2].min():.3f}, {augmented[2].max():.3f}]")

    # Test 7: Augmentation Statistics (run 100 times)
    print("\n" + "-" * 80)
    print("Test 7: Augmentation Statistics (100 runs with prob=0.5)")
    print("-" * 80)
    np.random.seed(42)
    augmentation_counts = {"flip": 0, "noise": 0, "crop": 0}
    num_runs = 100
    for _ in range(num_runs):
        original_copy = normalized.copy()
        augmented = augment_skeleton(original_copy, prob=0.5)
        # Detect which augmentations were applied (heuristics)
        x_sign_changed = np.sign(augmented[0].mean()) != np.sign(normalized[0].mean())
        noise_added = not np.allclose(augmented[:2], normalized[:2], atol=1e-4)
        # Crop detection is harder, skip for now
        if x_sign_changed:
            augmentation_counts["flip"] += 1
        if noise_added and not x_sign_changed:
            augmentation_counts["noise"] += 1
    print(f"Horizontal flip applied: {augmentation_counts['flip']}/{num_runs} times")
    print(f"Gaussian noise applied: {augmentation_counts['noise']}/{num_runs} times")
    print(f"Expected frequency (prob=0.5): ~50 times per augmentation")

    print("\n" + "=" * 80)
    print("All tests completed successfully")
    print("=" * 80)


if __name__ == "__main__":
    test_augmentation()