Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| Test script for cloud training with local data | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| # Add the parent of this script's directory (presumably the project root, so that `scripts.*` is importable) to the Python path | |
| sys.path.append(str(Path(__file__).parent.parent)) | |
def test_cloud_training():
    """Smoke-test the cloud training setup against the local ``10L_90NL`` data.

    The checks run in order and stop at the first failure:

    1. export the environment variables describing a local test run;
    2. verify that the data directory and the run-1 train/dev/test source
       and target files exist;
    3. import ``CloudTrainingConfig`` and ``CloudMorphologicalTrainer`` from
       ``scripts.hf_cloud_training``;
    4. instantiate ``CloudTrainingConfig`` and print its settings;
    5. create the configured output and model directories.

    Progress and failures are reported on stdout. The environment variables
    set in step 1 remain set in ``os.environ`` after the function returns.

    Returns:
        bool: ``True`` when every check passes, ``False`` otherwise.
    """
    # Single source of truth for the data location: used both for the
    # DATA_DIR environment variable and for the on-disk checks below.
    data_dir = './10L_90NL'

    # Set environment variables for local testing.
    # NOTE(review): presumably CloudTrainingConfig reads these — confirm
    # against scripts/hf_cloud_training.py.
    os.environ.update({
        'MODEL_NAME': 'test-morphological-transformer',
        'DATASET_NAME': '10L_90NL',
        'RUN_NUMBER': '1',
        'DATA_DIR': data_dir,  # Use local data
        'OUTPUT_DIR': './test_output',
        'MODEL_DIR': './test_models',
        'WANDB_PROJECT': 'test-morphological-transformer',
    })

    # Check if data exists
    data_path = Path(data_dir)
    if not data_path.exists():
        print(f"❌ Data directory not found. Please ensure {data_dir} exists")
        return False

    # Required data files: a source and a target file for each split of run 1.
    required_files = [
        f'{split}/run1/{split}.10L_90NL_1_1.{ext}'
        for split in ('train', 'dev', 'test')
        for ext in ('src', 'tgt')
    ]
    # is_file() rather than exists(): a directory sitting where a data file
    # should be is just as unusable as a missing file.
    missing_files = [
        str(data_path / relative)
        for relative in required_files
        if not (data_path / relative).is_file()
    ]
    if missing_files:
        print("❌ Missing required data files:")
        for file_path in missing_files:
            print(f" - {file_path}")
        return False
    print("✅ All required data files found")

    # Test importing the cloud training script. CloudMorphologicalTrainer is
    # not used below; importing it only verifies that the name is available.
    try:
        from scripts.hf_cloud_training import CloudTrainingConfig, CloudMorphologicalTrainer
        print("✅ Successfully imported cloud training modules")
    except ImportError as e:
        print(f"❌ Failed to import cloud training modules: {e}")
        return False

    # Test configuration. The broad catch is deliberate: this is a diagnostic
    # script, so any construction or attribute error is reported as a failed
    # check instead of a traceback.
    try:
        config = CloudTrainingConfig()
        print("✅ Configuration created successfully")
        print(f" - Data dir: {config.data_dir}")
        print(f" - Output dir: {config.output_dir}")
        print(f" - Model dir: {config.model_dir}")
        print(f" - Model name: {config.model_name}")
        print(f" - Dataset: {config.dataset_name}")
        print(f" - Run number: {config.run_number}")
    except Exception as e:
        print(f"❌ Failed to create configuration: {e}")
        return False

    # Test directory creation
    try:
        os.makedirs(config.output_dir, exist_ok=True)
        os.makedirs(config.model_dir, exist_ok=True)
        print(f"✅ Successfully created directories: {config.output_dir}, {config.model_dir}")
    except Exception as e:
        print(f"❌ Failed to create directories: {e}")
        return False

    print("\n🎉 Cloud training script is ready to use!")
    print("\nTo run the actual training:")
    print("uv run python scripts/hf_cloud_training.py")
    return True
if __name__ == '__main__':
    # Propagate the outcome as the process exit status so shells and CI can
    # detect a failed check: 0 when every check passed, 1 otherwise.
    sys.exit(0 if test_cloud_training() else 1)