# morphological-transformer/scripts/test_cloud_training.py
# akki2825
# Initial deployment of Morphological Transformer with ZeroGPU
# 1f39ae1
#!/usr/bin/env python3
"""
Test script for cloud training with local data
"""
import os
import sys
from pathlib import Path
# Add the parent of this script's directory to the import path so that the
# `scripts.hf_cloud_training` module can be imported by the check below.
sys.path.append(str(Path(__file__).parent.parent))
def test_cloud_training():
    """Smoke-test the cloud training setup against the local dataset.

    Runs the following checks in order and stops at the first failure:

    1. Sets the environment variables for a local run (presumably read by
       ``CloudTrainingConfig`` -- confirm in ``scripts/hf_cloud_training.py``).
    2. Verifies that ``./10L_90NL`` and the run-1 train/dev/test ``.src`` and
       ``.tgt`` files exist, relative to the current working directory.
    3. Imports ``CloudTrainingConfig`` and ``CloudMorphologicalTrainer``.
    4. Instantiates ``CloudTrainingConfig`` and prints its main fields.
    5. Creates the configured output and model directories.

    Progress and failures are reported on stdout.

    Returns:
        bool: ``True`` if every check passed, ``False`` otherwise.
    """
    # Single source of truth for the dataset location: used both for the
    # DATA_DIR environment variable and for the file checks below.
    dataset_dir = './10L_90NL'

    # Set environment variables for local testing.
    os.environ.update({
        'MODEL_NAME': 'test-morphological-transformer',
        'DATASET_NAME': '10L_90NL',
        'RUN_NUMBER': '1',
        'DATA_DIR': dataset_dir,  # Use local data
        'OUTPUT_DIR': './test_output',
        'MODEL_DIR': './test_models',
        'WANDB_PROJECT': 'test-morphological-transformer',
    })

    # Check that the data directory exists.
    data_path = Path(dataset_dir)
    if not data_path.exists():
        print("❌ Data directory not found. Please ensure ./10L_90NL exists")
        return False

    # Check for the required data files.
    required_files = [
        'train/run1/train.10L_90NL_1_1.src',
        'train/run1/train.10L_90NL_1_1.tgt',
        'dev/run1/dev.10L_90NL_1_1.src',
        'dev/run1/dev.10L_90NL_1_1.tgt',
        'test/run1/test.10L_90NL_1_1.src',
        'test/run1/test.10L_90NL_1_1.tgt',
    ]
    missing_files = [
        str(data_path / relative_path)
        for relative_path in required_files
        if not (data_path / relative_path).exists()
    ]
    if missing_files:
        print("❌ Missing required data files:")
        for file_path in missing_files:
            print(f" - {file_path}")
        return False
    print("✅ All required data files found")

    # Check that the cloud training module is importable. The trainer class
    # is imported only to confirm that it exists; it is not used here.
    try:
        from scripts.hf_cloud_training import CloudTrainingConfig, CloudMorphologicalTrainer
        print("✅ Successfully imported cloud training modules")
    except ImportError as e:
        print(f"❌ Failed to import cloud training modules: {e}")
        return False

    # Check that a configuration can be built. The broad catch is deliberate:
    # any failure here should be reported as a failed check, not a traceback.
    try:
        config = CloudTrainingConfig()
        print("✅ Configuration created successfully")
        print(f" - Data dir: {config.data_dir}")
        print(f" - Output dir: {config.output_dir}")
        print(f" - Model dir: {config.model_dir}")
        print(f" - Model name: {config.model_name}")
        print(f" - Dataset: {config.dataset_name}")
        print(f" - Run number: {config.run_number}")
    except Exception as e:
        print(f"❌ Failed to create configuration: {e}")
        return False

    # Check that the output and model directories can be created.
    try:
        os.makedirs(config.output_dir, exist_ok=True)
        os.makedirs(config.model_dir, exist_ok=True)
        print(f"✅ Successfully created directories: {config.output_dir}, {config.model_dir}")
    except Exception as e:
        print(f"❌ Failed to create directories: {e}")
        return False

    print("\n🎉 Cloud training script is ready to use!")
    print("\nTo run the actual training:")
    print("uv run python scripts/hf_cloud_training.py")
    return True
if __name__ == '__main__':
    # Propagate the check result as the process exit status (0 when every
    # check passed, 1 otherwise) so shells and CI can detect a failure.
    sys.exit(0 if test_cloud_training() else 1)