File size: 3,151 Bytes
1f39ae1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
"""
Test script for cloud training with local data
"""

import os
import sys
from pathlib import Path

# Add the parent of this script's directory to the import path; the
# `scripts.hf_cloud_training` import used below is expected to resolve from
# there. resolve() makes the path absolute first, so the result does not depend
# on how the script was invoked (relative path, different working directory).
sys.path.append(str(Path(__file__).resolve().parent.parent))

def test_cloud_training():
    """Smoke-test the cloud training setup against a local copy of the data.

    The checks run in order and the function stops at the first failure:

    1. Export the environment variables used for a local run (presumably
       consumed by ``CloudTrainingConfig`` -- its implementation is not
       visible from this file).
    2. Verify that the local data directory and the run-1 train/dev/test
       ``.src``/``.tgt`` files exist.
    3. Import ``CloudTrainingConfig`` and ``CloudMorphologicalTrainer`` from
       ``scripts.hf_cloud_training``.
    4. Build a default ``CloudTrainingConfig`` and print its main fields.
    5. Create the configured output and model directories.

    Progress and failures are reported on stdout.

    Note: the environment variables are set on ``os.environ`` and are not
    restored afterwards.

    Returns:
        bool: ``True`` when every check passes, ``False`` otherwise.
    """
    # Single source of truth for the local data location, used both for the
    # DATA_DIR environment variable and for the on-disk checks below.
    data_dir = './10L_90NL'

    # Set environment variables for local testing
    os.environ['MODEL_NAME'] = 'test-morphological-transformer'
    os.environ['DATASET_NAME'] = '10L_90NL'
    os.environ['RUN_NUMBER'] = '1'
    os.environ['DATA_DIR'] = data_dir  # Use local data
    os.environ['OUTPUT_DIR'] = './test_output'
    os.environ['MODEL_DIR'] = './test_models'
    os.environ['WANDB_PROJECT'] = 'test-morphological-transformer'

    # Check if data exists
    data_path = Path(data_dir)
    if not data_path.exists():
        print("❌ Data directory not found. Please ensure ./10L_90NL exists")
        return False

    # Check for required data files (paths are relative to the data directory)
    required_files = [
        'train/run1/train.10L_90NL_1_1.src',
        'train/run1/train.10L_90NL_1_1.tgt',
        'dev/run1/dev.10L_90NL_1_1.src',
        'dev/run1/dev.10L_90NL_1_1.tgt',
        'test/run1/test.10L_90NL_1_1.src',
        'test/run1/test.10L_90NL_1_1.tgt',
    ]

    missing_files = [
        str(data_path / relative_path)
        for relative_path in required_files
        if not (data_path / relative_path).exists()
    ]

    if missing_files:
        print("❌ Missing required data files:")
        for file_path in missing_files:
            print(f"  - {file_path}")
        return False

    print("✅ All required data files found")

    # Test importing the cloud training script. The import is done here, not
    # at module level, so that a failure is reported as a failed check rather
    # than crashing the script. CloudMorphologicalTrainer is imported only to
    # verify that it is importable.
    try:
        from scripts.hf_cloud_training import CloudTrainingConfig, CloudMorphologicalTrainer
        print("✅ Successfully imported cloud training modules")
    except ImportError as e:
        print(f"❌ Failed to import cloud training modules: {e}")
        return False

    # Test configuration
    try:
        config = CloudTrainingConfig()
        print("✅ Configuration created successfully")
        print(f"  - Data dir: {config.data_dir}")
        print(f"  - Output dir: {config.output_dir}")
        print(f"  - Model dir: {config.model_dir}")
        print(f"  - Model name: {config.model_name}")
        print(f"  - Dataset: {config.dataset_name}")
        print(f"  - Run number: {config.run_number}")
    except Exception as e:
        # Broad on purpose: any failure while building or reading the config
        # is reported as a failed check.
        print(f"❌ Failed to create configuration: {e}")
        return False

    # Test directory creation
    try:
        os.makedirs(config.output_dir, exist_ok=True)
        os.makedirs(config.model_dir, exist_ok=True)
        print(f"✅ Successfully created directories: {config.output_dir}, {config.model_dir}")
    except Exception as e:
        print(f"❌ Failed to create directories: {e}")
        return False

    print("\n🎉 Cloud training script is ready to use!")
    print("\nTo run the actual training:")
    print("uv run python scripts/hf_cloud_training.py")

    return True

if __name__ == '__main__':
    # Propagate the outcome as the process exit status (0 = all checks passed,
    # 1 = a check failed) so shells and CI can detect a failed run.
    sys.exit(0 if test_cloud_training() else 1)