#!/usr/bin/env python3
"""
Batch training script for all morphological reinflection datasets on Hugging Face
"""

import os
import subprocess
import argparse


def model_name_from_cmd(cmd):
    """Return the model name that follows the --model_name flag in a command."""
    return cmd[cmd.index('--model_name') + 1]


def run_training_command(cmd):
    """Run a training command and report success or failure."""
    print(f"Running: {' '.join(cmd)}")
    model_name = model_name_from_cmd(cmd)
    try:
        # capture_output buffers the child's stdout/stderr, so training logs
        # are only echoed below on failure; drop it to stream logs live.
        subprocess.run(cmd, check=True, capture_output=True, text=True)
        print(f"✅ Success: {model_name}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"❌ Error training {model_name}: {e}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False
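

# A minimal sketch of a single job invocation (main() below builds these
# argument lists; the flag names mirror its base_cmd and per-run arguments):
#
#   ok = run_training_command([
#       'python', 'scripts/train_huggingface.py',
#       '--output_dir', './hf_models',
#       '--model_name', 'user/morphological-transformer-10L90NL-run1',
#       '--train_src', './10L_90NL/train/run1/train.10L_90NL_1_1.src',
#       '--train_tgt', './10L_90NL/train/run1/train.10L_90NL_1_1.tgt',
#       ...
#   ])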


def main():
    parser = argparse.ArgumentParser(description='Train all morphological transformer models on Hugging Face')
    parser.add_argument('--username', type=str, required=True, help='Your Hugging Face username')
    parser.add_argument('--wandb_project', type=str, default='morphological-transformer', help='Weights & Biases project name')
    parser.add_argument('--hf_token', type=str, help='Hugging Face token for model upload')
    parser.add_argument('--upload_models', action='store_true', help='Upload models to Hugging Face Hub')
    parser.add_argument('--output_dir', type=str, default='./hf_models', help='Output directory')
    parser.add_argument('--datasets', nargs='+', default=['10L_90NL', '50L_50NL', '90L_10NL'],
                        help='Datasets to train (default: all)')
    parser.add_argument('--runs', nargs='+', default=['1', '2', '3'],
                        help='Runs to train (default: all)')
    parser.add_argument('--dry_run', action='store_true', help='Print commands without executing')
    args = parser.parse_args()

    # Base command template
    base_cmd = [
        'python', 'scripts/train_huggingface.py',
        '--output_dir', args.output_dir,
        '--wandb_project', args.wandb_project
    ]
    if args.hf_token:
        base_cmd.extend(['--hf_token', args.hf_token])
    if args.upload_models:
        base_cmd.append('--upload_model')

    # Dataset configurations
    datasets = {
        '10L_90NL': {
            'train_pattern': './10L_90NL/train/run{run}/train.10L_90NL_{run}_1.src',
            'train_tgt_pattern': './10L_90NL/train/run{run}/train.10L_90NL_{run}_1.tgt',
            'dev_pattern': './10L_90NL/dev/run{run}/dev.10L_90NL_{run}_1.src',
            'dev_tgt_pattern': './10L_90NL/dev/run{run}/dev.10L_90NL_{run}_1.tgt',
            'test_pattern': './10L_90NL/test/run{run}/test.10L_90NL_{run}_1.src',
            'test_tgt_pattern': './10L_90NL/test/run{run}/test.10L_90NL_{run}_1.tgt',
            'model_name_pattern': '{username}/morphological-transformer-10L90NL-run{run}'
        },
        '50L_50NL': {
            'train_pattern': './50L_50NL/train/run{run}/train.50L_50NL_{run}_1.src',
            'train_tgt_pattern': './50L_50NL/train/run{run}/train.50L_50NL_{run}_1.tgt',
            'dev_pattern': './50L_50NL/dev/run{run}/dev.50L_50NL_{run}_1.src',
            'dev_tgt_pattern': './50L_50NL/dev/run{run}/dev.50L_50NL_{run}_1.tgt',
            'test_pattern': './50L_50NL/test/run{run}/test.50L_50NL_{run}_1.src',
            'test_tgt_pattern': './50L_50NL/test/run{run}/test.50L_50NL_{run}_1.tgt',
            'model_name_pattern': '{username}/morphological-transformer-50L50NL-run{run}'
        },
        '90L_10NL': {
            'train_pattern': './90L_10NL/train/run{run}/train.90L_10NL_{run}_1.src',
            'train_tgt_pattern': './90L_10NL/train/run{run}/train.90L_10NL_{run}_1.tgt',
            'dev_pattern': './90L_10NL/dev/run{run}/dev.90L_10NL_{run}_1.src',
            'dev_tgt_pattern': './90L_10NL/dev/run{run}/dev.90L_10NL_{run}_1.tgt',
            'test_pattern': './90L_10NL/test/run{run}/test.90L_10NL_{run}_1.src',
            'test_tgt_pattern': './90L_10NL/test/run{run}/test.90L_10NL_{run}_1.tgt',
            'model_name_pattern': '{username}/morphological-transformer-90L10NL-run{run}'
        }
    }
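
    # For example, dataset '10L_90NL' with run='1' resolves to
    # './10L_90NL/train/run1/train.10L_90NL_1_1.src' (plus the matching
    # .tgt/dev/test files) and a Hub model id of
    # '<username>/morphological-transformer-10L90NL-run1'.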

    # Generate training commands
    commands = []
    for dataset in args.datasets:
        if dataset not in datasets:
            print(f"⚠️ Unknown dataset: {dataset}")
            continue
        config = datasets[dataset]
        for run in args.runs:
            # Resolve the data file paths for this run
            train_src = config['train_pattern'].format(run=run)
            train_tgt = config['train_tgt_pattern'].format(run=run)
            dev_src = config['dev_pattern'].format(run=run)
            dev_tgt = config['dev_tgt_pattern'].format(run=run)
            test_src = config['test_pattern'].format(run=run)
            test_tgt = config['test_tgt_pattern'].format(run=run)
            # Skip the run if any data file is missing
            missing_files = []
            for file_path in [train_src, train_tgt, dev_src, dev_tgt, test_src, test_tgt]:
                if not os.path.exists(file_path):
                    missing_files.append(file_path)
            if missing_files:
                print(f"⚠️ Skipping {dataset} run {run} - missing files: {missing_files}")
                continue
            # Build the full command for this dataset/run
            model_name = config['model_name_pattern'].format(username=args.username, run=run)
            cmd = base_cmd + [
                '--model_name', model_name,
                '--train_src', train_src,
                '--train_tgt', train_tgt,
                '--dev_src', dev_src,
                '--dev_tgt', dev_tgt,
                '--test_src', test_src,
                '--test_tgt', test_tgt
            ]
            commands.append(cmd)
print(f"π Found {len(commands)} training jobs to run")
if args.dry_run:
print("\nπ Commands that would be executed:")
for i, cmd in enumerate(commands, 1):
print(f"{i:2d}. {' '.join(cmd)}")
return

    # Execute training commands
    successful = 0
    failed = 0
    for i, cmd in enumerate(commands, 1):
        print(f"\nTraining {i}/{len(commands)}: {model_name_from_cmd(cmd)}")
        if run_training_command(cmd):
            successful += 1
        else:
            failed += 1

    # Summary
    total = successful + failed
    print("\nTraining Summary:")
    print(f"✅ Successful: {successful}")
    print(f"❌ Failed: {failed}")
    if total > 0:
        print(f"Success Rate: {successful / total * 100:.1f}%")
    if successful > 0:
        print(f"\nModels saved to: {args.output_dir}")
        if args.upload_models:
            print(f"Models uploaded to: https://huggingface.co/{args.username}")


if __name__ == '__main__':
    main()