#!/usr/bin/env python3
"""
Hugging Face model configuration for TagTransformer
"""
from transformers import PretrainedConfig

class TagTransformerConfig(PretrainedConfig):
"""Configuration class for TagTransformer model"""
model_type = "tag_transformer"
def __init__(
self,
src_vocab_size: int = 1000,
trg_vocab_size: int = 1000,
embed_dim: int = 256,
nb_heads: int = 4,
src_hid_size: int = 1024,
src_nb_layers: int = 4,
trg_hid_size: int = 1024,
trg_nb_layers: int = 4,
dropout_p: float = 0.1,
tie_trg_embed: bool = True,
label_smooth: float = 0.1,
max_length: int = 100,
nb_attr: int = 0,
**kwargs
):
super().__init__(**kwargs)
self.src_vocab_size = src_vocab_size
self.trg_vocab_size = trg_vocab_size
self.embed_dim = embed_dim
self.nb_heads = nb_heads
self.src_hid_size = src_hid_size
self.src_nb_layers = src_nb_layers
self.trg_hid_size = trg_hid_size
self.trg_nb_layers = trg_nb_layers
self.dropout_p = dropout_p
self.tie_trg_embed = tie_trg_embed
self.label_smooth = label_smooth
self.max_length = max_length
self.nb_attr = nb_attr
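
# A minimal usage sketch (hedged): TagTransformerConfig inherits the standard
# PretrainedConfig serialization, so it round-trips through config.json. The
# checkpoint path below is illustrative.
#
#     config = TagTransformerConfig(src_vocab_size=120, trg_vocab_size=95)
#     config.save_pretrained("checkpoints/tag-transformer")  # writes config.json
#     restored = TagTransformerConfig.from_pretrained("checkpoints/tag-transformer")
#     assert restored.embed_dim == config.embed_dim
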
class TagTransformerForMorphologicalReinflection:
    """Hugging Face model wrapper for TagTransformer"""

    def __init__(self, config: TagTransformerConfig):
        self.config = config
        self.model = None

    def from_pretrained(self, model_path: str):
        """Load model and configuration from a pretrained checkpoint directory"""
        import json
        from pathlib import Path

        import torch

        from transformer import TagTransformer

        # Load the configuration and keep it on the wrapper so that a later
        # save_pretrained() round-trips the checkpoint's hyperparameters
        config = TagTransformerConfig.from_pretrained(model_path)
        self.config = config

        # Restore the vocabularies written by save_pretrained(), if present
        def load_vocab(filename: str) -> dict:
            vocab_path = Path(model_path) / filename
            if vocab_path.exists():
                with open(vocab_path) as f:
                    return json.load(f)
            return {}

        # Create model
        model = TagTransformer(
            src_vocab_size=config.src_vocab_size,
            trg_vocab_size=config.trg_vocab_size,
            embed_dim=config.embed_dim,
            nb_heads=config.nb_heads,
            src_hid_size=config.src_hid_size,
            src_nb_layers=config.src_nb_layers,
            trg_hid_size=config.trg_hid_size,
            trg_nb_layers=config.trg_nb_layers,
            dropout_p=config.dropout_p,
            tie_trg_embed=config.tie_trg_embed,
            label_smooth=config.label_smooth,
            nb_attr=config.nb_attr,
            src_c2i=load_vocab("src_vocab.json"),
            trg_c2i=load_vocab("tgt_vocab.json"),
            attr_c2i={},
        )

        # Load state dict
        state_dict = torch.load(
            str(Path(model_path) / "pytorch_model.bin"), map_location="cpu"
        )
        model.load_state_dict(state_dict)

        self.model = model
        return self

def save_pretrained(self, save_path: str):
"""Save model in Hugging Face format"""
import torch
import json
from pathlib import Path
save_path = Path(save_path)
save_path.mkdir(parents=True, exist_ok=True)
# Save model state dict
torch.save(self.model.state_dict(), save_path / "pytorch_model.bin")
# Save configuration
self.config.save_pretrained(save_path)
# Save vocabularies if available
if hasattr(self.model, 'src_c2i') and self.model.src_c2i:
with open(save_path / "src_vocab.json", "w") as f:
json.dump(self.model.src_c2i, f, indent=2)
if hasattr(self.model, 'trg_c2i') and self.model.trg_c2i:
with open(save_path / "tgt_vocab.json", "w") as f:
json.dump(self.model.trg_c2i, f, indent=2)
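
    # Expected checkpoint layout after save_pretrained("out/"), assuming the
    # underlying model exposes src_c2i / trg_c2i vocabulary dicts:
    #     out/config.json        - TagTransformerConfig hyperparameters
    #     out/pytorch_model.bin  - PyTorch state dict
    #     out/src_vocab.json     - source vocabulary (only if available)
    #     out/tgt_vocab.json     - target vocabulary (only if available)
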
    def generate(self, input_ids, max_length: int = 100, **kwargs):
        """Generate predictions for morphological reinflection"""
        import torch

        self.model.eval()
        with torch.no_grad():
            # Greedy decoding via a single forward pass: take the argmax over
            # the vocabulary at each position. max_length is kept for API
            # compatibility; a fuller implementation would decode step by step
            # (or with beam search) up to that length.
            output = self.model(input_ids, **kwargs)
            predictions = torch.argmax(output, dim=-1)
        return predictions
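
# Hedged end-to-end sketch: load a checkpoint directory (the layout written by
# save_pretrained above) and run greedy generation. The checkpoint path and the
# (seq_len, batch) input shape are assumptions for illustration.
#
#     import torch
#     config = TagTransformerConfig.from_pretrained("checkpoints/tag-transformer")
#     wrapper = TagTransformerForMorphologicalReinflection(config)
#     wrapper = wrapper.from_pretrained("checkpoints/tag-transformer")
#     input_ids = torch.randint(0, config.src_vocab_size, (20, 1))
#     predictions = wrapper.generate(input_ids)
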
def create_model_card(model_name: str, dataset_name: str, task: str = "morphological-reinflection") -> str:
"""Create a model card for Hugging Face Hub"""
model_card = f"""---
license: mit
tags:
- morphological-reinflection
- transformer
- nlp
- linguistics
datasets:
- {dataset_name}
metrics:
- accuracy
- bleu
model-index:
- name: {model_name}
  results:
  - task:
      type: morphological-reinflection
      name: Morphological Reinflection
    dataset:
      type: {dataset_name}
      name: {dataset_name}
    metrics:
    - type: accuracy
      value: 0.0
      name: Accuracy
    - type: bleu
      value: 0.0
      name: BLEU Score
---
# {model_name}
This model is a TagTransformer for morphological reinflection tasks. It can transform words from one morphological form to another based on linguistic features.
## Model Description
- **Model type**: TagTransformer
- **Task**: Morphological Reinflection
- **Language**: Multiple languages (depends on training data)
- **Architecture**: Encoder-Decoder Transformer with special feature embeddings
## Usage

```python
from hf_model_config import TagTransformerConfig, TagTransformerForMorphologicalReinflection

# Load a checkpoint directory; inputs to generate() must be encoded with the
# vocabulary shipped alongside the weights (src_vocab.json)
config = TagTransformerConfig.from_pretrained("{model_name}")
model = TagTransformerForMorphologicalReinflection(config).from_pretrained("{model_name}")

input_ids = ...  # tensor of source symbol ids, encoded with src_vocab.json
predictions = model.generate(input_ids)
```
## Training Data
This model was trained on the {dataset_name} dataset.
## Training Procedure
The model was trained using:
- Optimizer: AdamW
- Learning rate: 0.001
- Batch size: 400
- Mixed precision training
- Gradient accumulation
## Evaluation
The model achieves the following results on the test set:
- Accuracy: TBD
- BLEU Score: TBD
## Limitations and Bias
This model may have limitations in:
- Handling rare morphological patterns
- Cross-lingual generalization
- Domain-specific terminology
## Citation
```bibtex
@misc{{{model_name.lower().replace('-', '_')},
title={{{model_name}}},
author={{Your Name}},
year={{2024}},
publisher={{Hugging Face}},
howpublished={{\\url{{https://huggingface.co/{model_name}}}}}
}}
```
"""
return model_card
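

if __name__ == "__main__":
    # Smoke test for the model-card generator: the model and dataset names
    # below are placeholders, not real Hub repositories.
    card = create_model_card("tag-transformer-demo", "example-reinflection-data")
    print(card)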