Commit
·
1d6ffe4
1
Parent(s):
ee65134
quit
Browse files
app.py
CHANGED
|
@@ -68,8 +68,7 @@ def check_system_resources(model_name):
|
|
| 68 |
if total_memory_gb >= required_memory_gb:
|
| 69 |
log.info("✅ Sufficient CPU memory available; using CPU.")
|
| 70 |
return "cpu", total_memory_gb
|
| 71 |
-
|
| 72 |
-
raise MemoryError(f"❌ Insufficient system memory (requires {required_memory_gb:.1f}GB, available {available_memory_gb:.1f}GB).")
|
| 73 |
|
| 74 |
@timeit
|
| 75 |
def setup_environment(model_name):
|
|
@@ -114,19 +113,20 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, device):
|
|
| 114 |
"""
|
| 115 |
os.makedirs("temp", exist_ok=True)
|
| 116 |
log.info("Loading base model...")
|
| 117 |
-
model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto"
|
| 118 |
log.info("Loading adapter tokenizer...")
|
| 119 |
-
adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="auto"
|
| 120 |
log.info("Resizing token embeddings...")
|
| 121 |
added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
|
| 122 |
model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
|
| 123 |
log.info("Loading LoRA adapter...")
|
| 124 |
-
peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto"
|
| 125 |
log.info("Merging and unloading model...")
|
| 126 |
model = peft_model.merge_and_unload()
|
| 127 |
log.info("Saving model...")
|
| 128 |
model.save_pretrained(output_dir)
|
| 129 |
adapter_tokenizer.save_pretrained(output_dir)
|
|
|
|
| 130 |
return output_dir
|
| 131 |
|
| 132 |
@timeit
|
|
|
|
| 68 |
if total_memory_gb >= required_memory_gb:
|
| 69 |
log.info("✅ Sufficient CPU memory available; using CPU.")
|
| 70 |
return "cpu", total_memory_gb
|
| 71 |
+
|
|
|
|
| 72 |
|
| 73 |
@timeit
|
| 74 |
def setup_environment(model_name):
|
|
|
|
| 113 |
"""
|
| 114 |
os.makedirs("temp", exist_ok=True)
|
| 115 |
log.info("Loading base model...")
|
| 116 |
+
model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto")
|
| 117 |
log.info("Loading adapter tokenizer...")
|
| 118 |
+
adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="auto")
|
| 119 |
log.info("Resizing token embeddings...")
|
| 120 |
added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
|
| 121 |
model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
|
| 122 |
log.info("Loading LoRA adapter...")
|
| 123 |
+
peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto")
|
| 124 |
log.info("Merging and unloading model...")
|
| 125 |
model = peft_model.merge_and_unload()
|
| 126 |
log.info("Saving model...")
|
| 127 |
model.save_pretrained(output_dir)
|
| 128 |
adapter_tokenizer.save_pretrained(output_dir)
|
| 129 |
+
del model, peft_model
|
| 130 |
return output_dir
|
| 131 |
|
| 132 |
@timeit
|