Commit 611507d
menouar committed
1 Parent(s): f5d0e7d

Create automatically a README.md (HF model card)

Changed files:
- app.py +8 -12
- utils/__init__.py +2 -0
- utils/components_creator.py +5 -0
- utils/create_info_files.py +74 -0
- utils/notebook_generator.py +15 -22
app.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Any
 
 from nbconvert import HTMLExporter
 
+from utils.create_info_files import create_hf_card
 from utils.notebook_generator import *
 from utils.components_creator import *
 
@@ -68,10 +69,6 @@ def change_model_selection(model_id):
     return None
 
 
-def handle_push_to_hub(value):
-    return gr.Textbox(visible=value)
-
-
 def check_valid_input(value):
     if isinstance(value, str):
         return value and value.strip()
@@ -124,6 +121,7 @@ def generate_code(components: dict[Component, Any]):
     model_value = get_value(components, MODEL_SELECTION_ID)
     should_login = should_login_to_hf_model(model_value)
 
+    version_value = ""
     if not check_valid_input(model_value):
         gr.Warning("No model is selected!")
     else:
@@ -192,6 +190,12 @@ def generate_code(components: dict[Component, Any]):
 
         merge_model_cells(notebook['cells'], output_dir)
 
+        create_readme = get_value(components, README_ID)
+        if create_readme:
+            create_hf_card(notebook['cells'], name=output_dir, base_model_name=model_value,
+                           base_model_version=version_value,
+                           dataset_name=dataset_value, output_dir=output_dir, report_to=report_to)
+
         if push_to_hub:
             if not should_login:
                 create_login_hf_cells(notebook['cells'])
@@ -287,8 +291,6 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
         with centered_column():
             output_dir_cmp, push_to_hub_cmp = add_outputs()
             all_components.update({output_dir_cmp, push_to_hub_cmp})
-            repo_name_cmp = add_hf_repo_cmp()
-            all_components.update({repo_name_cmp})
         with centered_column():
             all_components.update(add_outputs1())
 
@@ -317,12 +319,6 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
         outputs=version_selection
     )
 
-    push_to_hub_cmp.change(
-        fn=handle_push_to_hub,
-        inputs=push_to_hub_cmp,
-        outputs=repo_name_cmp
-    )
-
 demo.launch(allowed_paths=["/"])
 
 # Upload metrics to the hub....
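In `generate_code`, each widget's value is read back out of the `{Component: value}` mapping by its `elem_id`, so the new `README_ID` constant (added in `utils/__init__.py` below) is the only coupling between the checkbox and the notebook generation. The commit does not show `get_value` itself; the following is only a hypothetical sketch of such a lookup, with every name in it treated as an assumption:

    from typing import Any

    from gradio.components import Component


    def get_value(components: dict[Component, Any], elem_id: str) -> Any:
        # Hypothetical helper: scan the mapping Gradio passes to the handler
        # and match on the elem_id each component was created with.
        for component, value in components.items():
            if getattr(component, "elem_id", None) == elem_id:
                return value
        return None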
utils/__init__.py
CHANGED
@@ -41,6 +41,8 @@ REPOSITORY_NAME_ID = "repo_id"
 
 REPORT_TO_ID = "report_to"
 
+README_ID = "readme"
+
 MAX_SEQ_LENGTH_ID = "max_seq_length"
 PACKING_ID = "packing"
 
utils/components_creator.py
CHANGED
@@ -201,8 +201,13 @@ def add_outputs1() -> Set[Component]:
              "'comet_ml', 'mlflow', 'tensorboard' and 'wandb'. Use 'all' to report to all integrations installed, "
              "'none' for no integrations."
     )
+    create_readme = gr.Checkbox(label="Automatically Generate a README.md", value=True, interactive=True,
+                                info="Choose whether to automatically generate a model card (README.md) or not.",
+                                elem_id=README_ID)
+
     out_components: Set[Component] = set()
     out_components.add(report_to)
+    out_components.add(create_readme)
     return out_components
 
 
utils/create_info_files.py
ADDED
@@ -0,0 +1,74 @@
+import nbformat as nbf
+
+
+def create_hf_card(cells, name, base_model_name, base_model_version, dataset_name, output_dir, report_to):
+    text = f"""
+card = '''
+---
+license: apache-2.0
+tags:
+- generated_from_trainer
+- {base_model_name}
+- PyTorch
+- transformers
+- trl
+- peft
+- {report_to}
+base_model: {base_model_name}-{base_model_version}
+widget:
+  - example_title: Pirate!
+    messages:
+      - role: system
+        content: You are a pirate chatbot who always responds with Arr!
+      - role: user
+        content: "There's a llama on my lawn, how can I get rid of him?"
+    output:
+      text: >-
+        Arr! 'Tis a puzzlin' matter, me hearty! A llama on yer lawn be a rare
+        sight, but I've got a plan that might help ye get rid of 'im. Ye'll need
+        to gather some carrots and hay, and then lure the llama away with the
+        promise of a tasty treat. Once he's gone, ye can clean up yer lawn and
+        enjoy the peace and quiet once again. But beware, me hearty, for there
+        may be more llamas where that one came from! Arr!
+model-index:
+- name: {name}
+  results: []
+datasets:
+- {dataset_name}
+language:
+- en
+pipeline_tag: text-generation
+---
+
+# Model Card for {name}:
+
+**{name}** is a language model that is trained to act as a helpful assistant. It is a finetuned version of [{base_model_name}-{base_model_version}](https://huggingface.co/{base_model_name}-{base_model_version}) that was trained using SFTTrainer on the publicly available dataset [
+{dataset_name}](https://huggingface.co/datasets/{dataset_name}).
+
+## Training Procedure:
+
+The training code used to create this model was generated by [Menouar/LLM-FineTuning-Notebook-Generator](https://huggingface.co/spaces/Menouar/LLM-FineTuning-Notebook-Generator).
+
+
+
+## Training hyperparameters
+
+The following hyperparameters were used during the training:
+
+
+'''
+
+with open("{output_dir}/README.md", "w") as f:
+    f.write(card)
+
+args_dict = vars(args)
+
+with open("{output_dir}/README.md", "a") as f:
+    for k, v in args_dict.items():
+        f.write(f"- {{k}}: {{v}}")
+        f.write("\\n \\n")
+"""
+    title = """### Generating a model card (README.md)"""
+    cells.append(nbf.v4.new_markdown_cell(title))
+    code_cell = nbf.v4.new_code_cell(text)
+    cells.append(code_cell)
utils/notebook_generator.py
CHANGED
@@ -61,10 +61,10 @@ def create_login_hf_cells(cells: list, should_login: bool = False, model_name: O
     text_cell = nbf.v4.new_markdown_cell(
         "### Login to HF")
 
-    text_1 = "Login with
+    text_1 = "Login with our `HF_TOKEN` in order to push the finetuned model to `huggingface_hub`."
 
     if should_login:
-        text_1 = f"Login with
+        text_1 = f"Login with our `HF_TOKEN` in order to load **{model_name}** from `huggingface_hub`."
 
     text_cell1 = nbf.v4.new_markdown_cell(text_1)
     code = """
@@ -167,7 +167,7 @@ This process involves two key steps:
 
 1. **LLM Quantization:**
    - We first load the selected large language model (LLM).
-   - We then use the
+   - We then use the `bitsandbytes` library to quantize the model, which can significantly reduce its memory footprint.
 
 > **Note:** The memory requirements of the model scale with its size. For instance, a 7B parameter model may require
 a 24GB GPU for fine-tuning.
@@ -228,7 +228,7 @@ def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, pe
                                save_strategy, gradient_accumulation_steps, gradient_checkpointing,
                                learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
                                report_to, seed):
-    text_cell = nbf.v4.new_markdown_cell("### TrainingArguments")
+    text_cell = nbf.v4.new_markdown_cell("### Setting the TrainingArguments")
     to_install = None
     if report_to == "all":
         to_install = "azure_ml comet_ml mlflow tensorboard wandb"
@@ -238,8 +238,7 @@ def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, pe
         gradient_checkpointing_kwargs = {"use_reentrant": False}
 
     code_report = f"""
-# Installing {to_install} to report the metrics
-
+# Installing {to_install} to report the metrics
 !pip install -q {to_install}
 """
 
@@ -278,7 +277,7 @@ args = TrainingArguments(
 
 def create_sft_trainer_cells(cells: list, max_seq_length, packing):
     text_cell = nbf.v4.new_markdown_cell(
-        """### Supervised Finetuning Trainer (
+        """### Setting the Supervised Finetuning Trainer (`SFTTrainer`)
 
 This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
 The trainer takes care of properly initializing the `PeftModel`.
@@ -308,7 +307,7 @@ trainer = SFTTrainer(
 
 def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
     if push_to_hub:
-        save_txt = "and to the hub."
+        save_txt = f"and to the hub in 'User/{output_dir}'."
     else:
         save_txt = "."
 
@@ -320,7 +319,7 @@ def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, out
         f"""### Starting Training and Saving Model/Tokenizer
 
 We start training the model by calling the `train()` method on the trainer instance. This will start the training
-loop and train the model for `{epoch_str}`. The model will be automatically saved the output directory(temp_{output_dir})
+loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory ('temp_{output_dir}')
 {save_txt}
 
 """)
@@ -342,7 +341,8 @@ trainer.save_model()
 
 def create_free_gpu_cells(cells: list):
     text_cell = nbf.v4.new_markdown_cell(
-        """### Free the GPU Memory for Merging `
+        """### Free the GPU Memory to Prepare for the Merging of the `PeftModel`
+""")
 
     code = f"""
 
@@ -358,11 +358,11 @@ torch.cuda.empty_cache()
 
 def create_merge_lora_cells(cells: list, output_dir):
     text_cell = nbf.v4.new_markdown_cell(
-        """###
+        """### Merging LoRA Adapters into the Original Model
 
 While utilizing `LoRA`, we focus on training the adapters rather than the entire model. Consequently, during the
 model saving process, only the `adapter weights` are preserved, not the complete model. If we wish to save the
-entire model for easier usage with Text Generation Inference,
+entire model for easier usage with Text Generation Inference, we can incorporate the adapter weights into the model
 weights. This can be achieved using the `merge_and_unload` method. Following this, the model can be saved using the
 `save_pretrained` method. The result is a default model that is ready for inference.
 """)
@@ -378,7 +378,7 @@ model = AutoPeftModelForCausalLM.from_pretrained(
     low_cpu_mem_usage=True
 )
 
-# Merge LoRA
+# Merge LoRA with the base model and save
 merged_model = model.merge_and_unload()
 merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_size="2GB")
 tokenizer.save_pretrained("{output_dir}")
@@ -396,23 +396,17 @@ def merge_model_cells(cells: list, output_dir):
 import os
 import shutil
 
-# Specify the source folder and the destination folder
 source_folder = "temp_{output_dir}"
 destination_folder = "{output_dir}"
 
-# Create the destination folder if it doesn't exist
 os.makedirs(destination_folder, exist_ok=True)
 
-# Iterate over the files and subfolders in the source folder
 for item in os.listdir(source_folder):
     item_path = os.path.join(source_folder, item)
 
-    # Check if it's a subfolder (and not a file)
     if os.path.isdir(item_path):
-        # Specify the destination path
         destination_path = os.path.join(destination_folder, item)
 
-        # Copy the subfolder to the destination folder
         shutil.copytree(item_path, destination_path)
 """
 
@@ -422,7 +416,7 @@ for item in os.listdir(source_folder):
 
 
 def push_to_hub_cells(cells: list, output_dir):
-    text = f"
+    text = f"### Pushing '{output_dir}' to our Hugging Face account."
    code = f"""
 from huggingface_hub import HfApi, HfFolder, Repository
 
@@ -437,8 +431,7 @@ repo = api.create_repo(token=HfFolder.get_token(), repo_type="model", repo_id=re
 
 api.upload_folder(
     folder_path="{output_dir}",
-    repo_id=repo.repo_id
-    repo_type="model"
+    repo_id=repo.repo_id
 )
 """
     code_cell = nbf.v4.new_code_cell(code)