diff --git a/Mergekit.ipynb b/Mergekit.ipynb
index af4746d..2a6aa4f 100644
--- a/Mergekit.ipynb
+++ b/Mergekit.ipynb
@@ -5,7 +5,7 @@
     "colab": {
       "provenance": [],
       "machine_shape": "hm",
-      "authorship_tag": "ABX9TyNkCdo3uzEUbLA4CS6VfaEM",
+      "authorship_tag": "ABX9TyO11tndDoFEdL4+/FDgLad9",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -1486,13 +1486,14 @@
       "outputs": [],
       "source": [
         "!git clone https://github.com/cg123/mergekit.git\n",
-        "%cd mergekit\n",
-        "!pip install -e ."
+        "!cd mergekit && pip install -q -e ."
       ]
     },
     {
       "cell_type": "code",
       "source": [
+        "import yaml\n",
+        "\n",
         "MODEL_NAME = \"Marcoro14-7B-slerp\"\n",
         "yaml_config = \"\"\"\n",
         "slices:\n",
@@ -1511,40 +1512,24 @@
         "      value: [1, 0.5, 0.7, 0.3, 0]\n",
         "    - value: 0.5\n",
         "dtype: bfloat16\n",
-        "\"\"\""
+        "\n",
+        "\"\"\"\n",
+        "\n",
+        "# Save config as yaml file\n",
+        "with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n",
+        "    f.write(yaml_config)"
       ],
       "metadata": {
         "id": "LGd7jlfCpNcg"
       },
-      "execution_count": 21,
+      "execution_count": null,
       "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
-        "import yaml\n",
-        "import torch\n",
-        "\n",
-        "from mergekit.config import MergeConfiguration\n",
-        "from mergekit.merge import MergeOptions, run_merge\n",
-        "\n",
-        "# Load merge config\n",
-        "data = yaml.safe_load(yaml_config)\n",
-        "merge_config = MergeConfiguration.model_validate(data)\n",
-        "\n",
         "# Merge models\n",
-        "run_merge(\n",
-        "    merge_config,\n",
-        "    \"./merged\",\n",
-        "    options=MergeOptions(\n",
-        "        lora_merge_cache='/tmp',\n",
-        "        cuda=torch.cuda.is_available(),\n",
-        "        low_cpu_memory=True, # VRAM offloading\n",
-        "        copy_tokenizer=True,\n",
-        "        trust_remote_code=False,\n",
-        "        lazy_unpickle=False,\n",
-        "    ),\n",
-        ")"
+        "!mergekit-yaml config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickle"
       ],
       "metadata": {
         "id": "d5mYzDo1q96y"
@@ -1560,20 +1545,26 @@
         "from huggingface_hub import ModelCard, ModelCardData\n",
         "from jinja2 import Template\n",
         "\n",
+        "username = \"mlabonne\"\n",
+        "\n",
         "template_text = \"\"\"\n",
         "---\n",
         "license: apache-2.0\n",
         "tags:\n",
         "- merge\n",
         "- mergekit\n",
+        "- lazymergekit\n",
+        "{%- for model in models %}\n",
+        "- {{ model }}\n",
+        "{%- endfor %}\n",
         "---\n",
         "\n",
         "# {{ model_name }}\n",
         "\n",
-        "This model is a merge of the following models made with [mergekit](https://github.com/cg123/mergekit):\n",
+        "{{ model_name }} is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):\n",
         "\n",
         "{%- for model in models %}\n",
-        " * [{{ model }}](https://huggingface.co/{{ model }})\n",
+        "* [{{ model }}](https://huggingface.co/{{ model }})\n",
         "{%- endfor %}\n",
         "\n",
         "## 🧩 Configuration\n",
@@ -1587,6 +1578,7 @@
         "jinja_template = Template(template_text.strip())\n",
         "\n",
         "# Get list of models from config\n",
+        "data = yaml.safe_load(yaml_config)\n",
         "if \"models\" in data:\n",
         "    models = [data[\"models\"][i][\"model\"] for i in range(len(data[\"models\"])) if \"parameters\" in data[\"models\"][i]]\n",
         "elif \"parameters\" in data:\n",
@@ -1601,11 +1593,12 @@
         "    model_name=MODEL_NAME,\n",
         "    models=models,\n",
         "    yaml_config=yaml_config,\n",
+        "    username=username,\n",
         ")\n",
         "\n",
         "# Save the model card\n",
         "card = ModelCard(content)\n",
-        "card.save('merged/README.md')"
+        "card.save('merge/README.md')"
       ],
       "metadata": {
         "colab": {
@@ -1643,7 +1636,7 @@
         ")\n",
         "api.upload_folder(\n",
         "    repo_id=f\"{username}/{MODEL_NAME}\",\n",
-        "    folder_path=\"merged\",\n",
+        "    folder_path=\"merge\",\n",
         ")"
       ],
       "metadata": {