Commit message: Created using Colaboratory
Branch: pull/17/head
Author: Maxime Labonne, 5 months ago
Commit 21f7408d61, parent 0b4c98ee19

@@ -5,7 +5,7 @@
"colab": {
"provenance": [],
"machine_shape": "hm",
"authorship_tag": "ABX9TyMuGevIbBdnvORov5ZLmtGx",
"authorship_tag": "ABX9TyNkCdo3uzEUbLA4CS6VfaEM",
"include_colab_link": true
},
"kernelspec": {
@@ -1481,67 +1481,42 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NPNPie5Eo3EZ",
"outputId": "450c623b-7fc8-44df-c437-ea72b44a5a75"
"id": "NPNPie5Eo3EZ"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m258.1/258.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m395.4/395.4 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.3/168.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for mergekit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"lida 0.0.10 requires fastapi, which is not installed.\n",
"lida 0.0.10 requires kaleido, which is not installed.\n",
"lida 0.0.10 requires python-multipart, which is not installed.\n",
"lida 0.0.10 requires uvicorn, which is not installed.\n",
"llmx 0.0.15a0 requires cohere, which is not installed.\n",
"llmx 0.0.15a0 requires openai, which is not installed.\n",
"llmx 0.0.15a0 requires tiktoken, which is not installed.\n",
"tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0m"
]
}
],
"outputs": [],
"source": [
"!git clone https://github.com/cg123/mergekit.git\n",
"%cd mergekit\n",
"!pip install -qe ."
"!pip install -e ."
]
},
{
"cell_type": "code",
"source": [
"MODEL_NAME = \"NeuralPipe-9B-merged\"\n",
"MODEL_NAME = \"Marcoro14-7B-slerp\"\n",
"yaml_config = \"\"\"\n",
"slices:\n",
" - sources:\n",
" - model: OpenPipe/mistral-ft-optimized-1218\n",
" layer_range: [0, 32]\n",
" - sources:\n",
" - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
" layer_range: [24, 32]\n",
"merge_method: passthrough\n",
" - model: AIDC-ai-business/Marcoroni-7B-v3\n",
" layer_range: [0, 32]\n",
" - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1\n",
" layer_range: [0, 32]\n",
"merge_method: slerp\n",
"base_model: AIDC-ai-business/Marcoroni-7B-v3\n",
"parameters:\n",
" t:\n",
" - filter: self_attn\n",
" value: [0, 0.5, 0.3, 0.7, 1]\n",
" - filter: mlp\n",
" value: [1, 0.5, 0.7, 0.3, 0]\n",
" - value: 0.5\n",
"dtype: bfloat16\n",
"\"\"\""
],
"metadata": {
"id": "LGd7jlfCpNcg"
},
"execution_count": null,
"execution_count": 21,
"outputs": []
},
{
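Note: the updated cell above only defines the yaml_config string for a SLERP merge of two Mistral-7B models; the merge itself is launched by a later cell outside this hunk. Below is a minimal sketch of that follow-up step, assuming mergekit's mergekit-yaml entry point and an output directory named "merge"; the exact CLI flags are assumptions based on mergekit's README, not lines from this commit.

import yaml

# Parse the YAML string defined in the cell above to catch indentation
# mistakes early, then write it to disk for the mergekit CLI.
config = yaml.safe_load(yaml_config)
assert config["merge_method"] == "slerp"

with open("config.yaml", "w", encoding="utf-8") as f:
    f.write(yaml_config)

# The merge is then run as a Colab shell command, roughly
# (flag choices are an assumption, not part of this diff):
# !mergekit-yaml config.yaml merge --copy-tokenizer --lazy-unpickle

In the config itself, t is the SLERP interpolation factor: the self_attn and mlp filters give it a gradient across the layer range, and the trailing "- value: 0.5" acts as the default for all remaining tensors.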
@@ -1611,9 +1586,17 @@
"# Create a Jinja template object\n",
"jinja_template = Template(template_text.strip())\n",
"\n",
"# Get list of models from config\n",
"if \"models\" in data:\n",
" models = [data[\"models\"][i][\"model\"] for i in range(len(data[\"models\"])) if \"parameters\" in data[\"models\"][i]]\n",
"elif \"parameters\" in data:\n",
" models = [data[\"slices\"][0][\"sources\"][i][\"model\"] for i in range(len(data[\"slices\"][0][\"sources\"]))]\n",
"elif \"slices\" in data:\n",
" models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n",
"else:\n",
" raise Exception(\"No models or slices found in yaml config\")\n",
"\n",
"# Fill the template\n",
"models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n",
"# models = [\"OpenPipe/mistral-ft-optimized-1218\", \"mlabonne/NeuralHermes-2.5-Mistral-7B\"]\n",
"content = jinja_template.render(\n",
" model_name=MODEL_NAME,\n",
" models=models,\n",

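The if/elif block added in this last hunk exists because a mergekit config can declare its inputs either under a top-level "models:" key or inside layer "slices:", and the slerp config above falls in the second camp. A self-contained sketch of the same lookup, run against a trimmed version of this commit's config (the trimmed YAML and the print call are illustrative additions, not lines from the diff):

import yaml

yaml_config = """
slices:
  - sources:
      - model: AIDC-ai-business/Marcoroni-7B-v3
        layer_range: [0, 32]
      - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1
        layer_range: [0, 32]
merge_method: slerp
base_model: AIDC-ai-business/Marcoroni-7B-v3
parameters:
  t:
    - value: 0.5
dtype: bfloat16
"""
data = yaml.safe_load(yaml_config)

if "models" in data:
    # top-level models: list; keep only entries that declare merge parameters
    models = [m["model"] for m in data["models"] if "parameters" in m]
elif "parameters" in data:
    # single slice with global parameters (the slerp case): take every source model
    models = [s["model"] for s in data["slices"][0]["sources"]]
elif "slices" in data:
    # several slices with one source each (e.g. the old passthrough config)
    models = [s["sources"][0]["model"] for s in data["slices"]]
else:
    raise Exception("No models or slices found in yaml config")

print(models)
# ['AIDC-ai-business/Marcoroni-7B-v3', 'EmbeddedLLM/Mistral-7B-Merge-14-v0.1']

The comprehensions iterate directly instead of by index but read the same fields as the notebook's added lines; for this config the elif "parameters" branch fires, so both source models are passed to the Jinja template as models.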