{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyMohoDhmmKsuh9OLDHor3GB",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"c281b60e104f4c5da547bbdd7208d4bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "VBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "VBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "VBoxView",
"box_style": "",
"children": [
"IPY_MODEL_2e2fabac70484c1c8b16fa6ca8fd8537",
"IPY_MODEL_bf53c635fa374420ad850eea22cd1e31",
"IPY_MODEL_065d59126a734c1aa096ba40cd4a129f",
"IPY_MODEL_e8855d5678a342f5a33171aa74d3b7bc"
],
"layout": "IPY_MODEL_1c8a6b959f9c4443a92f58eff1b03077"
}
},
"74b084c97f6f46d293a197bf9804460c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9fb5726f91734b1da149784680dc9624",
"placeholder": "",
"style": "IPY_MODEL_202a8eb11eda4e58942113fbeacfdc3d",
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
}
},
"1409574c4f9742e7a711965dd2c8ad87": {
"model_module": "@jupyter-widgets/controls",
"model_name": "PasswordModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "PasswordModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "PasswordView",
"continuous_update": true,
"description": "Token:",
"description_tooltip": null,
"disabled": false,
"layout": "IPY_MODEL_970d4d3daf854f92bd650dc4da99e1bc",
"placeholder": "",
"style": "IPY_MODEL_24b1e007921046b1adc61db0f2bf9fc7",
"value": ""
}
},
"704ecf9409244e0b93612d6a11476346": {
"model_module": "@jupyter-widgets/controls",
"model_name": "CheckboxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "CheckboxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "CheckboxView",
"description": "Add token as git credential?",
"description_tooltip": null,
"disabled": false,
"indent": true,
"layout": "IPY_MODEL_24d3d72f5de54de8a1ded4e528dde332",
"style": "IPY_MODEL_e90cb0ce526a4556bc643ba6c5485661",
"value": true
}
},
"b1a8d3a9a379415393d9e7d995a40788": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Login",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_76e7372656b745c889b9283b76c04148",
"style": "IPY_MODEL_ce0204c7e1ff4a51b2648284a2492262",
"tooltip": ""
}
},
"f928772f92724579b068e984d9eef387": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6dbb8e8a5ebb40a4ba910b09dde27e1a",
"placeholder": "",
"style": "IPY_MODEL_7944af54f2564920822d5d4b348896c4",
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
}
},
"1c8a6b959f9c4443a92f58eff1b03077": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": "center",
"align_self": null,
"border": null,
"bottom": null,
"display": "flex",
"flex": null,
"flex_flow": "column",
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": "50%"
}
},
"9fb5726f91734b1da149784680dc9624": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"202a8eb11eda4e58942113fbeacfdc3d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"970d4d3daf854f92bd650dc4da99e1bc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"24b1e007921046b1adc61db0f2bf9fc7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"24d3d72f5de54de8a1ded4e528dde332": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e90cb0ce526a4556bc643ba6c5485661": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"76e7372656b745c889b9283b76c04148": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ce0204c7e1ff4a51b2648284a2492262": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"6dbb8e8a5ebb40a4ba910b09dde27e1a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7944af54f2564920822d5d4b348896c4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1b55372f62494ca0baabf87f7e7f4ba8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_bf612001ad354ea19de6ee45a166a43c",
"placeholder": "",
"style": "IPY_MODEL_a8e4691970b14955bfb4865bcef5e912",
"value": "Connecting..."
}
},
"bf612001ad354ea19de6ee45a166a43c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a8e4691970b14955bfb4865bcef5e912": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"2e2fabac70484c1c8b16fa6ca8fd8537": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7eb6de1a979b46f7b234724073f8bc3a",
"placeholder": "",
"style": "IPY_MODEL_6ae4640196da492fadafeb63f4bc89d2",
"value": "Token is valid (permission: write)."
}
},
"bf53c635fa374420ad850eea22cd1e31": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cef83433dbea4f529f43722fe78a8baf",
"placeholder": "",
"style": "IPY_MODEL_845ba8115d5140ac9ee22af4a9e6a03b",
"value": "Your token has been saved in your configured git credential helpers (store)."
}
},
"065d59126a734c1aa096ba40cd4a129f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cdd888041aca4dcf8adc785309071fc6",
"placeholder": "",
"style": "IPY_MODEL_cf63214cb4f8442999fa5b971035fe4f",
"value": "Your token has been saved to /root/.cache/huggingface/token"
}
},
"e8855d5678a342f5a33171aa74d3b7bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7d9b22f2b7fe4a749f989e247bce446a",
"placeholder": "",
"style": "IPY_MODEL_7f8e268db8144adfb09d089784d8411a",
"value": "Login successful"
}
},
"7eb6de1a979b46f7b234724073f8bc3a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6ae4640196da492fadafeb63f4bc89d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cef83433dbea4f529f43722fe78a8baf": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"845ba8115d5140ac9ee22af4a9e6a03b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cdd888041aca4dcf8adc785309071fc6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cf63214cb4f8442999fa5b971035fe4f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7d9b22f2b7fe4a749f989e247bce446a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7f8e268db8144adfb09d089784d8411a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Quantize_Llama_2_models_using_GGUF_and_llama_cpp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Quantize Llama 2 models using GGUF and llama.cpp\n",
"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
"\n",
"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
"\n",
"## Usage\n",
"\n",
"* `MODEL_ID`: The ID of the model to quantize (e.g., `mlabonne/EvolCodeLlama-7b`).\n",
"* `QUANTIZATION_METHOD`: The quantization method to use.\n",
"\n",
"## Quantization methods\n",
"\n",
"The names of the quantization methods follow the naming convention: \"q\" + the number of bits + the variant used (detailed below). Here is a list of all the possible quant methods and their corresponding use cases, based on model cards made by [TheBloke](https://huggingface.co/TheBloke/):\n",
"\n",
"* `q2_k`: Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.\n",
"* `q3_k_l`: Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n",
"* `q3_k_m`: Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n",
"* `q3_k_s`: Uses Q3_K for all tensors\n",
"* `q4_0`: Original quant method, 4-bit.\n",
"* `q4_1`: Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.\n",
"* `q4_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K\n",
"* `q4_k_s`: Uses Q4_K for all tensors\n",
"* `q5_0`: Higher accuracy, higher resource usage and slower inference.\n",
"* `q5_1`: Even higher accuracy, resource usage and slower inference.\n",
"* `q5_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K\n",
"* `q5_k_s`: Uses Q5_K for all tensors\n",
"* `q6_k`: Uses Q8_K for all tensors\n",
"* `q8_0`: Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.\n",
"\n",
"As a rule of thumb, **I recommend using Q5_K_M** as it preserves most of the model's performance. Alternatively, you can use Q4_K_M if you want to save some memory. In general, K_M versions are better than K_S versions. I cannot recommend Q2_K or Q3_* versions, as they drastically decrease model performance."
],
"metadata": {
"id": "8y_Rk94LzG7I"
}
},
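{
"cell_type": "markdown",
"source": [
"To make the memory trade-off concrete, here is a rough back-of-the-envelope sketch: it estimates the file size each method yields for a 7B-parameter model from assumed bits-per-weight averages. These values are approximations (the K_M and K_S variants mix quant types across tensors, as described above), so real GGUF files will differ slightly."
],
"metadata": {
"id": "quant-size-estimate-md"
}
},
{
"cell_type": "code",
"source": [
"# Back-of-the-envelope sketch: rough file-size estimates per quant method.\n",
"# The bits-per-weight values are assumed approximate averages, not measured\n",
"# numbers; mixed K_M/K_S methods land between their component types.\n",
"N_PARAMS = 7e9  # parameter count of a 7B model, roughly\n",
"\n",
"BITS_PER_WEIGHT = {\n",
"    \"q2_k\": 2.6, \"q3_k_s\": 3.4, \"q3_k_m\": 3.9, \"q3_k_l\": 4.3,\n",
"    \"q4_0\": 4.5, \"q4_1\": 5.0, \"q4_k_s\": 4.6, \"q4_k_m\": 4.8,\n",
"    \"q5_0\": 5.5, \"q5_1\": 6.0, \"q5_k_s\": 5.5, \"q5_k_m\": 5.7,\n",
"    \"q6_k\": 6.6, \"q8_0\": 8.5, \"fp16\": 16.0,\n",
"}\n",
"\n",
"for method, bpw in BITS_PER_WEIGHT.items():\n",
"    size_gb = N_PARAMS * bpw / 8 / 1e9  # bits -> bytes -> gigabytes\n",
"    print(f\"{method:>7}: ~{size_gb:.1f} GB\")"
],
"metadata": {
"id": "quant-size-estimate"
},
"execution_count": null,
"outputs": []
},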
{
"cell_type": "code",
"source": [
"# Variables\n",
"MODEL_ID = \"mlabonne/EvolCodeLlama-7b\"\n",
"QUANTIZATION_METHODS = [\"q4_k_m\", \"q5_k_m\"]\n",
"\n",
"# Constants\n",
"MODEL_NAME = MODEL_ID.split('/')[-1]\n",
"\n",
"# Install llama.cpp\n",
"!git clone https://github.com/ggerganov/llama.cpp\n",
"!cd llama.cpp && git pull && make clean && LLAMA_CUBLAS=1 make\n",
"!pip install -r llama.cpp/requirements.txt\n",
"\n",
"# Download model\n",
"!git lfs install\n",
"!git clone https://huggingface.co/{MODEL_ID}\n",
"\n",
"# Convert to fp16\n",
"fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin\"\n",
"!python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}\n",
"\n",
"# Quantize the model for each method in the QUANTIZATION_METHODS list\n",
"for method in QUANTIZATION_METHODS:\n",
" qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
" !./llama.cpp/quantize {fp16} {qtype} {method}"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fD24jJxq7t3k",
"outputId": "94954934-0829-44e9-a5e5-262c17e162d0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"ggml_init_cublas: found 1 CUDA devices:\n",
" Device 0: Tesla T4, compute capability 7.5\n",
"main: build = 1100 (dd0dc36)\n",
"main: quantizing 'EvolCodeLlama-7b/evolcodellama-7b.gguf.fp16.bin' to 'EvolCodeLlama-7b/evolcodellama-7b.gguf.q4_k_s.bin' as Q4_K_S\n",
"llama_model_loader: loaded meta data with 16 key-value pairs and 291 tensors from EvolCodeLlama-7b/evolcodellama-7b.gguf.fp16.bin (version GGUF V1 (support until nov 2023))\n",
"llama_model_loader: - tensor 0: token_embd.weight f16 [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - tensor 1: blk.0.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 2: blk.0.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 3: blk.0.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 4: blk.0.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 5: blk.0.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 6: blk.0.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 7: blk.0.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 10: blk.1.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 11: blk.1.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 12: blk.1.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 13: blk.1.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 14: blk.1.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 15: blk.1.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 16: blk.1.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 19: blk.2.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 20: blk.2.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 21: blk.2.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 22: blk.2.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 23: blk.2.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 24: blk.2.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 25: blk.2.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 28: blk.3.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 29: blk.3.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 30: blk.3.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 31: blk.3.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 32: blk.3.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 33: blk.3.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 34: blk.3.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 37: blk.4.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 38: blk.4.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 39: blk.4.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 40: blk.4.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 41: blk.4.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 42: blk.4.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 43: blk.4.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 46: blk.5.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 47: blk.5.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 48: blk.5.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 49: blk.5.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 50: blk.5.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 51: blk.5.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 52: blk.5.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 55: blk.6.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 56: blk.6.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 57: blk.6.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 58: blk.6.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 59: blk.6.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 60: blk.6.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 61: blk.6.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 64: blk.7.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 65: blk.7.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 66: blk.7.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 67: blk.7.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 68: blk.7.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 69: blk.7.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 70: blk.7.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 73: blk.8.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 74: blk.8.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 75: blk.8.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 76: blk.8.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 77: blk.8.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 78: blk.8.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 79: blk.8.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 82: blk.9.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 83: blk.9.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 84: blk.9.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 85: blk.9.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 86: blk.9.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 87: blk.9.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 88: blk.9.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 91: blk.10.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 92: blk.10.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 93: blk.10.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 94: blk.10.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 95: blk.10.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 96: blk.10.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 97: blk.10.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 100: blk.11.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 101: blk.11.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 102: blk.11.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 103: blk.11.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 104: blk.11.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 105: blk.11.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 106: blk.11.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 109: blk.12.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 110: blk.12.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 111: blk.12.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 112: blk.12.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 113: blk.12.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 114: blk.12.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 115: blk.12.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 118: blk.13.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 119: blk.13.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 120: blk.13.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 121: blk.13.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 122: blk.13.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 123: blk.13.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 124: blk.13.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 127: blk.14.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 128: blk.14.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 129: blk.14.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 130: blk.14.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 131: blk.14.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 132: blk.14.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 133: blk.14.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 136: blk.15.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 137: blk.15.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 138: blk.15.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 139: blk.15.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 140: blk.15.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 141: blk.15.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 142: blk.15.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 145: blk.16.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 146: blk.16.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 147: blk.16.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 148: blk.16.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 149: blk.16.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 150: blk.16.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 151: blk.16.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 154: blk.17.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 155: blk.17.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 156: blk.17.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 157: blk.17.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 158: blk.17.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 159: blk.17.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 160: blk.17.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 163: blk.18.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 164: blk.18.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 165: blk.18.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 166: blk.18.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 167: blk.18.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 168: blk.18.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 169: blk.18.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 172: blk.19.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 173: blk.19.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 174: blk.19.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 175: blk.19.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 176: blk.19.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 177: blk.19.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 178: blk.19.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 181: blk.20.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 182: blk.20.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 183: blk.20.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 184: blk.20.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 185: blk.20.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 186: blk.20.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 187: blk.20.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 190: blk.21.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 191: blk.21.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 192: blk.21.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 193: blk.21.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 194: blk.21.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 195: blk.21.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 196: blk.21.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 199: blk.22.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 200: blk.22.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 201: blk.22.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 202: blk.22.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 203: blk.22.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 204: blk.22.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 205: blk.22.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 208: blk.23.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 209: blk.23.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 210: blk.23.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 211: blk.23.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 212: blk.23.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 213: blk.23.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 214: blk.23.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 217: blk.24.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 218: blk.24.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 219: blk.24.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 220: blk.24.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 221: blk.24.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 222: blk.24.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 223: blk.24.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 226: blk.25.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 227: blk.25.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 228: blk.25.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 229: blk.25.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 230: blk.25.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 231: blk.25.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 232: blk.25.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 235: blk.26.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 236: blk.26.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 237: blk.26.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 238: blk.26.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 239: blk.26.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 240: blk.26.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 241: blk.26.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 244: blk.27.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 245: blk.27.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 246: blk.27.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 247: blk.27.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 248: blk.27.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 249: blk.27.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 250: blk.27.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 253: blk.28.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 254: blk.28.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 255: blk.28.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 256: blk.28.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 257: blk.28.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 258: blk.28.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 259: blk.28.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 262: blk.29.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 263: blk.29.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 264: blk.29.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 265: blk.29.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 266: blk.29.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 267: blk.29.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 268: blk.29.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 271: blk.30.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 272: blk.30.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 273: blk.30.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 274: blk.30.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 275: blk.30.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 276: blk.30.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 277: blk.30.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 280: blk.31.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 281: blk.31.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 282: blk.31.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 283: blk.31.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 284: blk.31.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 285: blk.31.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 286: blk.31.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 290: output.weight f16 [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - kv 0: general.architecture str \n",
"llama_model_loader: - kv 1: general.name str \n",
"llama_model_loader: - kv 2: llama.context_length u32 \n",
"llama_model_loader: - kv 3: llama.embedding_length u32 \n",
"llama_model_loader: - kv 4: llama.block_count u32 \n",
"llama_model_loader: - kv 5: llama.feed_forward_length u32 \n",
"llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n",
"llama_model_loader: - kv 7: llama.attention.head_count u32 \n",
"llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n",
"llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n",
"llama_model_loader: - kv 10: llama.rope.freq_base f32 \n",
"llama_model_loader: - kv 11: general.file_type u32 \n",
"llama_model_loader: - kv 12: tokenizer.ggml.model str \n",
"llama_model_loader: - kv 13: tokenizer.ggml.tokens arr \n",
"llama_model_loader: - kv 14: tokenizer.ggml.scores arr \n",
"llama_model_loader: - kv 15: tokenizer.ggml.token_type arr \n",
"llama_model_loader: - type f32: 65 tensors\n",
"llama_model_loader: - type f16: 226 tensors\n",
"llama_model_quantize_internal: meta size = 741408 bytes\n",
"[ 1/ 291] token_embd.weight - [ 4096, 32016, 1, 1], type = f16, quantizing to q4_K .. size = 250.12 MB -> 70.35 MB | hist: \n",
"[ 2/ 291] blk.0.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 3/ 291] blk.0.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 4/ 291] blk.0.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 5/ 291] blk.0.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 6/ 291] blk.0.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 7/ 291] blk.0.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 8/ 291] blk.0.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 9/ 291] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 10/ 291] blk.0.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 11/ 291] blk.1.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 12/ 291] blk.1.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 13/ 291] blk.1.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 14/ 291] blk.1.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 15/ 291] blk.1.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 16/ 291] blk.1.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 17/ 291] blk.1.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 18/ 291] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 19/ 291] blk.1.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 20/ 291] blk.2.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 21/ 291] blk.2.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 22/ 291] blk.2.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 23/ 291] blk.2.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 24/ 291] blk.2.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 25/ 291] blk.2.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 26/ 291] blk.2.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 27/ 291] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 28/ 291] blk.2.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 29/ 291] blk.3.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 30/ 291] blk.3.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 31/ 291] blk.3.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
"[ 32/ 291] blk.3.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 33/ 291] blk.3.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 34/ 291] blk.3.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 35/ 291] blk.3.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
"[ 36/ 291] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 37/ 291] blk.3.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 38/ 291] blk.4.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 39/ 291] blk.4.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 40/ 291] blk.4.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 41/ 291] blk.4.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 42/ 291] blk.4.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 43/ 291] blk.4.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 44/ 291] blk.4.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 45/ 291] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 46/ 291] blk.4.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 47/ 291] blk.5.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 48/ 291] blk.5.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 49/ 291] blk.5.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 50/ 291] blk.5.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 51/ 291] blk.5.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 52/ 291] blk.5.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 53/ 291] blk.5.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 54/ 291] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 55/ 291] blk.5.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 56/ 291] blk.6.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 57/ 291] blk.6.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 58/ 291] blk.6.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 59/ 291] blk.6.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 60/ 291] blk.6.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 61/ 291] blk.6.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 62/ 291] blk.6.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 63/ 291] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 64/ 291] blk.6.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 65/ 291] blk.7.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 66/ 291] blk.7.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 67/ 291] blk.7.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 68/ 291] blk.7.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 69/ 291] blk.7.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 70/ 291] blk.7.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 71/ 291] blk.7.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 72/ 291] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 73/ 291] blk.7.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 74/ 291] blk.8.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 75/ 291] blk.8.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 76/ 291] blk.8.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 77/ 291] blk.8.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 78/ 291] blk.8.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 79/ 291] blk.8.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 80/ 291] blk.8.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 81/ 291] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 82/ 291] blk.8.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 83/ 291] blk.9.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 84/ 291] blk.9.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 85/ 291] blk.9.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 86/ 291] blk.9.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 87/ 291] blk.9.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 88/ 291] blk.9.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 89/ 291] blk.9.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 90/ 291] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 91/ 291] blk.9.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 92/ 291] blk.10.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 93/ 291] blk.10.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 94/ 291] blk.10.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 95/ 291] blk.10.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 96/ 291] blk.10.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 97/ 291] blk.10.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 98/ 291] blk.10.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 99/ 291] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 100/ 291] blk.10.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 101/ 291] blk.11.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 102/ 291] blk.11.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 103/ 291] blk.11.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 104/ 291] blk.11.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 105/ 291] blk.11.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 106/ 291] blk.11.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 107/ 291] blk.11.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 108/ 291] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 109/ 291] blk.11.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 110/ 291] blk.12.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 111/ 291] blk.12.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 112/ 291] blk.12.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 113/ 291] blk.12.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 114/ 291] blk.12.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 115/ 291] blk.12.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 116/ 291] blk.12.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 117/ 291] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 118/ 291] blk.12.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 119/ 291] blk.13.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 120/ 291] blk.13.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 121/ 291] blk.13.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 122/ 291] blk.13.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 123/ 291] blk.13.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 124/ 291] blk.13.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 125/ 291] blk.13.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 126/ 291] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 127/ 291] blk.13.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 128/ 291] blk.14.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 129/ 291] blk.14.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 130/ 291] blk.14.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 131/ 291] blk.14.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 132/ 291] blk.14.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 133/ 291] blk.14.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 134/ 291] blk.14.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 135/ 291] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 136/ 291] blk.14.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 137/ 291] blk.15.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 138/ 291] blk.15.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 139/ 291] blk.15.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 140/ 291] blk.15.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 141/ 291] blk.15.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 142/ 291] blk.15.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 143/ 291] blk.15.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 144/ 291] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 145/ 291] blk.15.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 146/ 291] blk.16.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 147/ 291] blk.16.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 148/ 291] blk.16.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 149/ 291] blk.16.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 150/ 291] blk.16.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 151/ 291] blk.16.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 152/ 291] blk.16.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 153/ 291] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 154/ 291] blk.16.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 155/ 291] blk.17.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 156/ 291] blk.17.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 157/ 291] blk.17.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 158/ 291] blk.17.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 159/ 291] blk.17.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 160/ 291] blk.17.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 161/ 291] blk.17.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 162/ 291] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 163/ 291] blk.17.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 164/ 291] blk.18.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 165/ 291] blk.18.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 166/ 291] blk.18.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 167/ 291] blk.18.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 168/ 291] blk.18.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 169/ 291] blk.18.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 170/ 291] blk.18.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 171/ 291] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 172/ 291] blk.18.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 173/ 291] blk.19.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 174/ 291] blk.19.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 175/ 291] blk.19.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 176/ 291] blk.19.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 177/ 291] blk.19.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 178/ 291] blk.19.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 179/ 291] blk.19.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 180/ 291] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 181/ 291] blk.19.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 182/ 291] blk.20.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 183/ 291] blk.20.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 184/ 291] blk.20.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 185/ 291] blk.20.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 186/ 291] blk.20.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 187/ 291] blk.20.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 188/ 291] blk.20.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 189/ 291] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 190/ 291] blk.20.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 191/ 291] blk.21.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 192/ 291] blk.21.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 193/ 291] blk.21.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 194/ 291] blk.21.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 195/ 291] blk.21.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 196/ 291] blk.21.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 197/ 291] blk.21.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 198/ 291] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 199/ 291] blk.21.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 200/ 291] blk.22.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 201/ 291] blk.22.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 202/ 291] blk.22.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 203/ 291] blk.22.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 204/ 291] blk.22.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 205/ 291] blk.22.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 206/ 291] blk.22.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 207/ 291] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 208/ 291] blk.22.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 209/ 291] blk.23.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 210/ 291] blk.23.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 211/ 291] blk.23.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 212/ 291] blk.23.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 213/ 291] blk.23.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 214/ 291] blk.23.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 215/ 291] blk.23.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 216/ 291] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 217/ 291] blk.23.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 218/ 291] blk.24.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 219/ 291] blk.24.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 220/ 291] blk.24.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 221/ 291] blk.24.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 222/ 291] blk.24.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 223/ 291] blk.24.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 224/ 291] blk.24.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 225/ 291] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 226/ 291] blk.24.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 227/ 291] blk.25.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 228/ 291] blk.25.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 229/ 291] blk.25.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 230/ 291] blk.25.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 231/ 291] blk.25.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 232/ 291] blk.25.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 233/ 291] blk.25.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 234/ 291] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 235/ 291] blk.25.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 236/ 291] blk.26.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 237/ 291] blk.26.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 238/ 291] blk.26.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 239/ 291] blk.26.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 240/ 291] blk.26.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 241/ 291] blk.26.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 242/ 291] blk.26.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 243/ 291] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 244/ 291] blk.26.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 245/ 291] blk.27.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 246/ 291] blk.27.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 247/ 291] blk.27.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 248/ 291] blk.27.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 249/ 291] blk.27.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 250/ 291] blk.27.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 251/ 291] blk.27.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 252/ 291] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 253/ 291] blk.27.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 254/ 291] blk.28.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 255/ 291] blk.28.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 256/ 291] blk.28.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 257/ 291] blk.28.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 258/ 291] blk.28.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 259/ 291] blk.28.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 260/ 291] blk.28.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 261/ 291] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 262/ 291] blk.28.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 263/ 291] blk.29.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 264/ 291] blk.29.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 265/ 291] blk.29.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 266/ 291] blk.29.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 267/ 291] blk.29.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 268/ 291] blk.29.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 269/ 291] blk.29.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 270/ 291] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 271/ 291] blk.29.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 272/ 291] blk.30.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 273/ 291] blk.30.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 274/ 291] blk.30.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 275/ 291] blk.30.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 276/ 291] blk.30.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 277/ 291] blk.30.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 278/ 291] blk.30.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 279/ 291] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 280/ 291] blk.30.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 281/ 291] blk.31.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 282/ 291] blk.31.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 283/ 291] blk.31.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 284/ 291] blk.31.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
"[ 285/ 291] blk.31.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 286/ 291] blk.31.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 287/ 291] blk.31.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
"[ 288/ 291] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 289/ 291] blk.31.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 290/ 291] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
"[ 291/ 291] output.weight - [ 4096, 32016, 1, 1], type = f16, quantizing to q6_K .. size = 250.12 MB -> 102.59 MB | hist: \n",
"llama_model_quantize_internal: model size = 12853.27 MB\n",
"llama_model_quantize_internal: quant size = 3677.45 MB\n",
"\n",
"main: quantize time = 1089230.46 ms\n",
"main: total time = 1089230.46 ms\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Run inference\n",
"\n",
"Here is a simple script to run your quantized models. I'm offloading every layer to the GPU (35 for a 7b parameter model) to speed up inference."
],
"metadata": {
"id": "WqI1CPiXI4dP"
}
},
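{
"cell_type": "markdown",
"source": [
"The following is a minimal sketch, assuming `llama-cpp-python` is installed (e.g. `!pip install llama-cpp-python`) and built with CUDA support. It loads one of the GGUF files produced above; `n_gpu_layers=35` mirrors the `-ngl 35` flag used in the CLI cell below. The `gguf_files` name is just for illustration."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Minimal sketch (assumes llama-cpp-python is installed with CUDA support).\n",
"# Loads one of the quantized GGUF files produced above and offloads all\n",
"# 35 layers to the GPU, mirroring the -ngl 35 flag passed to llama.cpp below.\n",
"import os\n",
"from llama_cpp import Llama\n",
"\n",
"# Pick any GGUF file from the model folder (illustrative helper name)\n",
"gguf_files = [f for f in os.listdir(MODEL_NAME) if \"gguf\" in f]\n",
"llm = Llama(model_path=f\"{MODEL_NAME}/{gguf_files[0]}\", n_gpu_layers=35)\n",
"\n",
"output = llm(\"Write a Python function that reverses a string.\", max_tokens=128)\n",
"print(output[\"choices\"][0][\"text\"])"
],
"metadata": {},
"execution_count": null,
"outputs": []
},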
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"model_list = [file for file in os.listdir(MODEL_NAME) if \"gguf\" in file]\n",
"\n",
"prompt = input(\"Enter your prompt: \")\n",
"chosen_method = input(\"Name of the model (options: \" + \", \".join(model_list) + \"): \")\n",
"\n",
"# Verify the chosen method is in the list\n",
"if chosen_method not in model_list:\n",
" print(\"Invalid name\")\n",
"else:\n",
" qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
" !./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p \"{prompt}\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vNPL9WYg78l-",
"outputId": "3c3e7d2f-f0de-429d-fd97-dab480bc514a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Enter your prompt: prompt\n",
"Please specify the quantization method to run the model (options: q4_k_s): q4_k_s\n",
"main: build = 1100 (dd0dc36)\n",
"main: seed = 1693227123\n",
"ggml_init_cublas: found 1 CUDA devices:\n",
" Device 0: Tesla T4, compute capability 7.5\n",
"llama_model_loader: loaded meta data with 17 key-value pairs and 291 tensors from EvolCodeLlama-7b/evolcodellama-7b.gguf.q4_k_s.bin (version GGUF V2 (latest))\n",
"llama_model_loader: - tensor 0: token_embd.weight q4_K [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - tensor 1: blk.0.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 2: blk.0.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 3: blk.0.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 4: blk.0.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 5: blk.0.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 6: blk.0.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 7: blk.0.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 10: blk.1.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 11: blk.1.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 12: blk.1.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 13: blk.1.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 14: blk.1.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 15: blk.1.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 16: blk.1.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 19: blk.2.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 20: blk.2.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 21: blk.2.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 22: blk.2.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 23: blk.2.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 24: blk.2.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 25: blk.2.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 28: blk.3.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 29: blk.3.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 30: blk.3.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 31: blk.3.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 32: blk.3.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 33: blk.3.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 34: blk.3.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 37: blk.4.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 38: blk.4.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 39: blk.4.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 40: blk.4.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 41: blk.4.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 42: blk.4.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 43: blk.4.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 46: blk.5.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 47: blk.5.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 48: blk.5.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 49: blk.5.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 50: blk.5.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 51: blk.5.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 52: blk.5.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 55: blk.6.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 56: blk.6.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 57: blk.6.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 58: blk.6.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 59: blk.6.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 60: blk.6.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 61: blk.6.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 64: blk.7.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 65: blk.7.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 66: blk.7.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 67: blk.7.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 68: blk.7.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 69: blk.7.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 70: blk.7.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 73: blk.8.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 74: blk.8.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 75: blk.8.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 76: blk.8.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 77: blk.8.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 78: blk.8.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 79: blk.8.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 82: blk.9.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 83: blk.9.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 84: blk.9.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 85: blk.9.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 86: blk.9.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 87: blk.9.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 88: blk.9.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 91: blk.10.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 92: blk.10.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 93: blk.10.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 94: blk.10.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 95: blk.10.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 96: blk.10.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 97: blk.10.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 100: blk.11.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 101: blk.11.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 102: blk.11.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 103: blk.11.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 104: blk.11.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 105: blk.11.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 106: blk.11.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 109: blk.12.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 110: blk.12.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 111: blk.12.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 112: blk.12.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 113: blk.12.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 114: blk.12.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 115: blk.12.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 118: blk.13.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 119: blk.13.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 120: blk.13.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 121: blk.13.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 122: blk.13.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 123: blk.13.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 124: blk.13.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 127: blk.14.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 128: blk.14.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 129: blk.14.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 130: blk.14.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 131: blk.14.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 132: blk.14.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 133: blk.14.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 136: blk.15.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 137: blk.15.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 138: blk.15.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 139: blk.15.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 141: blk.15.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 142: blk.15.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 145: blk.16.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 146: blk.16.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 147: blk.16.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 148: blk.16.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 149: blk.16.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 150: blk.16.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 151: blk.16.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 154: blk.17.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 155: blk.17.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 156: blk.17.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 157: blk.17.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 158: blk.17.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 159: blk.17.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 160: blk.17.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 163: blk.18.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 164: blk.18.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 165: blk.18.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 166: blk.18.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 167: blk.18.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 168: blk.18.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 169: blk.18.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 172: blk.19.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 173: blk.19.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 174: blk.19.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 175: blk.19.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 176: blk.19.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 177: blk.19.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 178: blk.19.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 181: blk.20.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 182: blk.20.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 183: blk.20.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 184: blk.20.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 185: blk.20.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 186: blk.20.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 187: blk.20.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 190: blk.21.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 191: blk.21.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 192: blk.21.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 193: blk.21.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 194: blk.21.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 195: blk.21.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 196: blk.21.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 199: blk.22.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 200: blk.22.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 201: blk.22.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 202: blk.22.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 203: blk.22.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 204: blk.22.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 205: blk.22.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 208: blk.23.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 209: blk.23.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 210: blk.23.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 211: blk.23.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 212: blk.23.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 213: blk.23.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 214: blk.23.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 217: blk.24.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 218: blk.24.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 219: blk.24.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 220: blk.24.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 221: blk.24.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 222: blk.24.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 223: blk.24.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 226: blk.25.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 227: blk.25.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 228: blk.25.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 229: blk.25.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 230: blk.25.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 231: blk.25.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 232: blk.25.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 235: blk.26.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 236: blk.26.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 237: blk.26.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 238: blk.26.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 239: blk.26.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 240: blk.26.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 241: blk.26.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 244: blk.27.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 245: blk.27.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 246: blk.27.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 247: blk.27.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 248: blk.27.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 249: blk.27.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 250: blk.27.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 253: blk.28.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 254: blk.28.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 255: blk.28.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 256: blk.28.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 257: blk.28.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 258: blk.28.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 259: blk.28.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 262: blk.29.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 263: blk.29.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 264: blk.29.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 265: blk.29.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 266: blk.29.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 267: blk.29.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 268: blk.29.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 271: blk.30.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 272: blk.30.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 273: blk.30.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 274: blk.30.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 275: blk.30.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 276: blk.30.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 277: blk.30.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 280: blk.31.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 281: blk.31.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 282: blk.31.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 283: blk.31.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 284: blk.31.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 285: blk.31.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 286: blk.31.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 290: output.weight q6_K [ 4096, 32016, 1, 1 ]\n",
"llama_model_loader: - kv 0: general.architecture str \n",
"llama_model_loader: - kv 1: general.name str \n",
"llama_model_loader: - kv 2: llama.context_length u32 \n",
"llama_model_loader: - kv 3: llama.embedding_length u32 \n",
"llama_model_loader: - kv 4: llama.block_count u32 \n",
"llama_model_loader: - kv 5: llama.feed_forward_length u32 \n",
"llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n",
"llama_model_loader: - kv 7: llama.attention.head_count u32 \n",
"llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n",
"llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n",
"llama_model_loader: - kv 10: llama.rope.freq_base f32 \n",
"llama_model_loader: - kv 11: general.file_type u32 \n",
"llama_model_loader: - kv 12: tokenizer.ggml.model str \n",
"llama_model_loader: - kv 13: tokenizer.ggml.tokens arr \n",
"llama_model_loader: - kv 14: tokenizer.ggml.scores arr \n",
"llama_model_loader: - kv 15: tokenizer.ggml.token_type arr \n",
"llama_model_loader: - kv 16: general.quantization_version u32 \n",
"llama_model_loader: - type f32: 65 tensors\n",
"llama_model_loader: - type q4_K: 217 tensors\n",
"llama_model_loader: - type q5_K: 8 tensors\n",
"llama_model_loader: - type q6_K: 1 tensors\n",
"llm_load_print_meta: format = GGUF V2 (latest)\n",
"llm_load_print_meta: arch = llama\n",
"llm_load_print_meta: vocab type = SPM\n",
"llm_load_print_meta: n_vocab = 32016\n",
"llm_load_print_meta: n_merges = 0\n",
"llm_load_print_meta: n_ctx_train = 16384\n",
"llm_load_print_meta: n_ctx = 512\n",
"llm_load_print_meta: n_embd = 4096\n",
"llm_load_print_meta: n_head = 32\n",
"llm_load_print_meta: n_head_kv = 32\n",
"llm_load_print_meta: n_layer = 32\n",
"llm_load_print_meta: n_rot = 128\n",
"llm_load_print_meta: n_gqa = 1\n",
"llm_load_print_meta: f_norm_eps = 1.0e-05\n",
"llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n",
"llm_load_print_meta: n_ff = 11008\n",
"llm_load_print_meta: freq_base = 1000000.0\n",
"llm_load_print_meta: freq_scale = 1\n",
"llm_load_print_meta: model type = 7B\n",
"llm_load_print_meta: model ftype = mostly Q4_K - Small\n",
"llm_load_print_meta: model size = 6.74 B\n",
"llm_load_print_meta: general.name = LLaMA\n",
"llm_load_print_meta: BOS token = 1 '<s>'\n",
"llm_load_print_meta: EOS token = 2 '</s>'\n",
"llm_load_print_meta: UNK token = 0 '<unk>'\n",
"llm_load_print_meta: LF token = 13 '<0x0A>'\n",
"llm_load_tensors: ggml ctx size = 0.09 MB\n",
"llm_load_tensors: using CUDA for GPU acceleration\n",
"llm_load_tensors: mem required = 70.44 MB (+ 256.00 MB per state)\n",
"llm_load_tensors: offloading 32 repeating layers to GPU\n",
"llm_load_tensors: offloading non-repeating layers to GPU\n",
"llm_load_tensors: offloading v cache to GPU\n",
"llm_load_tensors: offloading k cache to GPU\n",
"llm_load_tensors: offloaded 35/35 layers to GPU\n",
"llm_load_tensors: VRAM used: 3864 MB\n",
"..................................................................................................\n",
"llama_new_context_with_model: kv self size = 256.00 MB\n",
"llama_new_context_with_model: compute buffer total size = 71.94 MB\n",
"llama_new_context_with_model: VRAM scratch buffer: 70.53 MB\n",
"\n",
"system_info: n_threads = 2 / 2 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | \n",
"sampling: repeat_last_n = 64, repeat_penalty = 1.100000, presence_penalty = 0.000000, frequency_penalty = 0.000000, top_k = 40, tfs_z = 1.000000, top_p = 0.950000, typical_p = 1.000000, temp = 0.800000, mirostat = 0, mirostat_lr = 0.100000, mirostat_ent = 5.000000\n",
"generate: n_ctx = 512, n_batch = 512, n_predict = 128, n_keep = 0\n",
"\n",
"\n",
"\u001b[33m prompt\u001b[0m.\t\t\t\t\n",
"\t\t\t\t\tif( !this->m_pMiscSettings ) { return; }\t// If no misc settings, do nothing\n",
"\t\t\t\t\t\n",
"\t\t\t\t\t// Get the value of the checkbox for \"Always on top\"\n",
"\t\t\t\t\tbool alwaysOnTop = this->m_pMiscSettings->GetBool(L\"AlwaysOnTop\", false);\n",
"\t\t\t\t\tthis->SetWindowPos((alwaysOnTop ? HWND_TOPMOST : HWND_NOTOPMOST\n",
"llama_print_timings: load time = 1392.10 ms\n",
"llama_print_timings: sample time = 147.99 ms / 128 runs ( 1.16 ms per token, 864.92 tokens per second)\n",
"llama_print_timings: prompt eval time = 261.80 ms / 2 tokens ( 130.90 ms per token, 7.64 tokens per second)\n",
"llama_print_timings: eval time = 5923.18 ms / 127 runs ( 46.64 ms per token, 21.44 tokens per second)\n",
"llama_print_timings: total time = 6370.96 ms\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Push to hub\n",
"\n",
"To push your model to the hub, you'll need to input your Hugging Face token (https://huggingface.co/settings/tokens) in Google Colab's \"Secrets\" tab. The following code creates a new repo with the \"-GGUF\" suffix. Don't forget to change the `username` variable."
],
"metadata": {
"id": "Ar8pO7bb80US"
}
},
{
"cell_type": "code",
"source": [
"!pip install -q huggingface_hub\n",
"from huggingface_hub import create_repo, HfApi\n",
"from google.colab import userdata\n",
"\n",
"# Defined in the secrets tab in Google Colab\n",
"hf_token = userdata.get('huggingface')\n",
"\n",
"api = HfApi()\n",
"username = \"mlabonne\"\n",
"\n",
"# Create empty repo\n",
"create_repo(\n",
" repo_id = f\"{username}/{MODEL_NAME}-GGUF\",\n",
" repo_type=\"model\",\n",
" exist_ok=True,\n",
" token=hf_token\n",
")\n",
"\n",
"# Upload gguf files\n",
"api.upload_folder(\n",
" folder_path=MODEL_NAME,\n",
" repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n",
" allow_patterns=f\"*.gguf\",\n",
" token=hf_token\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 163,
"referenced_widgets": [
"c281b60e104f4c5da547bbdd7208d4bc",
"74b084c97f6f46d293a197bf9804460c",
"1409574c4f9742e7a711965dd2c8ad87",
"704ecf9409244e0b93612d6a11476346",
"b1a8d3a9a379415393d9e7d995a40788",
"f928772f92724579b068e984d9eef387",
"1c8a6b959f9c4443a92f58eff1b03077",
"9fb5726f91734b1da149784680dc9624",
"202a8eb11eda4e58942113fbeacfdc3d",
"970d4d3daf854f92bd650dc4da99e1bc",
"24b1e007921046b1adc61db0f2bf9fc7",
"24d3d72f5de54de8a1ded4e528dde332",
"e90cb0ce526a4556bc643ba6c5485661",
"76e7372656b745c889b9283b76c04148",
"ce0204c7e1ff4a51b2648284a2492262",
"6dbb8e8a5ebb40a4ba910b09dde27e1a",
"7944af54f2564920822d5d4b348896c4",
"1b55372f62494ca0baabf87f7e7f4ba8",
"bf612001ad354ea19de6ee45a166a43c",
"a8e4691970b14955bfb4865bcef5e912",
"2e2fabac70484c1c8b16fa6ca8fd8537",
"bf53c635fa374420ad850eea22cd1e31",
"065d59126a734c1aa096ba40cd4a129f",
"e8855d5678a342f5a33171aa74d3b7bc",
"7eb6de1a979b46f7b234724073f8bc3a",
"6ae4640196da492fadafeb63f4bc89d2",
"cef83433dbea4f529f43722fe78a8baf",
"845ba8115d5140ac9ee22af4a9e6a03b",
"cdd888041aca4dcf8adc785309071fc6",
"cf63214cb4f8442999fa5b971035fe4f",
"7d9b22f2b7fe4a749f989e247bce446a",
"7f8e268db8144adfb09d089784d8411a"
]
},
"id": "UOyKfUD-8jmh",
"outputId": "3c8df47b-f350-4251-a19f-4b9fb1116381"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/268.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.7/268.8 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "c281b60e104f4c5da547bbdd7208d4bc"
}
},
"metadata": {}
}
]
}
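,
{
"cell_type": "markdown",
"source": [
"## Run the quantized model (optional)\n",
"\n",
"Once the upload finishes, anyone can download a GGUF file from the new repo and run it locally. The next cell is a minimal sketch using `hf_hub_download` and `llama-cpp-python`: it reuses the `username` and `MODEL_NAME` variables from the cells above, and the `Q4_K_M` filename is an assumption, so adjust it to match one of the files you actually uploaded."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Minimal usage sketch: download one of the uploaded GGUF files and run it\n",
"# with llama-cpp-python. Assumptions: a Q4_K_M file exists in the repo and\n",
"# `username` and `MODEL_NAME` are still defined from the cells above.\n",
"!pip install -q llama-cpp-python\n",
"\n",
"from huggingface_hub import hf_hub_download\n",
"from llama_cpp import Llama\n",
"\n",
"# Download a quantized file from the repo created above\n",
"gguf_path = hf_hub_download(\n",
"    repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n",
"    filename=f\"{MODEL_NAME.lower()}.Q4_K_M.gguf\",  # hypothetical name, adapt to your files\n",
")\n",
"\n",
"# Load the model on CPU and generate a short completion\n",
"llm = Llama(model_path=gguf_path, n_ctx=512)\n",
"output = llm(\"Q: What is quantization? A:\", max_tokens=64)\n",
"print(output[\"choices\"][0][\"text\"])"
],
"metadata": {},
"execution_count": null,
"outputs": []
}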
]
}