"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [80/80 07:21, Epoch 4/5]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 2.275600 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2.245900 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1.933500 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1.858800 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 2.012600 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1.801800 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1.794000 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1.489300 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1.587700 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 1.560400 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 1.471600 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 1.551800 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 1.598900 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 1.403500 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 1.195500 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 1.334300 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 1.191300 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 1.072000 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 1.151500 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 1.109000 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 1.135800 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 1.122000 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 0.953200 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 1.027600 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 0.940800 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 0.907100 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 0.784400 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 0.880200 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 1.014100 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 0.843800 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 1.039000 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 0.733400 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 0.676000 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 0.628600 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 0.906400 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 0.530600 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 0.678700 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 0.595400 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 0.748500 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 0.590200 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 0.563200 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 0.639400 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 0.513500 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 0.645800 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " 0.542300 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " 0.364400 | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " 0.481800 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " 0.647700 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " 0.489400 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " 0.634600 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " 0.365600 | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " 0.420700 | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " 0.487100 | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " 0.533600 | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " 0.361700 | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " 0.460900 | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " 0.515300 | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " 0.547600 | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " 0.514300 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " 0.547600 | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " 0.409700 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " 0.347000 | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " 0.467800 | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " 0.429700 | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " 0.441100 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " 0.406900 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " 0.505200 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " 0.405800 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " 0.427400 | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " 0.528000 | \n",
+ "
\n",
+ " \n",
+ " 71 | \n",
+ " 0.290200 | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " 0.301500 | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " 0.484300 | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " 0.383900 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " 0.444400 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " 0.424000 | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " 0.486000 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " 0.480600 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " 0.397400 | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " 0.419400 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=80, training_loss=0.8391439635306597, metrics={'train_runtime': 447.6633, 'train_samples_per_second': 0.715, 'train_steps_per_second': 0.179, 'total_flos': 649997819142144.0, 'train_loss': 0.8391439635306597, 'epoch': 4.05})"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 27
+ }
+ ]
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/1.84M [00:00, ?B/s]"
+ "cell_type": "markdown",
+ "source": [
+ "I trained it for 100 epochs, and as you can observe, the loss consistently decreases, indicating room for further improvement.\n",
+ "\n",
+ "NOTE: ***Consider extending the training to a higher number of epochs for potential enhancements***"
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "556b1e5bb1364189acc6ab399320e869"
+ "metadata": {
+ "id": "NKLu3hYCYWbW"
}
- },
- "metadata": {}
},
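+ {
+ "cell_type": "markdown",
+ "source": [
+ "A minimal sketch (not run here) of extending the training budget: raise `max_steps` in `TrainingArguments` (or drop it and set `num_train_epochs` instead) and call `trainer.train()` again. The values below are illustrative assumptions, not tuned settings."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# illustrative sketch: train for longer by raising the step budget\n",
+ "# (the values here are assumptions, not tuned settings)\n",
+ "longer_args = transformers.TrainingArguments(\n",
+ "    per_device_train_batch_size=1,\n",
+ "    gradient_accumulation_steps=4,\n",
+ "    learning_rate=2e-4,\n",
+ "    fp16=True,\n",
+ "    output_dir=OUTPUT_DIR,\n",
+ "    max_steps=240,  # e.g. 3x the original 80 steps\n",
+ "    optim=\"paged_adamw_8bit\",\n",
+ "    lr_scheduler_type=\"cosine\",\n",
+ "    warmup_ratio=0.05,\n",
+ ")\n",
+ "# trainer = transformers.Trainer(model=model, train_dataset=data, args=longer_args,\n",
+ "#     data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False))\n",
+ "# trainer.train()"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },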
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/411 [00:00, ?B/s]"
+ "cell_type": "markdown",
+ "source": [
+ "### Save model in local system"
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "90a73d86ca8e4c77aea19bccb8fd654b"
+ "metadata": {
+ "id": "Bm-4ny2SYgYz"
}
- },
- "metadata": {}
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "# helper function to print number of trainable parameters\n",
- "def print_trainable_parameters(model):\n",
- " \"\"\"\n",
- " Prints the number of trainable parameters in the model.\n",
- " \"\"\"\n",
- " trainable_params = 0\n",
- " all_param = 0\n",
- " for _, param in model.named_parameters():\n",
- " all_param += param.numel()\n",
- " if param.requires_grad:\n",
- " trainable_params += param.numel()\n",
- " print(\n",
- " f\"Trainable params: {trainable_params} || All params: {all_param} || Trainable%: {100 * trainable_params / all_param}\"\n",
- " )"
- ],
- "metadata": {
- "id": "ZHizPE78Mk-r"
- },
- "execution_count": 13,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "model.gradient_checkpointing_enable()\n",
- "model = prepare_model_for_kbit_training(model)"
- ],
- "metadata": {
- "id": "HptrfjwCMlBs"
- },
- "execution_count": 14,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "**LoraConfig** allows you to control how LoRA is applied to the base model through the following parameters: \\\n",
- "\n",
- "**r**: the rank of the update matrices, expressed in int. Lower rank results in smaller update matrices with fewer trainable parameters. \\\n",
- "\n",
- "**target_modules**: The modules (for example, attention blocks) to apply the LoRA update matrices. \\\n",
- "\n",
- "**alpha** : LoRA scaling factor. \\\n",
- "\n",
- "**bias**: Specifies if the bias parameters should be trained. Can be 'none', 'all' or 'lora_only'. \\\n",
- "\n",
- "**modules_to_save**: List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint. These typically include model’s custom head that is randomly initialized for the fine-tuning task.\n",
- "**layers_to_transform**: List of layers to be transformed by LoRA. If not specified, all layers in target_modules are transformed. \\\n",
- "\n",
- "**layers_pattern**: Pattern to match layer names in target_modules, if layers_to_transform is specified. By default PeftModel will look at common layer pattern (layers, h, blocks, etc.), use it for exotic and custom models. \\\n",
- "\n",
- "**rank_pattern**: The mapping from layer names or regexp expression to ranks which are different from the default rank specified by r. \\\n",
- "\n",
- "**alpha_pattern**: The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by lora_alpha."
- ],
- "metadata": {
- "id": "6fJ9W-hje4sz"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "from peft import LoraConfig, get_peft_model\n",
- "\n",
- "lora_alpha = 16\n",
- "lora_dropout = 0.1\n",
- "lora_r = 64\n",
- "\n",
- "config = LoraConfig(\n",
- " lora_alpha=lora_alpha,\n",
- " lora_dropout=lora_dropout,\n",
- " r=lora_r,\n",
- " bias=\"none\",\n",
- " task_type=\"CAUSAL_LM\",\n",
- ")\n",
- "\n",
- "model = get_peft_model(model, config)\n",
- "print_trainable_parameters(model)"
- ],
- "metadata": {
- "id": "lSXo09-4MlEi",
- "outputId": "d01356bc-b71f-46b6-cb8a-7dc8d2685555",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 15,
- "outputs": [
+ },
{
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Trainable params: 33554432 || All params: 3533967360 || Trainable%: 0.9494833591219133\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "# Inference Before Training\n",
- "\n",
- "prompt = f\"\"\"\n",
- ": How can I create an account?\n",
- ":\n",
- "\"\"\".strip()\n",
- "print(prompt)"
- ],
- "metadata": {
- "id": "xxyk3cGDMlJO",
- "outputId": "ba282dee-f7a6-4574-ebe8-efaaf758f52d",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 16,
- "outputs": [
+ "cell_type": "code",
+ "source": [
+ "model.save_pretrained(\"trained-model\")"
+ ],
+ "metadata": {
+ "id": "YQ4VipiaQ38Q"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
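+ {
+ "cell_type": "markdown",
+ "source": [
+ "For a PEFT model, `save_pretrained` writes only the adapter files (`adapter_config.json` and `adapter_model.safetensors`), not the full base weights. A sketch of reloading from this local folder, assuming the quantized base model is loaded as earlier (`base_model` is a placeholder name):"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from peft import PeftModel\n",
+ "\n",
+ "# `base_model` is assumed to be the quantized base checkpoint loaded earlier\n",
+ "model = PeftModel.from_pretrained(base_model, \"trained-model\")"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },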
{
- "output_type": "stream",
- "name": "stdout",
- "text": [
- ": How can I create an account?\n",
- ":\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "generation_config = model.generation_config\n",
- "generation_config.max_new_tokens = 80\n",
- "generation_config.temperature = 0.7\n",
- "generation_config.top_p = 0.7\n",
- "generation_config.num_return_sequences = 1\n",
- "generation_config.pad_token_id = tokenizer.eos_token_id\n",
- "generation_config.eos_token_id = tokenizer.eos_token_id"
- ],
- "metadata": {
- "id": "Zal7MzltMlNc"
- },
- "execution_count": 17,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "# generation configurations\n",
- "generation_config"
- ],
- "metadata": {
- "id": "AF9pGCJ2MlUH",
- "outputId": "46da0b1a-00b2-42c0-b78c-3ba4f40345db",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 18,
- "outputs": [
+ "cell_type": "markdown",
+ "source": [
+ "### Push trained model in Hugging face\n",
+ "\n",
+ "NOTE: ***Here you have to change directory where you want to push your model***.\n",
+ "\n",
+ "For me it is \"Prasant/Llama2-7b-qlora-chat-support-bot-faq\""
+ ],
+ "metadata": {
+ "id": "gEWFr9vKYbvI"
+ }
+ },
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "GenerationConfig {\n",
- " \"bos_token_id\": 1,\n",
- " \"eos_token_id\": 2,\n",
- " \"max_new_tokens\": 80,\n",
- " \"pad_token_id\": 2,\n",
- " \"temperature\": 0.7,\n",
- " \"top_p\": 0.7\n",
- "}"
+ "cell_type": "code",
+ "source": [
+ "model.push_to_hub(\"Prasant/Llama2-7b-qlora-chat-support-bot-faq\")"
+ ],
+ "metadata": {
+ "id": "bvkTqEZFQ3_W",
+ "outputId": "4d5e4b9e-a8ec-4514-e8e6-a390dfa39dd9",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 85,
+ "referenced_widgets": [
+ "c91ff953b85847fd9bcbc952b259d13e",
+ "80055b5100d0410ab59ba6c000e5f65c",
+ "eb05122c60014feca974b9be59a89570",
+ "d3a6a5bcce6a4e7eaa2db04a53f5b933",
+ "c5ccd8c9f94749d391fb0815180cc7ed",
+ "64d26285c5d5413dab6ff9649e5b3b11",
+ "100084059b6d41858478218c3ffec02a",
+ "157518129a4d46c2b23b86223ae34ee4",
+ "e44f488773994690a9505db3c7f5ad6f",
+ "d6e500ae93e04beb8d5b80162df8cf9a",
+ "4f4ac78e20d64c12a4d3d394f7ab78ae"
+ ]
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "adapter_model.safetensors: 0%| | 0.00/134M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "c91ff953b85847fd9bcbc952b259d13e"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "CommitInfo(commit_url='https://huggingface.co/Prasant/Llama2-7b-qlora-chat-support-bot-faq/commit/afdc083726f49ccf925eda01e564e2a9520d92f3', commit_message='Upload model', commit_description='', oid='afdc083726f49ccf925eda01e564e2a9520d92f3', pr_url=None, pr_revision=None, pr_num=None)"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 29
+ }
]
- },
- "metadata": {},
- "execution_count": 18
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "%%time\n",
- "# Specify the target device for model execution, typically a GPU.\n",
- "device = \"cuda:0\"\n",
- "\n",
- "# Tokenize the input prompt and move it to the specified device.\n",
- "encoding = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
- "\n",
- "# Run model inference in evaluation mode (inference_mode) for efficiency.\n",
- "with torch.inference_mode():\n",
- " outputs = model.generate(\n",
- " input_ids=encoding.input_ids,\n",
- " attention_mask=encoding.attention_mask,\n",
- " generation_config=generation_config,\n",
- " )\n",
- "\n",
- "\n",
- "# Decode the generated output and print it, excluding special tokens.\n",
- "print(tokenizer.decode(outputs[0], skip_special_tokens=True))"
- ],
- "metadata": {
- "id": "cComMNlMNIYx",
- "outputId": "69fc1b92-880f-43e8-fced-0d138c0ba8cf",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 19,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:392: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
- " warnings.warn(\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:397: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
- " warnings.warn(\n"
- ]
},
{
- "output_type": "stream",
- "name": "stdout",
- "text": [
- ": How can I create an account?\n",
- ": How can I create an account?\n",
- ": How can I create an account? : How can I create an account?\n",
- "CPU times: user 6.25 s, sys: 826 ms, total: 7.08 s\n",
- "Wall time: 11.4 s\n"
- ]
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "#Build HuggingFace Dataset format"
- ],
- "metadata": {
- "id": "yostlhu9PKF2"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "data = load_dataset(\"json\", data_files=\"dataset.json\")"
- ],
- "metadata": {
- "id": "d_WmYk94NIfa",
- "outputId": "7490170d-8b15-4e8f-a366-9ce1fd4755b4",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 49,
- "referenced_widgets": [
- "0c2449f48a474e6f93cde3d312d3a5dc",
- "5ba64ba2f4c24866b7adbb99fd40d314",
- "4213e9108c5e4974b4b62401c4687447",
- "cc4d09c3a9214bfb867eec0af3f2d7b1",
- "a6c7363109fa41b5ad9dc462165c013a",
- "ec0c6f59b8bf4eb6bf0bfec533eab08d",
- "dc589955e6744383ad572bd704ab3328",
- "feea409ab6e444f992ea4fa895e66169",
- "e9429b3be5db42db9fda06f0f5aa34e9",
- "d07fb77c8e10407dbeda4130156b8f2a",
- "16a438b7ce6342b3b0a1b4f3f87d248b"
- ]
- }
- },
- "execution_count": 20,
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Generating train split: 0 examples [00:00, ? examples/s]"
+ "cell_type": "markdown",
+ "source": [
+ "In our approach, we've split the large model TinyPixel/Llama-2-7B-bf16 into more than 14 smaller parts, a method known as sharding. This strategy works well with the `accelerate` framework by huggingface.\n",
+ "\n",
+ "Each shard holds part of the model's data, and Accelerate helps distribute these parts across different memory types, like GPU and CPU. This way, we can handle large models without needing too much memory."
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "0c2449f48a474e6f93cde3d312d3a5dc"
+ "metadata": {
+ "id": "RFiESDCCRXG6"
}
- },
- "metadata": {}
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "data"
- ],
- "metadata": {
- "id": "TJkbZpXzNIiS",
- "outputId": "eef50255-f92f-49ae-b9ba-bfed60617139",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 21,
- "outputs": [
+ },
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "DatasetDict({\n",
- " train: Dataset({\n",
- " features: ['answer', 'question'],\n",
- " num_rows: 79\n",
- " })\n",
- "})"
- ]
- },
- "metadata": {},
- "execution_count": 21
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "data[\"train\"][0]"
- ],
- "metadata": {
- "id": "fSHb0QqWNIlh",
- "outputId": "47a0dea2-5958-48b2-f7bc-b0ac63123bc7",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 22,
- "outputs": [
+ "cell_type": "markdown",
+ "source": [
+ "### Load pushed model\n",
+ "\n",
+ "Load model from the directory you pushed, for me it is \"Prasant/Llama2-7b-qlora-chat-support-bot-faq\""
+ ],
+ "metadata": {
+ "id": "JuTrGGWg_zRY"
+ }
+ },
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "{'answer': \"To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.\",\n",
- " 'question': 'How can I create an account?'}"
+ "cell_type": "code",
+ "source": [
+ "PEFT_MODEL = \"Prasant/Llama2-7b-qlora-chat-support-bot-faq\"\n",
+ "\n",
+ "# loading trained model from hugging face\n",
+ "config = PeftConfig.from_pretrained(PEFT_MODEL)\n",
+ "model = AutoModelForCausalLM.from_pretrained(\n",
+ " config.base_model_name_or_path,\n",
+ " return_dict=True,\n",
+ " quantization_config=bnb_config,\n",
+ " device_map=\"auto\",\n",
+ " trust_remote_code=True,\n",
+ ")\n",
+ "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
+ "tokenizer.pad_token = tokenizer.eos_token\n",
+ "\n",
+ "model = PeftModel.from_pretrained(model, PEFT_MODEL)"
+ ],
+ "metadata": {
+ "id": "Fq9phlfIQ4E5",
+ "outputId": "69adcb7b-ee40-4834-adf4-2eca74ee5284",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 113,
+ "referenced_widgets": [
+ "25e4e057e547452fa7870203ecf304af",
+ "9d48543aaf23498f85b8ad162ed21a51",
+ "1227f4ab5ce745898d952c79afe2e118",
+ "c51db07388824a71980ea8b0734639d8",
+ "70c4820f69984344acfe5becc487057a",
+ "78477d0ebafe4bbe8f3404d846f05a0d",
+ "3696a81f9ea9425682caed310d9763c1",
+ "8dff8fc4d7f44193bc48e6121c1a63c2",
+ "752e1118c25e4b6db51049b679d851b6",
+ "0bc37e819ce74a99afd7ce4ebb73c245",
+ "0a2ba91f95e14c049d7616ed5cb0c73b",
+ "804eec09657b479183c258825f10d02d",
+ "f02fca9c66324f7ea611b29ddc862f75",
+ "b4e8aece348f400089c3f37781c119d6",
+ "fafb33d86b5a4da4ab37b121350cad8d",
+ "1e0286d9297c4c859fbbce69bf123971",
+ "1801009a0b30418d8fa4bc623cf290da",
+ "174ea14f5eba4cfe8d68a26f7d239a08",
+ "2134a7e4dca14686a564bd724efac364",
+ "325bf139f1aa4dbc9bafe500c1f77172",
+ "a16aa5dfef6049d5a3f71b7fc501ebb7",
+ "d213614cb3d34bbc833327333a4e7964",
+ "ee9f15735568419389482445f330a2cc",
+ "c5f05746bd6e49df8035419a6e2d247b",
+ "a81437a7cd1f44e693f9c43ded243d6b",
+ "6c91ae9a068b4aed94df30c352825341",
+ "0a34f2f771a947849b5e526a42584eb6",
+ "1c1ab77f2f2b4560863ee8dd51add0cf",
+ "335af2e8e0134e34b49328da24c73294",
+ "61f6ab50052c475797c37936f5616ed0",
+ "3e6d92c3b05d48fe8fc6f4a6da34e1ed",
+ "5fae4a9789234b69bab42d72d5dd2136",
+ "77884c522402433f95d1cda7d10c3f04"
+ ]
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "adapter_config.json: 0%| | 0.00/608 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "25e4e057e547452fa7870203ecf304af"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Loading checkpoint shards: 0%| | 0/14 [00:00, ?it/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "804eec09657b479183c258825f10d02d"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "adapter_model.safetensors: 0%| | 0.00/134M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "ee9f15735568419389482445f330a2cc"
+ }
+ },
+ "metadata": {}
+ }
]
- },
- "metadata": {},
- "execution_count": 22
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "def generate_prompt(data_point):\n",
- " return f\"\"\"\n",
- ": {data_point[\"question\"]}\n",
- ": {data_point[\"answer\"]}\n",
- "\"\"\".strip()\n",
- "\n",
- "\n",
- "def generate_and_tokenize_prompt(data_point):\n",
- " full_prompt = generate_prompt(data_point)\n",
- " tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)\n",
- " return tokenized_full_prompt"
- ],
- "metadata": {
- "id": "6PGm88MhNIp6"
- },
- "execution_count": 23,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "data = data[\"train\"].shuffle().map(generate_and_tokenize_prompt)"
- ],
- "metadata": {
- "id": "byzQ1CmQNItJ",
- "outputId": "5f275dc5-ba06-475d-bdb9-836f25168358",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 67,
- "referenced_widgets": [
- "7689dc301af34e50b31eb03199044994",
- "7997a206e4e9417ca48175d7c958aa49",
- "10cde979ec914e6eb52f297db23c297d",
- "779616cd217a4d1382f3955611276d84",
- "b621686032114e0dbd15d9e9cadd8c1b",
- "b11780aa116a493d92570db8a16013e3",
- "cbe0d6ad82e14f3a9cdac3ef93879ac4",
- "fcd98d26ebf84cc4b3674e0b341de679",
- "a4082600c40f486eb1b82df41dfd5d8d",
- "3754c400c4c742c19e0fcea32d92515f",
- "08b6cdda31414d74b07d890b62d4c8ba"
- ]
- }
- },
- "execution_count": 24,
- "outputs": [
+ },
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Map: 0%| | 0/79 [00:00, ? examples/s]"
+ "cell_type": "markdown",
+ "source": [
+ "### Do experiments with parameters and see what works for you and your data best"
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "7689dc301af34e50b31eb03199044994"
+ "metadata": {
+ "id": "LP_1aPCYYyvx"
}
- },
- "metadata": {}
},
{
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "data"
- ],
- "metadata": {
- "id": "xh8z2xhHKl0z",
- "outputId": "6514f74f-06b9-442d-da26-f3841cd46ddb",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 25,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "Dataset({\n",
- " features: ['answer', 'question', 'input_ids', 'attention_mask'],\n",
- " num_rows: 79\n",
- "})"
- ]
- },
- "metadata": {},
- "execution_count": 25
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "OUTPUT_DIR = \"experiments\""
- ],
- "metadata": {
- "id": "oNSwV8ECQ3pe"
- },
- "execution_count": 26,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "# training\n",
- "training_args = transformers.TrainingArguments(\n",
- " per_device_train_batch_size=1,\n",
- " gradient_accumulation_steps=4,\n",
- " num_train_epochs=1,\n",
- " learning_rate=2e-4,\n",
- " fp16=True,\n",
- " save_total_limit=3,\n",
- " logging_steps=1,\n",
- " output_dir=OUTPUT_DIR,\n",
- " max_steps=80,\n",
- " optim=\"paged_adamw_8bit\",\n",
- " lr_scheduler_type=\"cosine\",\n",
- " warmup_ratio=0.05,\n",
- " report_to=\"tensorboard\",\n",
- ")\n",
- "\n",
- "trainer = transformers.Trainer(\n",
- " model=model,\n",
- " train_dataset=data,\n",
- " args=training_args,\n",
- " data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
- ")\n",
- "model.config.use_cache = False\n",
- "trainer.train()"
- ],
- "metadata": {
- "id": "TO-zrLspQ347",
- "outputId": "434b300d-f633-4307-d9d2-53042d3ad88f",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
- },
- "execution_count": 27,
- "outputs": [
+ "cell_type": "code",
+ "source": [
+ "# model configuration, you can try changing these parameters\n",
+ "generation_config = model.generation_config\n",
+ "generation_config.max_new_tokens = 50\n",
+ "\n",
+ "# try using temperature parameter by uncommenting following\n",
+ "# generation_config.temperature = 0.3\n",
+ "generation_config.top_p = 0.7\n",
+ "generation_config.num_return_sequences = 1\n",
+ "generation_config.pad_token_id = tokenizer.eos_token_id\n",
+ "generation_config.eos_token_id = tokenizer.eos_token_id"
+ ],
+ "metadata": {
+ "id": "O-umn8k_RZ7h"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
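+ {
+ "cell_type": "markdown",
+ "source": [
+ "`temperature` and `top_p` only take effect when sampling is enabled; with the default greedy decoding, transformers warns that they are ignored. A minimal sketch enabling sampling (the values are illustrative):"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# enable sampling so temperature / top_p actually influence generation\n",
+ "generation_config.do_sample = True\n",
+ "generation_config.temperature = 0.7  # illustrative value\n",
+ "generation_config.top_p = 0.7"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },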
{
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
- " warnings.warn(\n"
- ]
+ "cell_type": "code",
+ "source": [
+ "# device configuration\n",
+ "DEVICE = \"cuda:0\""
+ ],
+ "metadata": {
+ "id": "dqP2fFuDRZ-o"
+ },
+ "execution_count": null,
+ "outputs": []
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- ""
+ "cell_type": "code",
+ "source": [
+ "%%time\n",
+ "prompt = f\"\"\"\n",
+ ": How can I create an account?\n",
+ ":\n",
+ "\"\"\".strip()\n",
+ "\n",
+ "encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n",
+ "with torch.inference_mode():\n",
+ " outputs = model.generate(\n",
+ " input_ids=encoding.input_ids,\n",
+ " attention_mask=encoding.attention_mask,\n",
+ " generation_config=generation_config,\n",
+ "\n",
+ " )\n",
+ "print(tokenizer.decode(outputs[0], skip_special_tokens=True))"
],
- "text/html": [
- "\n",
- " \n",
- " \n",
- "
\n",
- " [80/80 07:21, Epoch 4/5]\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Step | \n",
- " Training Loss | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 1 | \n",
- " 2.275600 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2.245900 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1.933500 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 1.858800 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 2.012600 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 1.801800 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 1.794000 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 1.489300 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 1.587700 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 1.560400 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 1.471600 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 1.551800 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 1.598900 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 1.403500 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 1.195500 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 1.334300 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 1.191300 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 1.072000 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 1.151500 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 1.109000 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 1.135800 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 1.122000 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 0.953200 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 1.027600 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 0.940800 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 0.907100 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 0.784400 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 0.880200 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 1.014100 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " 0.843800 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " 1.039000 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " 0.733400 | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " 0.676000 | \n",
- "
\n",
- " \n",
- " 34 | \n",
- " 0.628600 | \n",
- "
\n",
- " \n",
- " 35 | \n",
- " 0.906400 | \n",
- "
\n",
- " \n",
- " 36 | \n",
- " 0.530600 | \n",
- "
\n",
- " \n",
- " 37 | \n",
- " 0.678700 | \n",
- "
\n",
- " \n",
- " 38 | \n",
- " 0.595400 | \n",
- "
\n",
- " \n",
- " 39 | \n",
- " 0.748500 | \n",
- "
\n",
- " \n",
- " 40 | \n",
- " 0.590200 | \n",
- "
\n",
- " \n",
- " 41 | \n",
- " 0.563200 | \n",
- "
\n",
- " \n",
- " 42 | \n",
- " 0.639400 | \n",
- "
\n",
- " \n",
- " 43 | \n",
- " 0.513500 | \n",
- "
\n",
- " \n",
- " 44 | \n",
- " 0.645800 | \n",
- "
\n",
- " \n",
- " 45 | \n",
- " 0.542300 | \n",
- "
\n",
- " \n",
- " 46 | \n",
- " 0.364400 | \n",
- "
\n",
- " \n",
- " 47 | \n",
- " 0.481800 | \n",
- "
\n",
- " \n",
- " 48 | \n",
- " 0.647700 | \n",
- "
\n",
- " \n",
- " 49 | \n",
- " 0.489400 | \n",
- "
\n",
- " \n",
- " 50 | \n",
- " 0.634600 | \n",
- "
\n",
- " \n",
- " 51 | \n",
- " 0.365600 | \n",
- "
\n",
- " \n",
- " 52 | \n",
- " 0.420700 | \n",
- "
\n",
- " \n",
- " 53 | \n",
- " 0.487100 | \n",
- "
\n",
- " \n",
- " 54 | \n",
- " 0.533600 | \n",
- "
\n",
- " \n",
- " 55 | \n",
- " 0.361700 | \n",
- "
\n",
- " \n",
- " 56 | \n",
- " 0.460900 | \n",
- "
\n",
- " \n",
- " 57 | \n",
- " 0.515300 | \n",
- "
\n",
- " \n",
- " 58 | \n",
- " 0.547600 | \n",
- "
\n",
- " \n",
- " 59 | \n",
- " 0.514300 | \n",
- "
\n",
- " \n",
- " 60 | \n",
- " 0.547600 | \n",
- "
\n",
- " \n",
- " 61 | \n",
- " 0.409700 | \n",
- "
\n",
- " \n",
- " 62 | \n",
- " 0.347000 | \n",
- "
\n",
- " \n",
- " 63 | \n",
- " 0.467800 | \n",
- "
\n",
- " \n",
- " 64 | \n",
- " 0.429700 | \n",
- "
\n",
- " \n",
- " 65 | \n",
- " 0.441100 | \n",
- "
\n",
- " \n",
- " 66 | \n",
- " 0.406900 | \n",
- "
\n",
- " \n",
- " 67 | \n",
- " 0.505200 | \n",
- "
\n",
- " \n",
- " 68 | \n",
- " 0.405800 | \n",
- "
\n",
- " \n",
- " 69 | \n",
- " 0.427400 | \n",
- "
\n",
- " \n",
- " 70 | \n",
- " 0.528000 | \n",
- "
\n",
- " \n",
- " 71 | \n",
- " 0.290200 | \n",
- "
\n",
- " \n",
- " 72 | \n",
- " 0.301500 | \n",
- "
\n",
- " \n",
- " 73 | \n",
- " 0.484300 | \n",
- "
\n",
- " \n",
- " 74 | \n",
- " 0.383900 | \n",
- "
\n",
- " \n",
- " 75 | \n",
- " 0.444400 | \n",
- "
\n",
- " \n",
- " 76 | \n",
- " 0.424000 | \n",
- "
\n",
- " \n",
- " 77 | \n",
- " 0.486000 | \n",
- "
\n",
- " \n",
- " 78 | \n",
- " 0.480600 | \n",
- "
\n",
- " \n",
- " 79 | \n",
- " 0.397400 | \n",
- "
\n",
- " \n",
- " 80 | \n",
- " 0.419400 | \n",
- "
\n",
- " \n",
- "
"
+ "metadata": {
+ "id": "TScya6W0RaDL",
+ "outputId": "f4e599b4-9674-4f92-a2e7-7aabc66f28a3",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ ": How can I create an account?\n",
+ ": To create an account, click on the 'Sign Up' button on the top right corner of the website. Follow the instructions to complete the registration process.\n",
+ ": You can place an order by adding items to your shopping cart and proceeding to\n",
+ "CPU times: user 4.37 s, sys: 252 ms, total: 4.62 s\n",
+ "Wall time: 4.68 s\n"
+ ]
+ }
]
- },
- "metadata": {}
},
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "TrainOutput(global_step=80, training_loss=0.8391439635306597, metrics={'train_runtime': 447.6633, 'train_samples_per_second': 0.715, 'train_steps_per_second': 0.179, 'total_flos': 649997819142144.0, 'train_loss': 0.8391439635306597, 'epoch': 4.05})"
- ]
- },
- "metadata": {},
- "execution_count": 27
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "I trained it for 100 epochs, and as you can observe, the loss consistently decreases, indicating room for further improvement. Consider extending the training to a higher number of epochs for potential enhancements"
- ],
- "metadata": {
- "id": "NKLu3hYCYWbW"
- }
- },
- {
- "cell_type": "markdown",
- "source": [
- "## save the model"
- ],
- "metadata": {
- "id": "Bm-4ny2SYgYz"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "model.save_pretrained(\"trained-model\")"
- ],
- "metadata": {
- "id": "YQ4VipiaQ38Q"
- },
- "execution_count": 28,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Push the model in Hugging face"
- ],
- "metadata": {
- "id": "gEWFr9vKYbvI"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "model.push_to_hub(\"Prasant/Llama2-7b-qlora-chat-support-bot-faq\")"
- ],
- "metadata": {
- "id": "bvkTqEZFQ3_W",
- "outputId": "4d5e4b9e-a8ec-4514-e8e6-a390dfa39dd9",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 85,
- "referenced_widgets": [
- "c91ff953b85847fd9bcbc952b259d13e",
- "80055b5100d0410ab59ba6c000e5f65c",
- "eb05122c60014feca974b9be59a89570",
- "d3a6a5bcce6a4e7eaa2db04a53f5b933",
- "c5ccd8c9f94749d391fb0815180cc7ed",
- "64d26285c5d5413dab6ff9649e5b3b11",
- "100084059b6d41858478218c3ffec02a",
- "157518129a4d46c2b23b86223ae34ee4",
- "e44f488773994690a9505db3c7f5ad6f",
- "d6e500ae93e04beb8d5b80162df8cf9a",
- "4f4ac78e20d64c12a4d3d394f7ab78ae"
- ]
- }
- },
- "execution_count": 29,
- "outputs": [
+ "cell_type": "code",
+ "source": [
+ "# helper function to generate responses\n",
+ "def generate_response(question: str) -> str:\n",
+ " prompt = f\"\"\"\n",
+ ": {question}\n",
+ ":\n",
+ "\"\"\".strip()\n",
+ " encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n",
+ " with torch.inference_mode():\n",
+ " outputs = model.generate(\n",
+ " input_ids=encoding.input_ids,\n",
+ " attention_mask=encoding.attention_mask,\n",
+ " generation_config=generation_config,\n",
+ " )\n",
+ " response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "\n",
+ " assistant_start = \":\"\n",
+ " response_start = response.find(assistant_start)\n",
+ " return response[response_start + len(assistant_start) :].strip()"
+ ],
+ "metadata": {
+ "id": "6ZQc3XvhRaGa"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "adapter_model.safetensors: 0%| | 0.00/134M [00:00, ?B/s]"
+ "cell_type": "code",
+ "source": [
+ "# prompt\n",
+ "prompt = \"Question: Can I return a product if it was a clearance or final sale item?\"\n",
+ "print(generate_response(prompt))"
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "c91ff953b85847fd9bcbc952b259d13e"
- }
- },
- "metadata": {}
+ "metadata": {
+ "id": "IXiY44KGRaJv",
+ "outputId": "357e5eaa-f72c-4025-d8a7-6e739e70ad8a",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Question: Can I return a product if it was a clearance or final sale item?\n",
+ ": Clearance or final sale items are typically non-returnable. Please refer to the product description or contact our customer support team for specific return instructions.\n",
+ ": You can request a return by contacting our customer support team. We will provide you with\n"
+ ]
+ }
+ ]
},
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "CommitInfo(commit_url='https://huggingface.co/Prasant/Llama2-7b-qlora-chat-support-bot-faq/commit/afdc083726f49ccf925eda01e564e2a9520d92f3', commit_message='Upload model', commit_description='', oid='afdc083726f49ccf925eda01e564e2a9520d92f3', pr_url=None, pr_revision=None, pr_num=None)"
+ "cell_type": "code",
+ "source": [
+ "# prompt\n",
+ "prompt = \"Question: What happens when I return a clearance item?\"\n",
+ "print(generate_response(prompt))"
],
- "application/vnd.google.colaboratory.intrinsic+json": {
- "type": "string"
- }
- },
- "metadata": {},
- "execution_count": 29
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "In our current approach, we have implemented a sharded model TinyPixel/Llama-2–7B-bf16-sharded which involves dividing a large neural network model into multiple smaller pieces, typically more than 14 pieces in our case. This sharding strategy has proven to be highly beneficial when combined with the 'accelerate' framework \\\n",
- "\n",
- "When a model is sharded, each shard represents a portion of the overall model's parameters. Accelerate can then efficiently manage these shards by distributing them across various parts of the memory, including GPU memory and CPU memory. This dynamic allocation of shards allows us to work with very large models without requiring an excessive amount of memory"
- ],
- "metadata": {
- "id": "RFiESDCCRXG6"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "PEFT_MODEL = \"Prasant/Llama2-7b-qlora-chat-support-bot-faq\"\n",
- "\n",
- "# loading trained model from hugging face\n",
- "config = PeftConfig.from_pretrained(PEFT_MODEL)\n",
- "model = AutoModelForCausalLM.from_pretrained(\n",
- " config.base_model_name_or_path,\n",
- " return_dict=True,\n",
- " quantization_config=bnb_config,\n",
- " device_map=\"auto\",\n",
- " trust_remote_code=True,\n",
- ")\n",
- "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
- "tokenizer.pad_token = tokenizer.eos_token\n",
- "\n",
- "model = PeftModel.from_pretrained(model, PEFT_MODEL)"
- ],
- "metadata": {
- "id": "Fq9phlfIQ4E5",
- "outputId": "69adcb7b-ee40-4834-adf4-2eca74ee5284",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 113,
- "referenced_widgets": [
- "25e4e057e547452fa7870203ecf304af",
- "9d48543aaf23498f85b8ad162ed21a51",
- "1227f4ab5ce745898d952c79afe2e118",
- "c51db07388824a71980ea8b0734639d8",
- "70c4820f69984344acfe5becc487057a",
- "78477d0ebafe4bbe8f3404d846f05a0d",
- "3696a81f9ea9425682caed310d9763c1",
- "8dff8fc4d7f44193bc48e6121c1a63c2",
- "752e1118c25e4b6db51049b679d851b6",
- "0bc37e819ce74a99afd7ce4ebb73c245",
- "0a2ba91f95e14c049d7616ed5cb0c73b",
- "804eec09657b479183c258825f10d02d",
- "f02fca9c66324f7ea611b29ddc862f75",
- "b4e8aece348f400089c3f37781c119d6",
- "fafb33d86b5a4da4ab37b121350cad8d",
- "1e0286d9297c4c859fbbce69bf123971",
- "1801009a0b30418d8fa4bc623cf290da",
- "174ea14f5eba4cfe8d68a26f7d239a08",
- "2134a7e4dca14686a564bd724efac364",
- "325bf139f1aa4dbc9bafe500c1f77172",
- "a16aa5dfef6049d5a3f71b7fc501ebb7",
- "d213614cb3d34bbc833327333a4e7964",
- "ee9f15735568419389482445f330a2cc",
- "c5f05746bd6e49df8035419a6e2d247b",
- "a81437a7cd1f44e693f9c43ded243d6b",
- "6c91ae9a068b4aed94df30c352825341",
- "0a34f2f771a947849b5e526a42584eb6",
- "1c1ab77f2f2b4560863ee8dd51add0cf",
- "335af2e8e0134e34b49328da24c73294",
- "61f6ab50052c475797c37936f5616ed0",
- "3e6d92c3b05d48fe8fc6f4a6da34e1ed",
- "5fae4a9789234b69bab42d72d5dd2136",
- "77884c522402433f95d1cda7d10c3f04"
- ]
- }
- },
- "execution_count": 30,
- "outputs": [
+ "metadata": {
+ "id": "yJE1BrB8RaM8",
+ "outputId": "83ae00c0-bdff-40f0-ccfb-d37d6348cf61",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Question: What happens when I return a clearance item?\n",
+ ": Clearance items are non-refundable and non-exchangeable. However, you can request a store credit for the full value of the item. Please contact our customer support team for assistance.\n",
+ ": We accept returns within 30 days\n"
+ ]
+ }
+ ]
+ },
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "adapter_config.json: 0%| | 0.00/608 [00:00, ?B/s]"
+ "cell_type": "code",
+ "source": [
+ "# prompt\n",
+ "prompt = \"Question: How do I know when I'll receive my order?\"\n",
+ "print(generate_response(prompt))"
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "25e4e057e547452fa7870203ecf304af"
- }
- },
- "metadata": {}
+ "metadata": {
+ "id": "sDkq0PbhRw-2",
+ "outputId": "be1753a4-2e91-42fb-dac6-cae6830594d4",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Question: How do I know when I'll receive my order?\n",
+ ": Once you place an order, we will send you a confirmation email with your order details and estimated delivery time. You can track your order's progress by logging into your account or checking your order confirmation email.\n",
+ ": If you need to\n"
+ ]
+ }
+ ]
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Loading checkpoint shards: 0%| | 0/14 [00:00, ?it/s]"
+ "cell_type": "code",
+ "source": [
+ "################ falcon with lama2\n",
+ "# https://github.com/curiousily/Get-Things-Done-with-Prompt-Engineering-and-LangChain/blob/master/07.falcon-qlora-fine-tuning.ipynb"
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "804eec09657b479183c258825f10d02d"
- }
- },
- "metadata": {}
+ "metadata": {
+ "id": "3L3UxKlVRaPy"
+ },
+ "execution_count": null,
+ "outputs": []
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "adapter_model.safetensors: 0%| | 0.00/134M [00:00, ?B/s]"
+ "cell_type": "markdown",
+ "source": [
+ "## That's it; you can try to play with these hyperparameters to achieve better results 🎉\n",
+ "\n",
+ "If you liked this guide, do consider giving a 🌟 to LanceDB's [vector-recipes](https://github.com/lancedb/vectordb-recipes)"
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "ee9f15735568419389482445f330a2cc"
+ "metadata": {
+ "id": "fspW3KiWaaCO"
}
- },
- "metadata": {}
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "Do experiments with below parameters"
- ],
- "metadata": {
- "id": "LP_1aPCYYyvx"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "# model configuration, you can try changing these parameters\n",
- "generation_config = model.generation_config\n",
- "generation_config.max_new_tokens = 50\n",
- "\n",
- "# try using temperature also by uncommenting it\n",
- "# generation_config.temperature = 0.3\n",
- "generation_config.top_p = 0.7\n",
- "generation_config.num_return_sequences = 1\n",
- "generation_config.pad_token_id = tokenizer.eos_token_id\n",
- "generation_config.eos_token_id = tokenizer.eos_token_id"
- ],
- "metadata": {
- "id": "O-umn8k_RZ7h"
- },
- "execution_count": 34,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "# device configuration\n",
- "DEVICE = \"cuda:0\""
- ],
- "metadata": {
- "id": "dqP2fFuDRZ-o"
- },
- "execution_count": 35,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "\n",
- "%%time\n",
- "prompt = f\"\"\"\n",
- ": How can I create an account?\n",
- ":\n",
- "\"\"\".strip()\n",
- "\n",
- "encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n",
- "with torch.inference_mode():\n",
- " outputs = model.generate(\n",
- " input_ids=encoding.input_ids,\n",
- " attention_mask=encoding.attention_mask,\n",
- " generation_config=generation_config,\n",
- "\n",
- " )\n",
- "print(tokenizer.decode(outputs[0], skip_special_tokens=True))"
- ],
- "metadata": {
- "id": "TScya6W0RaDL",
- "outputId": "f4e599b4-9674-4f92-a2e7-7aabc66f28a3",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 36,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- ": How can I create an account?\n",
- ": To create an account, click on the 'Sign Up' button on the top right corner of the website. Follow the instructions to complete the registration process.\n",
- ": You can place an order by adding items to your shopping cart and proceeding to\n",
- "CPU times: user 4.37 s, sys: 252 ms, total: 4.62 s\n",
- "Wall time: 4.68 s\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "# helper function to generate responses\n",
- "def generate_response(question: str) -> str:\n",
- " prompt = f\"\"\"\n",
- ": {question}\n",
- ":\n",
- "\"\"\".strip()\n",
- " encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n",
- " with torch.inference_mode():\n",
- " outputs = model.generate(\n",
- " input_ids=encoding.input_ids,\n",
- " attention_mask=encoding.attention_mask,\n",
- " generation_config=generation_config,\n",
- " )\n",
- " response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
- "\n",
- " assistant_start = \":\"\n",
- " response_start = response.find(assistant_start)\n",
- " return response[response_start + len(assistant_start) :].strip()"
- ],
- "metadata": {
- "id": "6ZQc3XvhRaGa"
- },
- "execution_count": 38,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "# prompt\n",
- "prompt = \"Question: Can I return a product if it was a clearance or final sale item?\"\n",
- "print(generate_response(prompt))"
- ],
- "metadata": {
- "id": "IXiY44KGRaJv",
- "outputId": "357e5eaa-f72c-4025-d8a7-6e739e70ad8a",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 42,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Question: Can I return a product if it was a clearance or final sale item?\n",
- ": Clearance or final sale items are typically non-returnable. Please refer to the product description or contact our customer support team for specific return instructions.\n",
- ": You can request a return by contacting our customer support team. We will provide you with\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "# prompt\n",
- "prompt = \"Question: What happens when I return a clearance item?\"\n",
- "print(generate_response(prompt))"
- ],
- "metadata": {
- "id": "yJE1BrB8RaM8",
- "outputId": "83ae00c0-bdff-40f0-ccfb-d37d6348cf61",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "execution_count": 43,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Question: What happens when I return a clearance item?\n",
- ": Clearance items are non-refundable and non-exchangeable. However, you can request a store credit for the full value of the item. Please contact our customer support team for assistance.\n",
- ": We accept returns within 30 days\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "# prompt\n",
- "prompt = \"Question: How do I know when I'll receive my order?\"\n",
- "print(generate_response(prompt))"
- ],
- "metadata": {
- "id": "sDkq0PbhRw-2",
- "outputId": "be1753a4-2e91-42fb-dac6-cae6830594d4",
- "colab": {
- "base_uri": "https://localhost:8080/"
}
- },
- "execution_count": 44,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Question: How do I know when I'll receive my order?\n",
- ": Once you place an order, we will send you a confirmation email with your order details and estimated delivery time. You can track your order's progress by logging into your account or checking your order confirmation email.\n",
- ": If you need to\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "################ falcon with lama2\n",
- "# https://github.com/curiousily/Get-Things-Done-with-Prompt-Engineering-and-LangChain/blob/master/07.falcon-qlora-fine-tuning.ipynb"
- ],
- "metadata": {
- "id": "3L3UxKlVRaPy"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "At this time of writing, the models that support accelerate are:\n",
- "\n",
- "\n",
- "[\n",
- " 'bigbird_pegasus', 'blip_2', 'bloom', 'bridgetower', 'codegen', 'deit', 'esm',\n",
- " 'gpt2', 'gpt_bigcode', 'gpt_neo', 'gpt_neox', 'gpt_neox_japanese', 'gptj', 'gptsan_japanese',\n",
- " 'lilt', 'llama', 'longformer', 'longt5', 'luke', 'm2m_100', 'mbart', 'mega', 'mt5', 'nllb_moe',\n",
- " 'open_llama', 'opt', 'owlvit', 'plbart', 'roberta', 'roberta_prelayernorm', 'rwkv', 'switch_transformers',\n",
- " 't5', 'vilt', 'vit', 'vit_hybrid', 'whisper', 'xglm', 'xlm_roberta'\n",
- "]\n"
- ],
- "metadata": {
- "id": "fspW3KiWaaCO"
- }
- }
- ]
+ ]
}
\ No newline at end of file