Remove duplicate D3_Gradio... file

93db53cf · Pfister, Martin · b653ea3a · b653ea3a
Commit 93db53cf authored 5 months ago by Pfister, Martin
--- a/D3_Gradio_example_VSC.ipynb
+++ b/D3_Gradio_example_VSC.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "fc68bcd6-bca8-4376-b231-0c8c84c532e4",
-   "metadata": {},
-   "source": [
-    "## Gradio"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c96197c4-9bdf-4895-a209-f97a30660b66",
-   "metadata": {},
-   "source": [
-    "[Gradio](https://www.gradio.app) lets you build simple web interfaces for your software. In this example, we use Gradio to provide a simple chat interface to a large language model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "18b34bbb-3946-4e31-8c51-c50f66d327bd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Import necessary libraries\n",
-    "import torch\n",
-    "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig\n",
-    "import gradio as gr\n",
-    "import os\n",
-    "import random"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "744de243-d842-4fd8-a2d9-e0ebfb85f91e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Use a random TCP port:\n",
-    "port = random.randint(10000, 50000)\n",
-    "# Get username\n",
-    "username = os.environ['USER']\n",
-    "# Construct URL:\n",
-    "relative_url = f'/user/{username}/proxy/absolute/{port}/'  # Needs to start with '/'\n",
-    "absolute_url = f'https://jupyterhub.vsc.ac.at{relative_url}'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "114aa1d0-c7c2-49f5-878b-de7f76a6eb9e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.\n",
-      "Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7e7fe196c066404d8a596a2eb4703854",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Device set to use cpu\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Load tokenizer and model and create a pipeline that can be used for inference:\n",
-    "model_name = '/gpfs/data/fs70824/LLMs_models_datasets/models/microsoft--phi-3.5-mini-instruct'\n",
-    "# model_name = 'microsoft/Phi-3.5-mini-instruct'\n",
-    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
-    "model = AutoModelForCausalLM.from_pretrained(\n",
-    "    model_name,\n",
-    "    device_map='cuda',\n",
-    "    torch_dtype=torch.bfloat16,\n",
-    "    trust_remote_code=True,\n",
-    "    quantization_config=BitsAndBytesConfig(\n",
-    "        load_in_4bit=True,\n",
-    "        bnb_4bit_quant_type=\"nf4\",\n",
-    "        bnb_4bit_compute_dtype=torch.bfloat16\n",
-    "    )\n",
-    ")\n",
-    "pipe = pipeline('text-generation', model=model, tokenizer=tokenizer)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "96fefd3b-f679-4b3a-b0de-f519c24249bd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Prepare a function that takes chatbot questions and returns the answer from the LLM:\n",
-    "def get_answer(question, history=[]):\n",
-    "    history.append(\n",
-    "        {'role': 'user', 'content': question}\n",
-    "    )\n",
-    "    result = pipe(history, max_new_tokens=500, return_full_text=False)\n",
-    "    return result[0]['generated_text'].strip()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "53ae1640-5515-45d1-a8ff-494d72662c62",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "which: no node in (/opt/sw/jupyterhub/envs/conda/vsc5/jupyterhub-llm-training-v4/bin:/opt/sw/conda/miniconda3-24.1.2/condabin:/opt/sw/cuda-zen/spack-0.19.0/bin:/home/fs71550/mpfister/.local/bin:/home/fs71550/mpfister/bin:/usr/share/Modules/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/lpp/mmfs/bin:/opt/sw/slurm/x86_64/alma8.8/22-05-2-1/bin:/opt/sw/slurm/x86_64/alma8.8/22-05-2-1/sbin:/opt/sw/vsc_modules/modules-4.2.2/bin:/opt/sw/vsc4/VSC/x86_64/generic/bin:/opt/sw/tools:/opt/sw/conda/miniconda3/condabin:/opt/sw/conda/miniconda3/bin)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "* Running on local URL:  http://127.0.0.1:39953\n",
-      "\n",
-      "To create a public link, set `share=True` in `launch()`.\n",
-      "\n",
-      "Open the following URL in your webbrowser:\n",
-      "https://jupyterhub.vsc.ac.at/user/mpfister/proxy/absolute/39953/\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create a Gradio ChatInterface and launch it:\n",
-    "chat_interface = gr.ChatInterface(get_answer, type='messages')\n",
-    "chat_interface.launch(share=False, inline=False, server_name='127.0.0.1', server_port=port, root_path=f'/user/{username}/proxy/absolute/{port}')\n",
-    "print(f'\\nOpen the following URL in your webbrowser:\\n{absolute_url}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2b049395-b496-4412-abb7-1c5a7f592cce",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
-%% Cell type:markdown id:fc68bcd6-bca8-4376-b231-0c8c84c532e4 tags:
-## Gradio
-%% Cell type:markdown id:c96197c4-9bdf-4895-a209-f97a30660b66 tags:
-[Gradio](https://www.gradio.app) lets you build simple web interfaces for your software. In this example, we use Gradio to provide a simple chat interface to a large language model.
-%% Cell type:code id:18b34bbb-3946-4e31-8c51-c50f66d327bd tags:
-``` python
-# Import necessary libraries
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
-import gradio as gr
-import os
-import random
-```
-%% Cell type:code id:744de243-d842-4fd8-a2d9-e0ebfb85f91e tags:
-``` python
-# Use a random TCP port:
-port = random.randint(10000, 50000)
-# Get username
-username = os.environ['USER']
-# Construct URL:
-relative_url = f'/user/{username}/proxy/absolute/{port}/'  # Needs to start with '/'
-absolute_url = f'https://jupyterhub.vsc.ac.at{relative_url}'
-```
-%% Cell type:code id:114aa1d0-c7c2-49f5-878b-de7f76a6eb9e tags:
-``` python
-# Load tokenizer and model and create a pipeline that can be used for inference:
-model_name = '/gpfs/data/fs70824/LLMs_models_datasets/models/microsoft--phi-3.5-mini-instruct'
-# model_name = 'microsoft/Phi-3.5-mini-instruct'
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    device_map='cuda',
-    torch_dtype=torch.bfloat16,
-    trust_remote_code=True,
-    quantization_config=BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16
-    )
-)
-pipe = pipeline('text-generation', model=model, tokenizer=tokenizer)
-```
-%% Output
-    `flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
-    Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
-    Device set to use cpu
-%% Cell type:code id:96fefd3b-f679-4b3a-b0de-f519c24249bd tags:
-``` python
-# Prepare a function that takes chatbot questions and returns the answer from the LLM:
-def get_answer(question, history=[]):
-    history.append(
-        {'role': 'user', 'content': question}
-    )
-    result = pipe(history, max_new_tokens=500, return_full_text=False)
-    return result[0]['generated_text'].strip()
-```
-%% Cell type:code id:53ae1640-5515-45d1-a8ff-494d72662c62 tags:
-``` python
-# Create a Gradio ChatInterface and launch it:
-chat_interface = gr.ChatInterface(get_answer, type='messages')
-chat_interface.launch(share=False, inline=False, server_name='127.0.0.1', server_port=port, root_path=f'/user/{username}/proxy/absolute/{port}')
-print(f'\nOpen the following URL in your webbrowser:\n{absolute_url}')
-```
-%% Output
-    which: no node in (/opt/sw/jupyterhub/envs/conda/vsc5/jupyterhub-llm-training-v4/bin:/opt/sw/conda/miniconda3-24.1.2/condabin:/opt/sw/cuda-zen/spack-0.19.0/bin:/home/fs71550/mpfister/.local/bin:/home/fs71550/mpfister/bin:/usr/share/Modules/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/lpp/mmfs/bin:/opt/sw/slurm/x86_64/alma8.8/22-05-2-1/bin:/opt/sw/slurm/x86_64/alma8.8/22-05-2-1/sbin:/opt/sw/vsc_modules/modules-4.2.2/bin:/opt/sw/vsc4/VSC/x86_64/generic/bin:/opt/sw/tools:/opt/sw/conda/miniconda3/condabin:/opt/sw/conda/miniconda3/bin)
-    * Running on local URL:  http://127.0.0.1:39953
-    To create a public link, set `share=True` in `launch()`.
-    Open the following URL in your webbrowser:
-    https://jupyterhub.vsc.ac.at/user/mpfister/proxy/absolute/39953/
-%% Cell type:code id:2b049395-b496-4412-abb7-1c5a7f592cce tags:
-``` python
-```