From f66ab63d64a7ce9dfa411fd150052355d56b5b70 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 14:06:34 -0700 Subject: [PATCH 01/20] Bump transformers to 4.43 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0c3f4690..6b1d6247 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 7c9f4cda..b392089d 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cfe3a8e0..5aadd8c9 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a020387f..a166d4f6 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 9f59a487..45511a8f 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 6110eab6..d4913ac8 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index d4591919..b468adaf 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 8a486ef4..09ee3257 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 14e3aa88..bc8a59aa 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb From e6181e834ab0b32baa19a55773f369dc9a64802d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:26:02 -0700 Subject: [PATCH 02/20] Remove AutoAWQ as a standalone loader (it works better through transformers) --- modules/LoRA.py | 2 -- modules/loaders.py | 10 ---------- modules/models.py | 19 ------------------- 
modules/models_settings.py | 2 -- modules/shared.py | 9 ++------- modules/ui.py | 1 - modules/ui_model_menu.py | 1 - 7 files changed, 2 insertions(+), 42 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index eda5e406..117022cf 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -72,8 +72,6 @@ def add_lora_autogptq(lora_names): else: if len(lora_names) > 1: logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.') - if not shared.args.no_inject_fused_attention: - logger.warning('Fused Attention + AutoGPTQ may break Lora loading. Disable it.') peft_config = GPTQLoraConfig( inference_mode=True, diff --git a/modules/loaders.py b/modules/loaders.py index 75ed897b..549de5fb 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -127,15 +127,6 @@ loaders_and_params = OrderedDict({ 'no_use_fast', 'autogptq_info', ], - 'AutoAWQ': [ - 'cpu_memory', - 'gpu_memory', - 'auto_devices', - 'max_seq_len', - 'no_inject_fused_attention', - 'trust_remote_code', - 'no_use_fast', - ], 'HQQ': [ 'hqq_backend', 'trust_remote_code', @@ -200,7 +191,6 @@ def transformers_samplers(): loaders_samplers = { 'Transformers': transformers_samplers(), 'AutoGPTQ': transformers_samplers(), - 'AutoAWQ': transformers_samplers(), 'HQQ': transformers_samplers(), 'ExLlamav2': { 'temperature', diff --git a/modules/models.py b/modules/models.py index 07c14308..ea046e9b 100644 --- a/modules/models.py +++ b/modules/models.py @@ -75,7 +75,6 @@ def load_model(model_name, loader=None): 'llamacpp_HF': llamacpp_HF_loader, 'ExLlamav2': ExLlamav2_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader, - 'AutoAWQ': AutoAWQ_loader, 'HQQ': HQQ_loader, 'TensorRT-LLM': TensorRT_LLM_loader, } @@ -292,24 +291,6 @@ def llamacpp_HF_loader(model_name): return model -def AutoAWQ_loader(model_name): - from awq import AutoAWQForCausalLM - - model_dir = Path(f'{shared.args.model_dir}/{model_name}') - - model = AutoAWQForCausalLM.from_quantized( - quant_path=model_dir, - max_new_tokens=shared.args.max_seq_len, - trust_remote_code=shared.args.trust_remote_code, - fuse_layers=not shared.args.no_inject_fused_attention, - max_memory=get_max_memory_dict(), - batch_size=1, - safetensors=any(model_dir.glob('*.safetensors')), - ) - - return model - - def AutoGPTQ_loader(model_name): import modules.AutoGPTQ_loader diff --git a/modules/models_settings.py b/modules/models_settings.py index 7ae68125..1bb00ceb 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -180,8 +180,6 @@ def infer_loader(model_name, model_settings): loader = None elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0): loader = 'ExLlamav2_HF' - elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()): - loader = 'AutoAWQ' elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists(): loader = 'llamacpp_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0: diff --git a/modules/shared.py b/modules/shared.py index dec427dd..fe09a165 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -89,7 +89,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. 
Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ, AutoAWQ.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') @@ -160,10 +160,6 @@ group.add_argument('--disable_exllamav2', action='store_true', help='Disable ExL group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.') group.add_argument('--groupsize', type=int, default=-1, help='Group size.') -# AutoAWQ -group = parser.add_argument_group('AutoAWQ') -group.add_argument('--no_inject_fused_attention', action='store_true', help='Disable the use of fused attention, which will use less VRAM at the cost of slower inference.') - # HQQ group = parser.add_argument_group('HQQ') group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.') @@ -217,6 +213,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') +group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) @@ -267,8 +264,6 @@ def fix_loader_name(name): return 'ExLlamav2' elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']: return 'ExLlamav2_HF' - elif name in ['autoawq', 'awq', 'auto-awq']: - return 'AutoAWQ' elif name in ['hqq']: return 'HQQ' elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']: diff --git a/modules/ui.py b/modules/ui.py index cfe709fa..47f92cf0 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -78,7 +78,6 @@ def list_model_elements(): 'groupsize', 'triton', 'desc_act', - 'no_inject_fused_attention', 'no_inject_fused_mlp', 'no_use_cuda_fp16', 'disable_exllama', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 54ac9b12..2938c120 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -127,7 +127,6 @@ def create_ui(): shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.') shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.') shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) - shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Fuses layers for AutoAWQ. Disable if running low on VRAM.') shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. 
Disable if running low on VRAM.') shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.') shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.') From 1815877061e87c9926079d79f85af12612be5d33 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 18:48:10 -0700 Subject: [PATCH 03/20] UI: fix the default character not loading correctly on startup --- modules/ui_chat.py | 2 +- server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 7085f5cd..8b370d86 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -84,7 +84,7 @@ def create_ui(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) with gr.Row(): - shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') + shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') with gr.Row(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') diff --git a/server.py b/server.py index 57e26be8..d6069d5e 100644 --- a/server.py +++ b/server.py @@ -90,7 +90,7 @@ def create_interface(): # Force some events to be triggered on page load shared.persistent_interface_state.update({ 'loader': shared.args.loader or 'Transformers', - 'mode': shared.settings['mode'], + 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(), 'character_menu': shared.args.character or shared.settings['character'], 'instruction_template_str': shared.settings['instruction_template_str'], 'prompt_menu-default': shared.settings['prompt-default'], From e777b7334943e6d16e71750c1da1beb536bb153a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:04:19 -0700 Subject: [PATCH 04/20] UI: prevent LaTeX from being rendered for inline "$" --- js/main.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/js/main.js b/js/main.js index bdbb7cef..3b8b13e4 100644 --- a/js/main.js +++ b/js/main.js @@ -213,12 +213,10 @@ function doSyntaxHighlighting() { renderMathInElement(element, { delimiters: [ { left: "$$", right: "$$", display: true }, - { left: "$", right: "$", display: false }, { left: "\\(", right: "\\)", display: false }, { left: "\\[", right: "\\]", display: true }, ], }); - }); observer.observe(targetElement, config); From 8b52b93e8566a7268eb487ba7d81def9e9227ded Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:35:00 -0700 Subject: [PATCH 05/20] Make the Google Colab notebook functional again (attempt) --- 
Colab-TextGen-GPU.ipynb | 49 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 82e6c18e..739232a4 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -22,7 +22,7 @@ "source": [ "# oobabooga/text-generation-webui\n", "\n", - "After running both cells, a public gradio URL will appear at the bottom in a few minutes. You can optionally generate an API link.\n", + "After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n", "\n", "* Project page: https://github.com/oobabooga/text-generation-webui\n", "* Gradio server status: https://status.gradio.app/" @@ -53,43 +53,27 @@ "\n", "#@markdown If unsure about the branch, write \"main\" or leave it blank.\n", "\n", - "import torch\n", + "import os\n", "from pathlib import Path\n", "\n", + "os.environ.pop('PYTHONPATH', None)\n", + "\n", "if Path.cwd().name != 'text-generation-webui':\n", - " print(\"Installing the webui...\")\n", + " print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n", "\n", " !git clone https://github.com/oobabooga/text-generation-webui\n", " %cd text-generation-webui\n", "\n", - " torver = torch.__version__\n", - " print(f\"TORCH: {torver}\")\n", - " is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n", - "\n", - " if is_cuda118:\n", - " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118\n", - " else:\n", - " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121\n", - "\n", - " textgen_requirements = open('requirements.txt').read().splitlines()\n", - " if is_cuda118:\n", - " textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n", - " with open('temp_requirements.txt', 'w') as file:\n", - " file.write('\\n'.join(textgen_requirements))\n", - "\n", - " !pip install -r temp_requirements.txt --upgrade\n", - "\n", - " print(\"\\033[1;32;1m\\n --> If you see a warning about \\\"previously imported packages\\\", just ignore it.\\033[0;37;0m\")\n", - " print(\"\\033[1;32;1m\\n --> There is no need to restart the runtime.\\n\\033[0;37;0m\")\n", - "\n", - " try:\n", - " import flash_attn\n", - " except:\n", - " !pip uninstall -y flash_attn\n", + " # Install the project in an isolated environment\n", + " !GPU_CHOICE=A \\\n", + " USE_CUDA118=FALSE \\\n", + " LAUNCH_AFTER_INSTALL=FALSE \\\n", + " INSTALL_EXTENSIONS=FALSE \\\n", + " ./start_linux.sh\n", "\n", "# Parameters\n", - "model_url = \"https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ\" #@param {type:\"string\"}\n", - "branch = \"gptq-4bit-32g-actorder_True\" #@param {type:\"string\"}\n", + "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n", + "branch = \"8.0bpw\" #@param {type:\"string\"}\n", "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", "api = False #@param {type:\"boolean\"}\n", "\n", @@ -116,11 +100,10 @@ " output_folder = \"\"\n", "\n", "# Start the web UI\n", - "cmd = f\"python server.py --share\"\n", + "cmd = f\"./start_linux.sh {command_line_flags} --share\"\n", "if output_folder != \"\":\n", " cmd += f\" --model 
{output_folder}\"\n", - "cmd += f\" {command_line_flags}\"\n", - "print(cmd)\n", + "\n", "!$cmd" ], "metadata": { @@ -131,4 +114,4 @@ "outputs": [] } ] -} +} \ No newline at end of file From 9d5513fda0f5a78db0fc03262c6b3fdc72e166b0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:38:04 -0700 Subject: [PATCH 06/20] Remove the AutoAWQ requirement --- requirements.txt | 1 - requirements_amd.txt | 2 -- requirements_amd_noavx2.txt | 2 -- requirements_noavx2.txt | 1 - 4 files changed, 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6b1d6247..db73b8b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" diff --git a/requirements_amd.txt b/requirements_amd.txt index b392089d..600db9b4 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,5 +43,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 5aadd8c9..4f148c94 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,5 +41,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 09ee3257..603fb0b8 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" From 98ed6d3a666e3924410d34568b3e1919709656a2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:50:56 -0700 Subject: [PATCH 07/20] Don't use flash attention on Google Colab --- Colab-TextGen-GPU.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 739232a4..8e305e1d 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -74,7 +74,7 @@ "# Parameters\n", "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n", "branch = \"8.0bpw\" #@param {type:\"string\"}\n", - "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", + "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n", "api = False #@param {type:\"boolean\"}\n", "\n", "if api:\n", @@ -114,4 +114,4 @@ "outputs": [] } ] -} \ No newline at end of file +} From e637b702ff9d6955e830fe96b94bf5313e9f2703 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 21:29:30 -0700 Subject: [PATCH 08/20] UI: make text between quotes colored in chat mode --- css/main.css | 12 ++++++++++++ modules/chat.py | 2 +- modules/html_generator.py | 26 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index d8e12e59..6f2a9fb7 100644 --- a/css/main.css +++ b/css/main.css @@ -406,6 +406,18 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: var(--body-text-color); } +.message q { + color: #707070; +} + +.dark .message q { + color: orange; +} + +.message q::before, .message q::after { + content: ""; +} + .message-body li { list-style-position: outside; } diff --git a/modules/chat.py b/modules/chat.py index c95673ce..9919cb76 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ 
-488,7 +488,7 @@ def start_new_chat(state):
     greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
     if greeting != '':
         history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
-        history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]]
+        history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]]
 
     unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
     save_history(history, unique_id, state['character_menu'], state['mode'])
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 657133bd..61e61b0f 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -42,6 +42,29 @@ def fix_newlines(string):
     return string
 
 
+def replace_quotes(text):
+
+    # Define a list of quote pairs (opening and closing), using HTML entities
+    quote_pairs = [
+        ('"', '"'),  # Double quotes
+        ('“', '”'),  # Unicode left and right double quotation marks
+        ('‘', '’'),  # Unicode left and right single quotation marks
+        ('«', '»'),  # French quotes
+        ('„', '“'),  # German quotes
+        ('‘', '’'),  # Alternative single quotes
+        ('“', '”'),  # Unicode quotes (numeric entities)
+        ('“', '”'),  # Unicode quotes (hex entities)
+    ]
+
+    # Create a regex pattern that matches any of the quote pairs, including newlines
+    pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)
+
+    # Replace matched patterns with <q> tags, keeping original quotes
+    replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
+
+    return replaced_text
+
+
 def replace_blockquote(m):
     return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
 
@@ -49,6 +72,9 @@ def replace_blockquote(m):
 
 @functools.lru_cache(maxsize=4096)
 def convert_to_markdown(string):
+    # Quote to <q></q>
+    string = replace_quotes(string)
+
     # Blockquote
     string = re.sub(r'(^|[\n])>', r'\1>', string)
     pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)

From 8a5f110c14f4ce4810c8bfd1a3fa8080935a61ba Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 24 Jul 2024 09:22:48 -0700
Subject: [PATCH 09/20] Bump ExLlamaV2 to 0.1.8

---
 requirements.txt | 10 +++++-----
 requirements_amd.txt | 6 +++---
 requirements_amd_noavx2.txt | 6 +++---
 requirements_apple_intel.txt | 2 +-
 requirements_apple_silicon.txt | 2 +-
 requirements_noavx2.txt | 10 +++++-----
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index db73b8b1..bc41421c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te
 https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 600db9b4..df3ab7fb 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -40,6 +40,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 4f148c94..e85d1262 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -38,6 +38,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a166d4f6..123b6d9b 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 45511a8f..08509b05 100644 --- a/requirements_apple_silicon.txt +++ 
b/requirements_apple_silicon.txt @@ -38,4 +38,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 603fb0b8..28d13a90 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and 
platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From 3b2c23dfb568d011276d5e2ec7ffc2596ce25580 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:15:00 -0700 Subject: [PATCH 10/20] Add AutoAWQ 0.2.6 wheels for PyTorch 2.2.2 --- requirements.txt | 8 ++++++++ requirements_amd.txt | 4 ++++ requirements_amd_noavx2.txt | 4 ++++ requirements_noavx2.txt | 8 ++++++++ 4 files changed, 24 insertions(+) diff --git a/requirements.txt b/requirements.txt index bc41421c..4461c9cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,3 +62,11 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index df3ab7fb..48604f70 100644 --- 
a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,3 +43,7 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index e85d1262..dcfaa5df 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,3 +41,7 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 28d13a90..4756d844 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -62,3 +62,11 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn 
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From 947016d01092342fa07b150979d1e13efa45d975 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:54:26 -0700 Subject: [PATCH 11/20] UI: make the markdown LRU cache infinite (for really long conversations) --- modules/chat.py | 19 +++++++++++++++++-- modules/html_generator.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 9919cb76..c744defc 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -17,7 +17,11 @@ from PIL import Image import modules.shared as shared from modules import utils from modules.extensions import apply_extensions -from modules.html_generator import chat_html_wrapper, make_thumbnail +from modules.html_generator import ( + chat_html_wrapper, + convert_to_markdown, + make_thumbnail +) from modules.logging_colors import logger from modules.text_generation import ( generate_reply, @@ -368,7 +372,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess def impersonate_wrapper(text, state): - static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) prompt = generate_chat_prompt('', state, impersonate=True) @@ -1044,6 +1047,8 @@ def handle_unique_id_select(state): history = load_history(state['unique_id'], 
state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [history, html] @@ -1052,6 +1057,8 @@ def handle_start_new_chat_click(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [history, html, gr.update(choices=histories, value=histories[0][1])] @@ -1061,6 +1068,8 @@ def handle_delete_chat_confirm_click(state): history, unique_id = load_history_after_deletion(state, index) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1099,6 +1108,8 @@ def handle_upload_chat_history(load_chat_history, state): html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1119,6 +1130,8 @@ def handle_character_menu_change(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1136,6 +1149,8 @@ def handle_mode_change(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, diff --git a/modules/html_generator.py b/modules/html_generator.py index 61e61b0f..1b687ade 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -69,7 +69,7 @@ def replace_blockquote(m): return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '') -@functools.lru_cache(maxsize=4096) +@functools.lru_cache(maxsize=None) def convert_to_markdown(string): # Quote to From 7e2851e5058bf2a4569a7d374450ab549bcf2886 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:04:12 -0700 Subject: [PATCH 12/20] UI: fix "Command for chat-instruct mode" not appearing by default --- modules/ui_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 8b370d86..7ef8df4d 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -90,7 +90,7 @@ def create_ui(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') with gr.Row(): - shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar']) + shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar']) def 
create_chat_settings_ui(): From 3170b6efc9e8e6189712c87e2dcf5723c89bb8ed Mon Sep 17 00:00:00 2001 From: Luana Date: Wed, 24 Jul 2024 22:23:29 -0300 Subject: [PATCH 13/20] Fixes Linux shebangs (#6110) --- cmd_linux.sh | 2 +- start_linux.sh | 2 +- update_wizard_linux.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd_linux.sh b/cmd_linux.sh index 1685050a..576dbf02 100755 --- a/cmd_linux.sh +++ b/cmd_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" diff --git a/start_linux.sh b/start_linux.sh index 5620c831..792daca8 100755 --- a/start_linux.sh +++ b/start_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" diff --git a/update_wizard_linux.sh b/update_wizard_linux.sh index c5add61e..3ada9a1e 100755 --- a/update_wizard_linux.sh +++ b/update_wizard_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" From 1f101ee3e5d0516ea17b905128b6b76b8d2b0f23 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 18:56:54 -0700 Subject: [PATCH 14/20] UI: improve the quote colors --- css/main.css | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/css/main.css b/css/main.css index 6f2a9fb7..3ecf0044 100644 --- a/css/main.css +++ b/css/main.css @@ -406,12 +406,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: var(--body-text-color); } -.message q { - color: #707070; -} - .dark .message q { - color: orange; + color: #f5b031; } .message q::before, .message q::after { From ac30e7fe9c0ef0a03c7efc268f3300a4c6963ca4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:03:34 -0700 Subject: [PATCH 15/20] Updater: don't reinstall requirements if no updates after git pull --- one_click.py | 54 +++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/one_click.py b/one_click.py index e94b6d44..547f4df4 100644 --- a/one_click.py +++ b/one_click.py @@ -337,6 +337,7 @@ def update_requirements(initial_installation=False, pull=True): git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' run_cmd(git_creation_cmd, environment=True, assert_success=True) + repository_updated = False if pull: print_big_message("Updating the local copy of the repository with \"git pull\"") @@ -347,9 +348,13 @@ def update_requirements(initial_installation=False, pull=True): ] before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - run_cmd("git pull --autostash", assert_success=True, environment=True) + pull_output = run_cmd("git pull --autostash", assert_success=True, environment=True, capture_output=True) after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} + # Check if git pull actually updated anything + if "Already up to date." 
not in pull_output: + repository_updated = True + # Check for differences in installation file hashes for file_name in files_to_check: if before_pull_hashes[file_name] != after_pull_hashes[file_name]: @@ -382,39 +387,40 @@ def update_requirements(initial_installation=False, pull=True): requirements_file = base_requirements - print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + if repository_updated or initial_installation: + print_big_message(f"Installing webui requirements from file: {requirements_file}") + print(f"TORCH: {torver}\n") - # Prepare the requirements file - textgen_requirements = open(requirements_file).read().splitlines() - if is_cuda118: - textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] - if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] + # Prepare the requirements file + textgen_requirements = open(requirements_file).read().splitlines() - with open('temp_requirements.txt', 'w') as file: - file.write('\n'.join(textgen_requirements)) + if is_cuda118: + textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] + if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 + textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - # Workaround for git+ packages not updating properly. - git_requirements = [req for req in textgen_requirements if req.startswith("git+")] - for req in git_requirements: - url = req.replace("git+", "") - package_name = url.split("/")[-1].split("@")[0].rstrip(".git") - run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) - print(f"Uninstalled {package_name}") + with open('temp_requirements.txt', 'w') as file: + file.write('\n'.join(textgen_requirements)) - # Install/update the project requirements - run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) - os.remove('temp_requirements.txt') + # Workaround for git+ packages not updating properly. + git_requirements = [req for req in textgen_requirements if req.startswith("git+")] + for req in git_requirements: + url = req.replace("git+", "") + package_name = url.split("/")[-1].split("@")[0].rstrip(".git") + run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) + print(f"Uninstalled {package_name}") + + # Install/update the project requirements + run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) + os.remove('temp_requirements.txt') + else: + print("Repository is already up to date. Skipping requirements installation.") # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. 
Check for pytorch-cuda as well for backwards compatibility if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1: clear_cache() return - if not os.path.exists("repositories/"): - os.mkdir("repositories") - clear_cache() From b85ae6bc96ff03aca12f8ad6212da39ec9cca192 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:10:17 -0700 Subject: [PATCH 16/20] Fix after previous commit --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 547f4df4..d823ecb3 100644 --- a/one_click.py +++ b/one_click.py @@ -352,7 +352,7 @@ def update_requirements(initial_installation=False, pull=True): after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} # Check if git pull actually updated anything - if "Already up to date." not in pull_output: + if "Already up to date." not in pull_output.stdout.decode('utf-8'): repository_updated = True # Check for differences in installation file hashes From 14584fda366b8f38d02e31fbb511c0527438c078 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 20:55:18 -0700 Subject: [PATCH 17/20] UI: don't change the color of italics in instruct mode --- css/html_instruct_style.css | 8 -------- 1 file changed, 8 deletions(-) diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css index 8a31d6e2..50b9402f 100644 --- a/css/html_instruct_style.css +++ b/css/html_instruct_style.css @@ -39,14 +39,6 @@ margin-bottom: 0 !important; } -.dark .message-body p em { - color: rgb(198 202 214) !important; -} - -.message-body p em { - color: rgb(110 110 110) !important; -} - .gradio-container .chat .assistant-message { padding: 20px; background: #f4f4f4; From d581334a41b0ddd2e9701aea4643ba6206f2edff Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 05:38:52 -0700 Subject: [PATCH 18/20] Don't install AutoAWQ on CUDA 11.8 --- one_click.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index d823ecb3..065265fe 100644 --- a/one_click.py +++ b/one_click.py @@ -395,7 +395,11 @@ def update_requirements(initial_installation=False, pull=True): textgen_requirements = open(requirements_file).read().splitlines() if is_cuda118: - textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] + textgen_requirements = [ + req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') + for req in textgen_requirements + if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() + ] if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] From a34273755b7389adb14ee0a04d5d6345b0b00dcb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 07:34:01 -0700 Subject: [PATCH 19/20] Revert "Updater: don't reinstall requirements if no updates after git pull" This reverts commit ac30e7fe9c0ef0a03c7efc268f3300a4c6963ca4. 
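[Editor's note — for context on what is being reverted: patches 15-16 decided whether to reinstall requirements by capturing the output of "git pull" and checking it for the string "Already up to date.". A rough, standalone sketch of that check follows, using a plain subprocess.run call in place of the project's run_cmd() helper; it is illustrative only, not code from the repository.]

# Rough sketch (not project code): how the reverted patch detected whether
# "git pull" actually changed anything, using plain subprocess instead of
# the project's run_cmd() helper.
import subprocess


def pull_updated_repository() -> bool:
    result = subprocess.run(
        ["git", "pull", "--autostash"],
        capture_output=True,
        check=True,
    )
    output = result.stdout.decode("utf-8")
    # git prints "Already up to date." when there is nothing to merge.
    return "Already up to date." not in output


if __name__ == "__main__":
    if pull_updated_repository():
        print("Repository updated; requirements may need reinstalling.")
    else:
        print("Repository is already up to date. Skipping requirements installation.")
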
--- one_click.py | 61 ++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/one_click.py b/one_click.py index 065265fe..0a0412ba 100644 --- a/one_click.py +++ b/one_click.py @@ -337,7 +337,6 @@ def update_requirements(initial_installation=False, pull=True): git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' run_cmd(git_creation_cmd, environment=True, assert_success=True) - repository_updated = False if pull: print_big_message("Updating the local copy of the repository with \"git pull\"") @@ -348,13 +347,9 @@ def update_requirements(initial_installation=False, pull=True): ] before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - pull_output = run_cmd("git pull --autostash", assert_success=True, environment=True, capture_output=True) + run_cmd("git pull --autostash", assert_success=True, environment=True) after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - # Check if git pull actually updated anything - if "Already up to date." not in pull_output.stdout.decode('utf-8'): - repository_updated = True - # Check for differences in installation file hashes for file_name in files_to_check: if before_pull_hashes[file_name] != after_pull_hashes[file_name]: @@ -387,44 +382,44 @@ def update_requirements(initial_installation=False, pull=True): requirements_file = base_requirements - if repository_updated or initial_installation: - print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + print_big_message(f"Installing webui requirements from file: {requirements_file}") + print(f"TORCH: {torver}\n") - # Prepare the requirements file - textgen_requirements = open(requirements_file).read().splitlines() + # Prepare the requirements file + textgen_requirements = open(requirements_file).read().splitlines() + if is_cuda118: + textgen_requirements = [ + req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') + for req in textgen_requirements + if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() + ] - if is_cuda118: - textgen_requirements = [ - req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') - for req in textgen_requirements - if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() - ] - if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] + if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 + textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - with open('temp_requirements.txt', 'w') as file: - file.write('\n'.join(textgen_requirements)) + with open('temp_requirements.txt', 'w') as file: + file.write('\n'.join(textgen_requirements)) - # Workaround for git+ packages not updating properly. 
- git_requirements = [req for req in textgen_requirements if req.startswith("git+")] - for req in git_requirements: - url = req.replace("git+", "") - package_name = url.split("/")[-1].split("@")[0].rstrip(".git") - run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) - print(f"Uninstalled {package_name}") + # Workaround for git+ packages not updating properly. + git_requirements = [req for req in textgen_requirements if req.startswith("git+")] + for req in git_requirements: + url = req.replace("git+", "") + package_name = url.split("/")[-1].split("@")[0].rstrip(".git") + run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) + print(f"Uninstalled {package_name}") - # Install/update the project requirements - run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) - os.remove('temp_requirements.txt') - else: - print("Repository is already up to date. Skipping requirements installation.") + # Install/update the project requirements + run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) + os.remove('temp_requirements.txt') # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1: clear_cache() return + if not os.path.exists("repositories/"): + os.mkdir("repositories") + clear_cache() From 42e80108f57830f14c2a20884371b7df2d60dd60 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 08:01:42 -0700 Subject: [PATCH 20/20] UI: clear the markdown LRU cache when using the default/notebook tabs --- modules/html_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/html_generator.py b/modules/html_generator.py index 1b687ade..c5eba5a8 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -150,6 +150,7 @@ def convert_to_markdown_wrapped(string, use_cache=True): def generate_basic_html(string): + convert_to_markdown.cache_clear() string = convert_to_markdown(string) string = f'
<style>{readable_css}</style><div class="readable">{string}</div>'
    return string
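
[Editor's note — the final patch works because convert_to_markdown is memoized: the cache_clear() attribute it calls only exists on functions wrapped with functools.lru_cache. Below is a minimal, self-contained sketch of that pattern; the name render_markdown and the maxsize value are illustrative placeholders, not the project's actual values.]

# Minimal sketch (not project code) of the functools.lru_cache pattern that
# makes convert_to_markdown.cache_clear() available.
import functools


@functools.lru_cache(maxsize=4096)
def render_markdown(text: str) -> str:
    # Stand-in for an expensive markdown -> HTML conversion.
    return text.replace("**", "")


def generate_page(text: str) -> str:
    # Clearing the cache first forces a fresh render instead of returning
    # a stale memoized result, mirroring what the patch adds.
    render_markdown.cache_clear()
    return render_markdown(text)


if __name__ == "__main__":
    print(generate_page("**hello**"))
    print(render_markdown.cache_info())  # e.g. CacheInfo(hits=0, misses=1, maxsize=4096, currsize=1)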