From f66ab63d64a7ce9dfa411fd150052355d56b5b70 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 14:06:34 -0700 Subject: [PATCH 01/20] Bump transformers to 4.43 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0c3f4690..6b1d6247 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 7c9f4cda..b392089d 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cfe3a8e0..5aadd8c9 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a020387f..a166d4f6 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 9f59a487..45511a8f 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 6110eab6..d4913ac8 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index d4591919..b468adaf 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 8a486ef4..09ee3257 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 14e3aa88..bc8a59aa 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb From e6181e834ab0b32baa19a55773f369dc9a64802d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:26:02 -0700 Subject: [PATCH 02/20] Remove AutoAWQ as a standalone loader (it works better through transformers) --- modules/LoRA.py | 2 -- modules/loaders.py | 10 ---------- modules/models.py | 19 ------------------- 
modules/models_settings.py | 2 -- modules/shared.py | 9 ++------- modules/ui.py | 1 - modules/ui_model_menu.py | 1 - 7 files changed, 2 insertions(+), 42 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index eda5e406..117022cf 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -72,8 +72,6 @@ def add_lora_autogptq(lora_names): else: if len(lora_names) > 1: logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.') - if not shared.args.no_inject_fused_attention: - logger.warning('Fused Attention + AutoGPTQ may break Lora loading. Disable it.') peft_config = GPTQLoraConfig( inference_mode=True, diff --git a/modules/loaders.py b/modules/loaders.py index 75ed897b..549de5fb 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -127,15 +127,6 @@ loaders_and_params = OrderedDict({ 'no_use_fast', 'autogptq_info', ], - 'AutoAWQ': [ - 'cpu_memory', - 'gpu_memory', - 'auto_devices', - 'max_seq_len', - 'no_inject_fused_attention', - 'trust_remote_code', - 'no_use_fast', - ], 'HQQ': [ 'hqq_backend', 'trust_remote_code', @@ -200,7 +191,6 @@ def transformers_samplers(): loaders_samplers = { 'Transformers': transformers_samplers(), 'AutoGPTQ': transformers_samplers(), - 'AutoAWQ': transformers_samplers(), 'HQQ': transformers_samplers(), 'ExLlamav2': { 'temperature', diff --git a/modules/models.py b/modules/models.py index 07c14308..ea046e9b 100644 --- a/modules/models.py +++ b/modules/models.py @@ -75,7 +75,6 @@ def load_model(model_name, loader=None): 'llamacpp_HF': llamacpp_HF_loader, 'ExLlamav2': ExLlamav2_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader, - 'AutoAWQ': AutoAWQ_loader, 'HQQ': HQQ_loader, 'TensorRT-LLM': TensorRT_LLM_loader, } @@ -292,24 +291,6 @@ def llamacpp_HF_loader(model_name): return model -def AutoAWQ_loader(model_name): - from awq import AutoAWQForCausalLM - - model_dir = Path(f'{shared.args.model_dir}/{model_name}') - - model = AutoAWQForCausalLM.from_quantized( - quant_path=model_dir, - max_new_tokens=shared.args.max_seq_len, - trust_remote_code=shared.args.trust_remote_code, - fuse_layers=not shared.args.no_inject_fused_attention, - max_memory=get_max_memory_dict(), - batch_size=1, - safetensors=any(model_dir.glob('*.safetensors')), - ) - - return model - - def AutoGPTQ_loader(model_name): import modules.AutoGPTQ_loader diff --git a/modules/models_settings.py b/modules/models_settings.py index 7ae68125..1bb00ceb 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -180,8 +180,6 @@ def infer_loader(model_name, model_settings): loader = None elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0): loader = 'ExLlamav2_HF' - elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()): - loader = 'AutoAWQ' elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists(): loader = 'llamacpp_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0: diff --git a/modules/shared.py b/modules/shared.py index dec427dd..fe09a165 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -89,7 +89,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. 
Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ, AutoAWQ.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') @@ -160,10 +160,6 @@ group.add_argument('--disable_exllamav2', action='store_true', help='Disable ExL group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.') group.add_argument('--groupsize', type=int, default=-1, help='Group size.') -# AutoAWQ -group = parser.add_argument_group('AutoAWQ') -group.add_argument('--no_inject_fused_attention', action='store_true', help='Disable the use of fused attention, which will use less VRAM at the cost of slower inference.') - # HQQ group = parser.add_argument_group('HQQ') group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.') @@ -217,6 +213,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') +group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) @@ -267,8 +264,6 @@ def fix_loader_name(name): return 'ExLlamav2' elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']: return 'ExLlamav2_HF' - elif name in ['autoawq', 'awq', 'auto-awq']: - return 'AutoAWQ' elif name in ['hqq']: return 'HQQ' elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']: diff --git a/modules/ui.py b/modules/ui.py index cfe709fa..47f92cf0 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -78,7 +78,6 @@ def list_model_elements(): 'groupsize', 'triton', 'desc_act', - 'no_inject_fused_attention', 'no_inject_fused_mlp', 'no_use_cuda_fp16', 'disable_exllama', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 54ac9b12..2938c120 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -127,7 +127,6 @@ def create_ui(): shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.') shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.') shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) - shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Fuses layers for AutoAWQ. Disable if running low on VRAM.') shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. 
Disable if running low on VRAM.') shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.') shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.') From 1815877061e87c9926079d79f85af12612be5d33 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 18:48:10 -0700 Subject: [PATCH 03/20] UI: fix the default character not loading correctly on startup --- modules/ui_chat.py | 2 +- server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 7085f5cd..8b370d86 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -84,7 +84,7 @@ def create_ui(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) with gr.Row(): - shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') + shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') with gr.Row(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') diff --git a/server.py b/server.py index 57e26be8..d6069d5e 100644 --- a/server.py +++ b/server.py @@ -90,7 +90,7 @@ def create_interface(): # Force some events to be triggered on page load shared.persistent_interface_state.update({ 'loader': shared.args.loader or 'Transformers', - 'mode': shared.settings['mode'], + 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(), 'character_menu': shared.args.character or shared.settings['character'], 'instruction_template_str': shared.settings['instruction_template_str'], 'prompt_menu-default': shared.settings['prompt-default'], From e777b7334943e6d16e71750c1da1beb536bb153a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:04:19 -0700 Subject: [PATCH 04/20] UI: prevent LaTeX from being rendered for inline "$" --- js/main.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/js/main.js b/js/main.js index bdbb7cef..3b8b13e4 100644 --- a/js/main.js +++ b/js/main.js @@ -213,12 +213,10 @@ function doSyntaxHighlighting() { renderMathInElement(element, { delimiters: [ { left: "$$", right: "$$", display: true }, - { left: "$", right: "$", display: false }, { left: "\\(", right: "\\)", display: false }, { left: "\\[", right: "\\]", display: true }, ], }); - }); observer.observe(targetElement, config); From 8b52b93e8566a7268eb487ba7d81def9e9227ded Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:35:00 -0700 Subject: [PATCH 05/20] Make the Google Colab notebook functional again (attempt) --- 
Colab-TextGen-GPU.ipynb | 49 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 82e6c18e..739232a4 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -22,7 +22,7 @@ "source": [ "# oobabooga/text-generation-webui\n", "\n", - "After running both cells, a public gradio URL will appear at the bottom in a few minutes. You can optionally generate an API link.\n", + "After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n", "\n", "* Project page: https://github.com/oobabooga/text-generation-webui\n", "* Gradio server status: https://status.gradio.app/" @@ -53,43 +53,27 @@ "\n", "#@markdown If unsure about the branch, write \"main\" or leave it blank.\n", "\n", - "import torch\n", + "import os\n", "from pathlib import Path\n", "\n", + "os.environ.pop('PYTHONPATH', None)\n", + "\n", "if Path.cwd().name != 'text-generation-webui':\n", - " print(\"Installing the webui...\")\n", + " print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n", "\n", " !git clone https://github.com/oobabooga/text-generation-webui\n", " %cd text-generation-webui\n", "\n", - " torver = torch.__version__\n", - " print(f\"TORCH: {torver}\")\n", - " is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n", - "\n", - " if is_cuda118:\n", - " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118\n", - " else:\n", - " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121\n", - "\n", - " textgen_requirements = open('requirements.txt').read().splitlines()\n", - " if is_cuda118:\n", - " textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n", - " with open('temp_requirements.txt', 'w') as file:\n", - " file.write('\\n'.join(textgen_requirements))\n", - "\n", - " !pip install -r temp_requirements.txt --upgrade\n", - "\n", - " print(\"\\033[1;32;1m\\n --> If you see a warning about \\\"previously imported packages\\\", just ignore it.\\033[0;37;0m\")\n", - " print(\"\\033[1;32;1m\\n --> There is no need to restart the runtime.\\n\\033[0;37;0m\")\n", - "\n", - " try:\n", - " import flash_attn\n", - " except:\n", - " !pip uninstall -y flash_attn\n", + " # Install the project in an isolated environment\n", + " !GPU_CHOICE=A \\\n", + " USE_CUDA118=FALSE \\\n", + " LAUNCH_AFTER_INSTALL=FALSE \\\n", + " INSTALL_EXTENSIONS=FALSE \\\n", + " ./start_linux.sh\n", "\n", "# Parameters\n", - "model_url = \"https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ\" #@param {type:\"string\"}\n", - "branch = \"gptq-4bit-32g-actorder_True\" #@param {type:\"string\"}\n", + "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n", + "branch = \"8.0bpw\" #@param {type:\"string\"}\n", "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", "api = False #@param {type:\"boolean\"}\n", "\n", @@ -116,11 +100,10 @@ " output_folder = \"\"\n", "\n", "# Start the web UI\n", - "cmd = f\"python server.py --share\"\n", + "cmd = f\"./start_linux.sh {command_line_flags} --share\"\n", "if output_folder != \"\":\n", " cmd += f\" --model 
{output_folder}\"\n", - "cmd += f\" {command_line_flags}\"\n", - "print(cmd)\n", + "\n", "!$cmd" ], "metadata": { @@ -131,4 +114,4 @@ "outputs": [] } ] -} +} \ No newline at end of file From 9d5513fda0f5a78db0fc03262c6b3fdc72e166b0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:38:04 -0700 Subject: [PATCH 06/20] Remove the AutoAWQ requirement --- requirements.txt | 1 - requirements_amd.txt | 2 -- requirements_amd_noavx2.txt | 2 -- requirements_noavx2.txt | 1 - 4 files changed, 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6b1d6247..db73b8b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" diff --git a/requirements_amd.txt b/requirements_amd.txt index b392089d..600db9b4 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,5 +43,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 5aadd8c9..4f148c94 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,5 +41,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 09ee3257..603fb0b8 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" From 98ed6d3a666e3924410d34568b3e1919709656a2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:50:56 -0700 Subject: [PATCH 07/20] Don't use flash attention on Google Colab --- Colab-TextGen-GPU.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 739232a4..8e305e1d 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -74,7 +74,7 @@ "# Parameters\n", "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n", "branch = \"8.0bpw\" #@param {type:\"string\"}\n", - "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", + "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n", "api = False #@param {type:\"boolean\"}\n", "\n", "if api:\n", @@ -114,4 +114,4 @@ "outputs": [] } ] -} \ No newline at end of file +} From e637b702ff9d6955e830fe96b94bf5313e9f2703 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 21:29:30 -0700 Subject: [PATCH 08/20] UI: make text between quotes colored in chat mode --- css/main.css | 12 ++++++++++++ modules/chat.py | 2 +- modules/html_generator.py | 26 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index d8e12e59..6f2a9fb7 100644 --- a/css/main.css +++ b/css/main.css @@ -406,6 +406,18 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: var(--body-text-color); } +.message q { + color: #707070; +} + +.dark .message q { + color: orange; +} + +.message q::before, .message q::after { + content: ""; +} + .message-body li { list-style-position: outside; } diff --git a/modules/chat.py b/modules/chat.py index c95673ce..9919cb76 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ 
-488,7 +488,7 @@ def start_new_chat(state):
     greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
     if greeting != '':
         history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
-        history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]]
+        history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]]
 
     unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
     save_history(history, unique_id, state['character_menu'], state['mode'])
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 657133bd..61e61b0f 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -42,6 +42,29 @@ def fix_newlines(string):
     return string
 
 
+def replace_quotes(text):
+
+    # Define a list of quote pairs (opening and closing), using HTML entities
+    quote_pairs = [
+        ('"', '"'),  # Double quotes
+        ('“', '”'),  # Unicode left and right double quotation marks
+        ('‘', '’'),  # Unicode left and right single quotation marks
+        ('«', '»'),  # French quotes
+        ('„', '“'),  # German quotes
+        ('‘', '’'),  # Alternative single quotes
+        ('“', '”'),  # Unicode quotes (numeric entities)
+        ('“', '”'),  # Unicode quotes (hex entities)
+    ]
+
+    # Create a regex pattern that matches any of the quote pairs, including newlines
+    pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)
+
+    # Replace matched patterns with <q> tags, keeping original quotes
+    replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
+
+    return replaced_text
+
+
 def replace_blockquote(m):
     return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
 
@@ -49,6 +72,9 @@ def replace_blockquote(m):
 
 @functools.lru_cache(maxsize=4096)
 def convert_to_markdown(string):
+    # Quote to <q></q>
+    string = replace_quotes(string)
+
     # Blockquote
     string = re.sub(r'(^|[\n])>', r'\1>', string)
     pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)

From 8a5f110c14f4ce4810c8bfd1a3fa8080935a61ba Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 24 Jul 2024 09:22:48 -0700
Subject: [PATCH 09/20] Bump ExLlamaV2 to 0.1.8

---
 requirements.txt | 10 +++++-----
 requirements_amd.txt | 6 +++---
 requirements_amd_noavx2.txt | 6 +++---
 requirements_apple_intel.txt | 2 +-
 requirements_apple_silicon.txt | 2 +-
 requirements_noavx2.txt | 10 +++++-----
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index db73b8b1..bc41421c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te
 https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 600db9b4..df3ab7fb 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -40,6 +40,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 4f148c94..e85d1262 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -38,6 +38,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a166d4f6..123b6d9b 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 45511a8f..08509b05 100644 --- a/requirements_apple_silicon.txt +++ 
b/requirements_apple_silicon.txt @@ -38,4 +38,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 603fb0b8..28d13a90 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and 
platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From 3b2c23dfb568d011276d5e2ec7ffc2596ce25580 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:15:00 -0700 Subject: [PATCH 10/20] Add AutoAWQ 0.2.6 wheels for PyTorch 2.2.2 --- requirements.txt | 8 ++++++++ requirements_amd.txt | 4 ++++ requirements_amd_noavx2.txt | 4 ++++ requirements_noavx2.txt | 8 ++++++++ 4 files changed, 24 insertions(+) diff --git a/requirements.txt b/requirements.txt index bc41421c..4461c9cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,3 +62,11 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index df3ab7fb..48604f70 100644 --- 
a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,3 +43,7 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index e85d1262..dcfaa5df 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,3 +41,7 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 28d13a90..4756d844 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -62,3 +62,11 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn 
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From 947016d01092342fa07b150979d1e13efa45d975 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:54:26 -0700 Subject: [PATCH 11/20] UI: make the markdown LRU cache infinite (for really long conversations) --- modules/chat.py | 19 +++++++++++++++++-- modules/html_generator.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 9919cb76..c744defc 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -17,7 +17,11 @@ from PIL import Image import modules.shared as shared from modules import utils from modules.extensions import apply_extensions -from modules.html_generator import chat_html_wrapper, make_thumbnail +from modules.html_generator import ( + chat_html_wrapper, + convert_to_markdown, + make_thumbnail +) from modules.logging_colors import logger from modules.text_generation import ( generate_reply, @@ -368,7 +372,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess def impersonate_wrapper(text, state): - static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) prompt = generate_chat_prompt('', state, impersonate=True) @@ -1044,6 +1047,8 @@ def handle_unique_id_select(state): history = load_history(state['unique_id'], 
state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [history, html] @@ -1052,6 +1057,8 @@ def handle_start_new_chat_click(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [history, html, gr.update(choices=histories, value=histories[0][1])] @@ -1061,6 +1068,8 @@ def handle_delete_chat_confirm_click(state): history, unique_id = load_history_after_deletion(state, index) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1099,6 +1108,8 @@ def handle_upload_chat_history(load_chat_history, state): html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1119,6 +1130,8 @@ def handle_character_menu_change(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1136,6 +1149,8 @@ def handle_mode_change(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, diff --git a/modules/html_generator.py b/modules/html_generator.py index 61e61b0f..1b687ade 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -69,7 +69,7 @@ def replace_blockquote(m): return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '') -@functools.lru_cache(maxsize=4096) +@functools.lru_cache(maxsize=None) def convert_to_markdown(string): # Quote to From 7e2851e5058bf2a4569a7d374450ab549bcf2886 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:04:12 -0700 Subject: [PATCH 12/20] UI: fix "Command for chat-instruct mode" not appearing by default --- modules/ui_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 8b370d86..7ef8df4d 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -90,7 +90,7 @@ def create_ui(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') with gr.Row(): - shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar']) + shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar']) def 
create_chat_settings_ui(): From 3170b6efc9e8e6189712c87e2dcf5723c89bb8ed Mon Sep 17 00:00:00 2001 From: Luana Date: Wed, 24 Jul 2024 22:23:29 -0300 Subject: [PATCH 13/20] Fixes Linux shebangs (#6110) --- cmd_linux.sh | 2 +- start_linux.sh | 2 +- update_wizard_linux.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd_linux.sh b/cmd_linux.sh index 1685050a..576dbf02 100755 --- a/cmd_linux.sh +++ b/cmd_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" diff --git a/start_linux.sh b/start_linux.sh index 5620c831..792daca8 100755 --- a/start_linux.sh +++ b/start_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" diff --git a/update_wizard_linux.sh b/update_wizard_linux.sh index c5add61e..3ada9a1e 100755 --- a/update_wizard_linux.sh +++ b/update_wizard_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" From 1f101ee3e5d0516ea17b905128b6b76b8d2b0f23 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 18:56:54 -0700 Subject: [PATCH 14/20] UI: improve the quote colors --- css/main.css | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/css/main.css b/css/main.css index 6f2a9fb7..3ecf0044 100644 --- a/css/main.css +++ b/css/main.css @@ -406,12 +406,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: var(--body-text-color); } -.message q { - color: #707070; -} - .dark .message q { - color: orange; + color: #f5b031; } .message q::before, .message q::after { From ac30e7fe9c0ef0a03c7efc268f3300a4c6963ca4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:03:34 -0700 Subject: [PATCH 15/20] Updater: don't reinstall requirements if no updates after git pull --- one_click.py | 54 +++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/one_click.py b/one_click.py index e94b6d44..547f4df4 100644 --- a/one_click.py +++ b/one_click.py @@ -337,6 +337,7 @@ def update_requirements(initial_installation=False, pull=True): git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' run_cmd(git_creation_cmd, environment=True, assert_success=True) + repository_updated = False if pull: print_big_message("Updating the local copy of the repository with \"git pull\"") @@ -347,9 +348,13 @@ def update_requirements(initial_installation=False, pull=True): ] before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - run_cmd("git pull --autostash", assert_success=True, environment=True) + pull_output = run_cmd("git pull --autostash", assert_success=True, environment=True, capture_output=True) after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} + # Check if git pull actually updated anything + if "Already up to date." 
not in pull_output: + repository_updated = True + # Check for differences in installation file hashes for file_name in files_to_check: if before_pull_hashes[file_name] != after_pull_hashes[file_name]: @@ -382,39 +387,40 @@ def update_requirements(initial_installation=False, pull=True): requirements_file = base_requirements - print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + if repository_updated or initial_installation: + print_big_message(f"Installing webui requirements from file: {requirements_file}") + print(f"TORCH: {torver}\n") - # Prepare the requirements file - textgen_requirements = open(requirements_file).read().splitlines() - if is_cuda118: - textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] - if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] + # Prepare the requirements file + textgen_requirements = open(requirements_file).read().splitlines() - with open('temp_requirements.txt', 'w') as file: - file.write('\n'.join(textgen_requirements)) + if is_cuda118: + textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] + if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 + textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - # Workaround for git+ packages not updating properly. - git_requirements = [req for req in textgen_requirements if req.startswith("git+")] - for req in git_requirements: - url = req.replace("git+", "") - package_name = url.split("/")[-1].split("@")[0].rstrip(".git") - run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) - print(f"Uninstalled {package_name}") + with open('temp_requirements.txt', 'w') as file: + file.write('\n'.join(textgen_requirements)) - # Install/update the project requirements - run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) - os.remove('temp_requirements.txt') + # Workaround for git+ packages not updating properly. + git_requirements = [req for req in textgen_requirements if req.startswith("git+")] + for req in git_requirements: + url = req.replace("git+", "") + package_name = url.split("/")[-1].split("@")[0].rstrip(".git") + run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) + print(f"Uninstalled {package_name}") + + # Install/update the project requirements + run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) + os.remove('temp_requirements.txt') + else: + print("Repository is already up to date. Skipping requirements installation.") # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. 
Check for pytorch-cuda as well for backwards compatibility if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1: clear_cache() return - if not os.path.exists("repositories/"): - os.mkdir("repositories") - clear_cache() From b85ae6bc96ff03aca12f8ad6212da39ec9cca192 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:10:17 -0700 Subject: [PATCH 16/20] Fix after previous commit --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 547f4df4..d823ecb3 100644 --- a/one_click.py +++ b/one_click.py @@ -352,7 +352,7 @@ def update_requirements(initial_installation=False, pull=True): after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} # Check if git pull actually updated anything - if "Already up to date." not in pull_output: + if "Already up to date." not in pull_output.stdout.decode('utf-8'): repository_updated = True # Check for differences in installation file hashes From 14584fda366b8f38d02e31fbb511c0527438c078 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 20:55:18 -0700 Subject: [PATCH 17/20] UI: don't change the color of italics in instruct mode --- css/html_instruct_style.css | 8 -------- 1 file changed, 8 deletions(-) diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css index 8a31d6e2..50b9402f 100644 --- a/css/html_instruct_style.css +++ b/css/html_instruct_style.css @@ -39,14 +39,6 @@ margin-bottom: 0 !important; } -.dark .message-body p em { - color: rgb(198 202 214) !important; -} - -.message-body p em { - color: rgb(110 110 110) !important; -} - .gradio-container .chat .assistant-message { padding: 20px; background: #f4f4f4; From d581334a41b0ddd2e9701aea4643ba6206f2edff Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 05:38:52 -0700 Subject: [PATCH 18/20] Don't install AutoAWQ on CUDA 11.8 --- one_click.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index d823ecb3..065265fe 100644 --- a/one_click.py +++ b/one_click.py @@ -395,7 +395,11 @@ def update_requirements(initial_installation=False, pull=True): textgen_requirements = open(requirements_file).read().splitlines() if is_cuda118: - textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] + textgen_requirements = [ + req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') + for req in textgen_requirements + if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() + ] if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] From a34273755b7389adb14ee0a04d5d6345b0b00dcb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 07:34:01 -0700 Subject: [PATCH 19/20] Revert "Updater: don't reinstall requirements if no updates after git pull" This reverts commit ac30e7fe9c0ef0a03c7efc268f3300a4c6963ca4. 
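[Editor's note — for context on what is being reverted: patches 15-16 decided whether to reinstall requirements by capturing the output of "git pull" and checking it for the string "Already up to date.". A rough, standalone sketch of that check follows, using a plain subprocess.run call in place of the project's run_cmd() helper; it is illustrative only, not code from the repository.]

# Rough sketch (not project code): how the reverted patch detected whether
# "git pull" actually changed anything, using plain subprocess instead of
# the project's run_cmd() helper.
import subprocess


def pull_updated_repository() -> bool:
    result = subprocess.run(
        ["git", "pull", "--autostash"],
        capture_output=True,
        check=True,
    )
    output = result.stdout.decode("utf-8")
    # git prints "Already up to date." when there is nothing to merge.
    return "Already up to date." not in output


if __name__ == "__main__":
    if pull_updated_repository():
        print("Repository updated; requirements may need reinstalling.")
    else:
        print("Repository is already up to date. Skipping requirements installation.")
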
--- one_click.py | 61 ++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/one_click.py b/one_click.py index 065265fe..0a0412ba 100644 --- a/one_click.py +++ b/one_click.py @@ -337,7 +337,6 @@ def update_requirements(initial_installation=False, pull=True): git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' run_cmd(git_creation_cmd, environment=True, assert_success=True) - repository_updated = False if pull: print_big_message("Updating the local copy of the repository with \"git pull\"") @@ -348,13 +347,9 @@ def update_requirements(initial_installation=False, pull=True): ] before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - pull_output = run_cmd("git pull --autostash", assert_success=True, environment=True, capture_output=True) + run_cmd("git pull --autostash", assert_success=True, environment=True) after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - # Check if git pull actually updated anything - if "Already up to date." not in pull_output.stdout.decode('utf-8'): - repository_updated = True - # Check for differences in installation file hashes for file_name in files_to_check: if before_pull_hashes[file_name] != after_pull_hashes[file_name]: @@ -387,44 +382,44 @@ def update_requirements(initial_installation=False, pull=True): requirements_file = base_requirements - if repository_updated or initial_installation: - print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + print_big_message(f"Installing webui requirements from file: {requirements_file}") + print(f"TORCH: {torver}\n") - # Prepare the requirements file - textgen_requirements = open(requirements_file).read().splitlines() + # Prepare the requirements file + textgen_requirements = open(requirements_file).read().splitlines() + if is_cuda118: + textgen_requirements = [ + req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') + for req in textgen_requirements + if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() + ] - if is_cuda118: - textgen_requirements = [ - req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') - for req in textgen_requirements - if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() - ] - if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] + if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 + textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - with open('temp_requirements.txt', 'w') as file: - file.write('\n'.join(textgen_requirements)) + with open('temp_requirements.txt', 'w') as file: + file.write('\n'.join(textgen_requirements)) - # Workaround for git+ packages not updating properly. 
- git_requirements = [req for req in textgen_requirements if req.startswith("git+")] - for req in git_requirements: - url = req.replace("git+", "") - package_name = url.split("/")[-1].split("@")[0].rstrip(".git") - run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) - print(f"Uninstalled {package_name}") + # Workaround for git+ packages not updating properly. + git_requirements = [req for req in textgen_requirements if req.startswith("git+")] + for req in git_requirements: + url = req.replace("git+", "") + package_name = url.split("/")[-1].split("@")[0].rstrip(".git") + run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) + print(f"Uninstalled {package_name}") - # Install/update the project requirements - run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) - os.remove('temp_requirements.txt') - else: - print("Repository is already up to date. Skipping requirements installation.") + # Install/update the project requirements + run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) + os.remove('temp_requirements.txt') # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1: clear_cache() return + if not os.path.exists("repositories/"): + os.mkdir("repositories") + clear_cache() From 42e80108f57830f14c2a20884371b7df2d60dd60 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 08:01:42 -0700 Subject: [PATCH 20/20] UI: clear the markdown LRU cache when using the default/notebook tabs --- modules/html_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/html_generator.py b/modules/html_generator.py index 1b687ade..c5eba5a8 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -150,6 +150,7 @@ def convert_to_markdown_wrapped(string, use_cache=True): def generate_basic_html(string): + convert_to_markdown.cache_clear() string = convert_to_markdown(string) string = f'
<style>{readable_css}</style><div class="readable">{string}</div>'
    return string
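
[Editor's note — the final patch works because convert_to_markdown is memoized: the cache_clear() attribute it calls only exists on functions wrapped with functools.lru_cache. Below is a minimal, self-contained sketch of that pattern; the name render_markdown and the maxsize value are illustrative placeholders, not the project's actual values.]

# Minimal sketch (not project code) of the functools.lru_cache pattern that
# makes convert_to_markdown.cache_clear() available.
import functools


@functools.lru_cache(maxsize=4096)
def render_markdown(text: str) -> str:
    # Stand-in for an expensive markdown -> HTML conversion.
    return text.replace("**", "")


def generate_page(text: str) -> str:
    # Clearing the cache first forces a fresh render instead of returning
    # a stale memoized result, mirroring what the patch adds.
    render_markdown.cache_clear()
    return render_markdown(text)


if __name__ == "__main__":
    print(generate_page("**hello**"))
    print(render_markdown.cache_info())  # e.g. CacheInfo(hits=0, misses=1, maxsize=4096, currsize=1)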