Merge branch 'oobabooga:dev' into dev

This commit is contained in:
Artificiangel 2024-09-01 03:53:23 -04:00 committed by GitHub
commit fe2c268088
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: B5690EEEBB952194
51 changed files with 1082 additions and 524 deletions

View file

@ -22,7 +22,7 @@
"source": [ "source": [
"# oobabooga/text-generation-webui\n", "# oobabooga/text-generation-webui\n",
"\n", "\n",
"After running both cells, a public gradio URL will appear at the bottom in a few minutes. You can optionally generate an API link.\n", "After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n",
"\n", "\n",
"* Project page: https://github.com/oobabooga/text-generation-webui\n", "* Project page: https://github.com/oobabooga/text-generation-webui\n",
"* Gradio server status: https://status.gradio.app/" "* Gradio server status: https://status.gradio.app/"
@ -53,44 +53,28 @@
"\n", "\n",
"#@markdown If unsure about the branch, write \"main\" or leave it blank.\n", "#@markdown If unsure about the branch, write \"main\" or leave it blank.\n",
"\n", "\n",
"import torch\n", "import os\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"\n", "\n",
"os.environ.pop('PYTHONPATH', None)\n",
"\n",
"if Path.cwd().name != 'text-generation-webui':\n", "if Path.cwd().name != 'text-generation-webui':\n",
" print(\"Installing the webui...\")\n", " print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n",
"\n", "\n",
" !git clone https://github.com/oobabooga/text-generation-webui\n", " !git clone https://github.com/oobabooga/text-generation-webui\n",
" %cd text-generation-webui\n", " %cd text-generation-webui\n",
"\n", "\n",
" torver = torch.__version__\n", " # Install the project in an isolated environment\n",
" print(f\"TORCH: {torver}\")\n", " !GPU_CHOICE=A \\\n",
" is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n", " USE_CUDA118=FALSE \\\n",
"\n", " LAUNCH_AFTER_INSTALL=FALSE \\\n",
" if is_cuda118:\n", " INSTALL_EXTENSIONS=FALSE \\\n",
" !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118\n", " ./start_linux.sh\n",
" else:\n",
" !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121\n",
"\n",
" textgen_requirements = open('requirements.txt').read().splitlines()\n",
" if is_cuda118:\n",
" textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n",
" with open('temp_requirements.txt', 'w') as file:\n",
" file.write('\\n'.join(textgen_requirements))\n",
"\n",
" !pip install -r temp_requirements.txt --upgrade\n",
"\n",
" print(\"\\033[1;32;1m\\n --> If you see a warning about \\\"previously imported packages\\\", just ignore it.\\033[0;37;0m\")\n",
" print(\"\\033[1;32;1m\\n --> There is no need to restart the runtime.\\n\\033[0;37;0m\")\n",
"\n",
" try:\n",
" import flash_attn\n",
" except:\n",
" !pip uninstall -y flash_attn\n",
"\n", "\n",
"# Parameters\n", "# Parameters\n",
"model_url = \"https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ\" #@param {type:\"string\"}\n", "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n",
"branch = \"gptq-4bit-32g-actorder_True\" #@param {type:\"string\"}\n", "branch = \"8.0bpw\" #@param {type:\"string\"}\n",
"command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n",
"api = False #@param {type:\"boolean\"}\n", "api = False #@param {type:\"boolean\"}\n",
"\n", "\n",
"if api:\n", "if api:\n",
@ -116,11 +100,10 @@
" output_folder = \"\"\n", " output_folder = \"\"\n",
"\n", "\n",
"# Start the web UI\n", "# Start the web UI\n",
"cmd = f\"python server.py --share\"\n", "cmd = f\"./start_linux.sh {command_line_flags} --share\"\n",
"if output_folder != \"\":\n", "if output_folder != \"\":\n",
" cmd += f\" --model {output_folder}\"\n", " cmd += f\" --model {output_folder}\"\n",
"cmd += f\" {command_line_flags}\"\n", "\n",
"print(cmd)\n",
"!$cmd" "!$cmd"
], ],
"metadata": { "metadata": {

View file

@ -10,27 +10,31 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
## Features ## Features
* 3 interface modes: default (two columns), notebook, and chat. * Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported through the Transformers loader.
* Multiple model backends: [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). * OpenAI-compatible API server with Chat and Completions endpoints -- see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples).
* Dropdown menu for quickly switching between different models. * Automatic prompt formatting for each model using the Jinja2 template in its metadata.
* Large number of extensions (built-in and user-contributed), including Coqui TTS for realistic voice outputs, Whisper STT for voice inputs, translation, [multimodal pipelines](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal), vector databases, Stable Diffusion integration, and a lot more. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. * Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both instruction-following and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat prompt, ensuring high-quality outputs without manual setup.
* [Chat with custom characters](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab#character). * "Past chats" menu to quickly switch between conversations and start new ones.
* Precise chat templates for instruction-following models, including Llama-2-chat, Alpaca, Vicuna, Mistral. * Free-form generation in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs.
* LoRA: train new LoRAs with your own data, load/unload LoRAs on the fly for generation. * Multiple sampling parameters and generation options for sophisticated text generation control.
* Transformers library integration: load models in 4-bit or 8-bit precision through bitsandbytes, use llama.cpp with transformers samplers (`llamacpp_HF` loader), CPU inference in 32-bit precision using PyTorch. * Easy switching between different models through the UI without restarting, using the "Model" tab.
* OpenAI-compatible API server with Chat and Completions endpoints -- see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). * Simple LoRA fine-tuning tool to customize models with your data.
* All in one folder. The requirements are installed in a self-contained `installer_files` folder that doesn't interfere with the system's environment.
* Extensions support, including numerous built-in and user-contributed extensions. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
## How to install ## How to install
1) Clone or [download](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) the repository. 1) Clone or [download](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) the repository.
2) Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS. 2) Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS.
3) Select your GPU vendor when asked. 3) Select your GPU vendor when asked.
4) Once the installation ends, browse to `http://localhost:7860/?__theme=dark`. 4) Once the installation ends, browse to `http://localhost:7860`.
5) Have fun! 5) Have fun!
To restart the web UI in the future, just run the `start_` script again. This script creates an `installer_files` folder where it sets up the project's requirements. In case you need to reinstall the requirements, you can simply delete that folder and start the web UI again. To restart the web UI in the future, run the `start_` script again.
The script accepts command-line flags. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there. This script creates an `installer_files` folder where it sets up the project's requirements. If you need to reinstall the requirements, just delete that folder and start the web UI again.
The script accepts command-line flags (for example, `./start_linux.sh --help`). Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there, such as `--api` in case you need to use the API.
To get updates in the future, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`. To get updates in the future, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`.
@ -204,16 +208,16 @@ List of command-line flags
usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS] usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
[--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices]
[--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code]
[--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE]
[--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock]
[--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE] [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm]
[--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn]
[--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--no_inject_fused_attention] [--no_xformers] [--no_sdpa] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act]
[--hqq-backend HQQ_BACKEND] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
[--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT]
[--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
[--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
[--checkpoint CHECKPOINT] [--monkey-patch] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention]
Text generation web UI Text generation web UI
@ -237,7 +241,7 @@ Basic settings:
Model loader: Model loader:
--loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2,
AutoGPTQ, AutoAWQ. AutoGPTQ.
Transformers/Accelerate: Transformers/Accelerate:
--cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow. --cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow.
@ -254,6 +258,7 @@ Transformers/Accelerate:
--force-safetensors Set use_safetensors=True while loading the model. This prevents arbitrary code execution. --force-safetensors Set use_safetensors=True while loading the model. This prevents arbitrary code execution.
--no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast. --no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
--use_flash_attention_2 Set use_flash_attention_2=True while loading the model. --use_flash_attention_2 Set use_flash_attention_2=True while loading the model.
--use_eager_attention Set attn_implementation= eager while loading the model.
bitsandbytes 4-bit: bitsandbytes 4-bit:
--load-in-4bit Load the model with 4-bit precision (using bitsandbytes). --load-in-4bit Load the model with 4-bit precision (using bitsandbytes).
@ -263,7 +268,7 @@ bitsandbytes 4-bit:
llama.cpp: llama.cpp:
--flash-attn Use flash-attention. --flash-attn Use flash-attention.
--tensorcores Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. NVIDIA only. --tensorcores NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.
--n_ctx N_CTX Size of the prompt context. --n_ctx N_CTX Size of the prompt context.
--threads THREADS Number of threads to use. --threads THREADS Number of threads to use.
--threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing. --threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing.
@ -272,7 +277,7 @@ llama.cpp:
--no-mmap Prevent mmap from being used. --no-mmap Prevent mmap from being used.
--mlock Force the system to keep the model in RAM. --mlock Force the system to keep the model in RAM.
--n-gpu-layers N_GPU_LAYERS Number of layers to offload to the GPU. --n-gpu-layers N_GPU_LAYERS Number of layers to offload to the GPU.
--tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17. --tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.
--numa Activate NUMA task allocation for llama.cpp. --numa Activate NUMA task allocation for llama.cpp.
--logits_all Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower. --logits_all Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.
--no_offload_kqv Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance. --no_offload_kqv Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.
@ -280,6 +285,7 @@ llama.cpp:
--row_split Split the model by rows across GPUs. This may improve multi-gpu performance. --row_split Split the model by rows across GPUs. This may improve multi-gpu performance.
--streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed. --streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.
--attention-sink-size ATTENTION_SINK_SIZE StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt. --attention-sink-size ATTENTION_SINK_SIZE StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt.
--tokenizer-dir TOKENIZER_DIR Load the tokenizer from this folder. Meant to be used with llamacpp_HF through the command-line.
ExLlamaV2: ExLlamaV2:
--gpu-split GPU_SPLIT Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7. --gpu-split GPU_SPLIT Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.
@ -287,6 +293,8 @@ ExLlamaV2:
--max_seq_len MAX_SEQ_LEN Maximum sequence length. --max_seq_len MAX_SEQ_LEN Maximum sequence length.
--cfg-cache ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader. --cfg-cache ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.
--no_flash_attn Force flash-attention to not be used. --no_flash_attn Force flash-attention to not be used.
--no_xformers Force xformers to not be used.
--no_sdpa Force Torch SDPA to not be used.
--cache_8bit Use 8-bit cache to save VRAM. --cache_8bit Use 8-bit cache to save VRAM.
--cache_4bit Use Q4 cache to save VRAM. --cache_4bit Use Q4 cache to save VRAM.
--num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral. --num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral.
@ -301,12 +309,12 @@ AutoGPTQ:
--wbits WBITS Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. --wbits WBITS Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.
--groupsize GROUPSIZE Group size. --groupsize GROUPSIZE Group size.
AutoAWQ:
--no_inject_fused_attention Disable the use of fused attention, which will use less VRAM at the cost of slower inference.
HQQ: HQQ:
--hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN. --hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.
TensorRT-LLM:
--cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.
DeepSpeed: DeepSpeed:
--deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. --deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.
--nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading. --nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading.
@ -327,6 +335,7 @@ Gradio:
--gradio-auth-path GRADIO_AUTH_PATH Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above. --gradio-auth-path GRADIO_AUTH_PATH Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.
--ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file. --ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file.
--ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file. --ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file.
--subpath SUBPATH Customize the subpath for gradio, use with reverse proxy
API: API:
--api Enable the API extension. --api Enable the API extension.
@ -392,18 +401,11 @@ Run `python download-model.py --help` to see all the options.
https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/main/Colab-TextGen-GPU.ipynb https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/main/Colab-TextGen-GPU.ipynb
## Community
* Subreddit: https://www.reddit.com/r/Oobabooga/
* Discord: https://discord.gg/jwZCF2dPQN
## Acknowledgment ## Acknowledgment
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
## Links
#### Community
* Subreddit: https://www.reddit.com/r/oobabooga/
* Discord: https://discord.gg/jwZCF2dPQN
#### Support
* ko-fi: https://ko-fi.com/oobabooga
* GitHub Sponsors: https://github.com/sponsors/oobabooga

View file

@ -1,4 +1,4 @@
#!/bin/bash #!/usr/bin/env bash
cd "$(dirname "${BASH_SOURCE[0]}")" cd "$(dirname "${BASH_SOURCE[0]}")"

View file

@ -90,6 +90,7 @@
line-height: 1.428571429 !important; line-height: 1.428571429 !important;
color: rgb(243 244 246) !important; color: rgb(243 244 246) !important;
text-shadow: 2px 2px 2px rgb(0 0 0); text-shadow: 2px 2px 2px rgb(0 0 0);
font-weight: 500;
} }
.message-body p em { .message-body p em {

View file

@ -46,6 +46,7 @@
.message-body p { .message-body p {
font-size: 15px !important; font-size: 15px !important;
line-height: 22.5px !important; line-height: 22.5px !important;
font-weight: 500;
} }
.message-body p, .chat .message-body ul, .chat .message-body ol { .message-body p, .chat .message-body ul, .chat .message-body ol {

View file

@ -88,6 +88,7 @@
margin-bottom: 0 !important; margin-bottom: 0 !important;
font-size: 15px !important; font-size: 15px !important;
line-height: 1.428571429 !important; line-height: 1.428571429 !important;
font-weight: 500;
} }
.dark .message-body p em { .dark .message-body p em {

View file

@ -44,6 +44,7 @@
margin-bottom: 0 !important; margin-bottom: 0 !important;
font-size: 15px !important; font-size: 15px !important;
line-height: 1.428571429 !important; line-height: 1.428571429 !important;
font-weight: 500;
} }
.dark .message-body p em { .dark .message-body p em {

View file

@ -1,4 +1,14 @@
pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}/*! html body gradio-app .gradio-container pre code.hljs {
display: block;
overflow-x: auto;
padding: 1em
}
html body gradio-app .gradio-container code.hljs {
padding: 3px 5px
}
/*!
Theme: GitHub Dark Theme: GitHub Dark
Description: Dark theme as seen on github.com Description: Dark theme as seen on github.com
Author: github.com Author: github.com
@ -7,4 +17,95 @@ pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5p
Outdated base version: https://github.com/primer/github-syntax-dark Outdated base version: https://github.com/primer/github-syntax-dark
Current colors taken from GitHub's CSS Current colors taken from GitHub's CSS
*/.hljs{color:#c9d1d9;background:#0d1117}.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-template-tag,.hljs-template-variable,.hljs-type,.hljs-variable.language_{color:#ff7b72}.hljs-title,.hljs-title.class_,.hljs-title.class_.inherited__,.hljs-title.function_{color:#d2a8ff}.hljs-attr,.hljs-attribute,.hljs-literal,.hljs-meta,.hljs-number,.hljs-operator,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-variable{color:#79c0ff}.hljs-meta .hljs-string,.hljs-regexp,.hljs-string{color:#a5d6ff}.hljs-built_in,.hljs-symbol{color:#ffa657}.hljs-code,.hljs-comment,.hljs-formula{color:#8b949e}.hljs-name,.hljs-quote,.hljs-selector-pseudo,.hljs-selector-tag{color:#7ee787}.hljs-subst{color:#c9d1d9}.hljs-section{color:#1f6feb;font-weight:700}.hljs-bullet{color:#f2cc60}.hljs-emphasis{color:#c9d1d9;font-style:italic}.hljs-strong{color:#c9d1d9;font-weight:700}.hljs-addition{color:#aff5b4;background-color:#033a16}.hljs-deletion{color:#ffdcd7;background-color:#67060c} */
html body gradio-app .gradio-container .hljs {
color: #c9d1d9;
background: #0d1117
}
html body gradio-app .gradio-container .hljs-doctag,
html body gradio-app .gradio-container .hljs-keyword,
html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
html body gradio-app .gradio-container .hljs-template-tag,
html body gradio-app .gradio-container .hljs-template-variable,
html body gradio-app .gradio-container .hljs-type,
html body gradio-app .gradio-container .hljs-variable.language_ {
color: #ff7b72
}
html body gradio-app .gradio-container .hljs-title,
html body gradio-app .gradio-container .hljs-title.class_,
html body gradio-app .gradio-container .hljs-title.class_.inherited__,
html body gradio-app .gradio-container .hljs-title.function_ {
color: #d2a8ff
}
html body gradio-app .gradio-container .hljs-attr,
html body gradio-app .gradio-container .hljs-attribute,
html body gradio-app .gradio-container .hljs-literal,
html body gradio-app .gradio-container .hljs-meta,
html body gradio-app .gradio-container .hljs-number,
html body gradio-app .gradio-container .hljs-operator,
html body gradio-app .gradio-container .hljs-selector-attr,
html body gradio-app .gradio-container .hljs-selector-class,
html body gradio-app .gradio-container .hljs-selector-id,
html body gradio-app .gradio-container .hljs-variable {
color: #79c0ff
}
html body gradio-app .gradio-container .hljs-meta .hljs-string,
html body gradio-app .gradio-container .hljs-regexp,
html body gradio-app .gradio-container .hljs-string {
color: #a5d6ff
}
html body gradio-app .gradio-container .hljs-built_in,
html body gradio-app .gradio-container .hljs-symbol {
color: #ffa657
}
html body gradio-app .gradio-container .hljs-code,
html body gradio-app .gradio-container .hljs-comment,
html body gradio-app .gradio-container .hljs-formula {
color: #8b949e
}
html body gradio-app .gradio-container .hljs-name,
html body gradio-app .gradio-container .hljs-quote,
html body gradio-app .gradio-container .hljs-selector-pseudo,
html body gradio-app .gradio-container .hljs-selector-tag {
color: #7ee787
}
html body gradio-app .gradio-container .hljs-subst {
color: #c9d1d9
}
html body gradio-app .gradio-container .hljs-section {
color: #1f6feb;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-bullet {
color: #f2cc60
}
html body gradio-app .gradio-container .hljs-emphasis {
color: #c9d1d9;
font-style: italic
}
html body gradio-app .gradio-container .hljs-strong {
color: #c9d1d9;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-addition {
color: #aff5b4;
background-color: #033a16
}
html body gradio-app .gradio-container .hljs-deletion {
color: #ffdcd7;
background-color: #67060c
}

111
css/highlightjs/github.min.css vendored Normal file
View file

@ -0,0 +1,111 @@
html body gradio-app .gradio-container pre code.hljs {
display: block;
overflow-x: auto;
padding: 1em
}
html body gradio-app .gradio-container code.hljs {
padding: 3px 5px
}
/*!
Theme: GitHub
Description: Light theme as seen on github.com
Author: github.com
Maintainer: @Hirse
Updated: 2021-05-15
Outdated base version: https://github.com/primer/github-syntax-light
Current colors taken from GitHub's CSS
*/
html body gradio-app .gradio-container .hljs {
color: #24292e;
background: #fff
}
html body gradio-app .gradio-container .hljs-doctag,
html body gradio-app .gradio-container .hljs-keyword,
html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
html body gradio-app .gradio-container .hljs-template-tag,
html body gradio-app .gradio-container .hljs-template-variable,
html body gradio-app .gradio-container .hljs-type,
html body gradio-app .gradio-container .hljs-variable.language_ {
color: #d73a49
}
html body gradio-app .gradio-container .hljs-title,
html body gradio-app .gradio-container .hljs-title.class_,
html body gradio-app .gradio-container .hljs-title.class_.inherited__,
html body gradio-app .gradio-container .hljs-title.function_ {
color: #6f42c1
}
html body gradio-app .gradio-container .hljs-attr,
html body gradio-app .gradio-container .hljs-attribute,
html body gradio-app .gradio-container .hljs-literal,
html body gradio-app .gradio-container .hljs-meta,
html body gradio-app .gradio-container .hljs-number,
html body gradio-app .gradio-container .hljs-operator,
html body gradio-app .gradio-container .hljs-selector-attr,
html body gradio-app .gradio-container .hljs-selector-class,
html body gradio-app .gradio-container .hljs-selector-id,
html body gradio-app .gradio-container .hljs-variable {
color: #005cc5
}
html body gradio-app .gradio-container .hljs-meta .hljs-string,
html body gradio-app .gradio-container .hljs-regexp,
html body gradio-app .gradio-container .hljs-string {
color: #032f62
}
html body gradio-app .gradio-container .hljs-built_in,
html body gradio-app .gradio-container .hljs-symbol {
color: #e36209
}
html body gradio-app .gradio-container .hljs-code,
html body gradio-app .gradio-container .hljs-comment,
html body gradio-app .gradio-container .hljs-formula {
color: #6a737d
}
html body gradio-app .gradio-container .hljs-name,
html body gradio-app .gradio-container .hljs-quote,
html body gradio-app .gradio-container .hljs-selector-pseudo,
html body gradio-app .gradio-container .hljs-selector-tag {
color: #22863a
}
html body gradio-app .gradio-container .hljs-subst {
color: #24292e
}
html body gradio-app .gradio-container .hljs-section {
color: #005cc5;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-bullet {
color: #735c0f
}
html body gradio-app .gradio-container .hljs-emphasis {
color: #24292e;
font-style: italic
}
html body gradio-app .gradio-container .hljs-strong {
color: #24292e;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-addition {
color: #22863a;
background-color: #f0fff4
}
html body gradio-app .gradio-container .hljs-deletion {
color: #b31d28;
background-color: #ffeef0
}

View file

@ -39,14 +39,6 @@
margin-bottom: 0 !important; margin-bottom: 0 !important;
} }
.dark .message-body p em {
color: rgb(198 202 214) !important;
}
.message-body p em {
color: rgb(110 110 110) !important;
}
.gradio-container .chat .assistant-message { .gradio-container .chat .assistant-message {
padding: 20px; padding: 20px;
background: #f4f4f4; background: #f4f4f4;

View file

@ -62,10 +62,6 @@ ol li p, ul li p {
border: 0; border: 0;
} }
.gradio-container-3-18-0 .prose * h1, h2, h3, h4 {
color: white;
}
.gradio-container { .gradio-container {
max-width: 100% !important; max-width: 100% !important;
padding-top: 0 !important; padding-top: 0 !important;
@ -378,6 +374,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
} }
} }
.chat-parent .prose {
visibility: visible;
}
.old-ui .chat-parent { .old-ui .chat-parent {
height: calc(100dvh - 192px - var(--header-height) - var(--input-delta)); height: calc(100dvh - 192px - var(--header-height) - var(--input-delta));
margin-bottom: var(--input-delta) !important; margin-bottom: var(--input-delta) !important;
@ -399,6 +399,22 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
padding-bottom: 15px !important; padding-bottom: 15px !important;
} }
.message-body h1,
.message-body h2,
.message-body h3,
.message-body h4 {
color: var(--body-text-color);
margin: 20px 0 10px 0;
}
.dark .message q {
color: #f5b031;
}
.message-body q::before, .message-body q::after {
content: "";
}
.message-body li { .message-body li {
list-style-position: outside; list-style-position: outside;
} }
@ -447,6 +463,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
border-radius: 5px; border-radius: 5px;
font-size: 82%; font-size: 82%;
padding: 1px 3px; padding: 1px 3px;
background: white !important;
color: #1f2328;
}
.dark .message-body code {
background: #0d1117 !important; background: #0d1117 !important;
color: rgb(201 209 217); color: rgb(201 209 217);
} }
@ -796,4 +817,3 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
max-height: 300px; max-height: 300px;
} }
} }

View file

@ -19,7 +19,7 @@ Add `--api` to your command-line flags.
### Examples ### Examples
For the documentation with all the parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file. For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.
The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters). The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).
@ -114,6 +114,30 @@ curl -k http://127.0.0.1:5000/v1/internal/logits \
}' }'
``` ```
#### List models
```shell
curl -k http://127.0.0.1:5000/v1/internal/model/list \
-H "Content-Type: application/json"
```
#### Load model
```shell
curl -k http://127.0.0.1:5000/v1/internal/model/load \
-H "Content-Type: application/json" \
-d '{
"model_name": "model_name",
"args": {
"load_in_4bit": true,
"n_gpu_layers": 12
},
"settings": {
"instruction_template": "Alpaca"
}
}'
```
#### Python chat example #### Python chat example
```python ```python

View file

@ -29,6 +29,7 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co"
class ModelDownloader: class ModelDownloader:
def __init__(self, max_retries=5): def __init__(self, max_retries=5):
self.max_retries = max_retries self.max_retries = max_retries
self.session = self.get_session()
def get_session(self): def get_session(self):
session = requests.Session() session = requests.Session()
@ -72,7 +73,7 @@ class ModelDownloader:
return model, branch return model, branch
def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None): def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None):
session = self.get_session() session = self.session
page = f"/api/models/{model}/tree/{branch}" page = f"/api/models/{model}/tree/{branch}"
cursor = b"" cursor = b""
@ -192,7 +193,7 @@ class ModelDownloader:
attempt = 0 attempt = 0
while attempt < max_retries: while attempt < max_retries:
attempt += 1 attempt += 1
session = self.get_session() session = self.session
headers = {} headers = {}
mode = 'wb' mode = 'wb'
@ -212,11 +213,15 @@ class ModelDownloader:
total_size = int(r.headers.get('content-length', 0)) total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 1024 # 1MB block_size = 1024 * 1024 # 1MB
filename_str = str(filename) # Convert PosixPath to string if necessary
tqdm_kwargs = { tqdm_kwargs = {
'total': total_size, 'total': total_size,
'unit': 'iB', 'unit': 'B',
'unit_scale': True, 'unit_scale': True,
'bar_format': '{l_bar}{bar}| {n_fmt}/{total_fmt} {rate_fmt}' 'unit_divisor': 1024,
'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
'desc': f"{filename_str}: "
} }
if 'COLAB_GPU' in os.environ: if 'COLAB_GPU' in os.environ:
@ -233,7 +238,7 @@ class ModelDownloader:
t.update(len(data)) t.update(len(data))
if total_size != 0 and self.progress_bar is not None: if total_size != 0 and self.progress_bar is not None:
count += len(data) count += len(data)
self.progress_bar(float(count) / float(total_size), f"{filename}") self.progress_bar(float(count) / float(total_size), f"{filename_str}")
break # Exit loop if successful break # Exit loop if successful
except (RequestException, ConnectionError, Timeout) as e: except (RequestException, ConnectionError, Timeout) as e:

View file

@ -319,7 +319,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
yield {'prompt': prompt} yield {'prompt': prompt}
return return
token_count = len(encode(prompt)[0])
debug_msg({'prompt': prompt, 'generate_params': generate_params}) debug_msg({'prompt': prompt, 'generate_params': generate_params})
if stream: if stream:
@ -330,7 +329,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
answer = '' answer = ''
seen_content = '' seen_content = ''
completion_token_count = 0
for a in generator: for a in generator:
answer = a['internal'][-1][1] answer = a['internal'][-1][1]
@ -345,6 +343,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
chunk = chat_streaming_chunk(new_content) chunk = chat_streaming_chunk(new_content)
yield chunk yield chunk
token_count = len(encode(prompt)[0])
completion_token_count = len(encode(answer)[0]) completion_token_count = len(encode(answer)[0])
stop_reason = "stop" stop_reason = "stop"
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= generate_params['max_new_tokens']: if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= generate_params['max_new_tokens']:
@ -429,8 +428,6 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
prompt = decode(prompt)[0] prompt = decode(prompt)[0]
prefix = prompt if echo else '' prefix = prompt if echo else ''
token_count = len(encode(prompt)[0])
total_prompt_token_count += token_count
# generate reply ####################################### # generate reply #######################################
debug_msg({'prompt': prompt, 'generate_params': generate_params}) debug_msg({'prompt': prompt, 'generate_params': generate_params})
@ -440,6 +437,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
for a in generator: for a in generator:
answer = a answer = a
token_count = len(encode(prompt)[0])
total_prompt_token_count += token_count
completion_token_count = len(encode(answer)[0]) completion_token_count = len(encode(answer)[0])
total_completion_token_count += completion_token_count total_completion_token_count += completion_token_count
stop_reason = "stop" stop_reason = "stop"

9
js/dark_theme.js Normal file
View file

@ -0,0 +1,9 @@
/**
 * Toggle the UI between light and dark mode.
 *
 * Flips the "dark" class on <body> and points the #highlight-css <link> at the
 * matching highlight.js stylesheet. The stylesheet is chosen from the class
 * state after the toggle, so the two cannot drift apart, and a missing
 * <link> element is tolerated instead of throwing.
 */
function toggleDarkMode() {
  // classList.toggle() returns true when the class is present after the call.
  var isDark = document.body.classList.toggle("dark");

  var currentCSS = document.getElementById("highlight-css");
  if (!currentCSS) {
    // No syntax-highlighting stylesheet on the page: nothing left to switch.
    return;
  }

  currentCSS.setAttribute(
    "href",
    isDark ? "file/css/highlightjs/github-dark.min.css" : "file/css/highlightjs/github.min.css"
  );
}

View file

@ -218,7 +218,6 @@ function doSyntaxHighlighting() {
{ left: "\\[", right: "\\]", display: true }, { left: "\\[", right: "\\]", display: true },
], ],
}); });
}); });
observer.observe(targetElement, config); observer.observe(targetElement, config);
@ -445,14 +444,12 @@ function updateCssProperties() {
// Check if the chat container is visible // Check if the chat container is visible
if (chatContainer.clientHeight > 0) { if (chatContainer.clientHeight > 0) {
// Calculate new chat height and adjust CSS properties
var numericHeight = chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100; var numericHeight = chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100;
if (document.getElementById("chat-tab").style.paddingBottom != "") { if (document.getElementById("chat-tab").style.paddingBottom != "") {
numericHeight += 20; numericHeight += 20;
} }
const newChatHeight = `${numericHeight}px`;
const newChatHeight = `${numericHeight}px`;
document.documentElement.style.setProperty("--chat-height", newChatHeight); document.documentElement.style.setProperty("--chat-height", newChatHeight);
document.documentElement.style.setProperty("--input-delta", `${chatInputHeight - 40}px`); document.documentElement.style.setProperty("--input-delta", `${chatInputHeight - 40}px`);
@ -463,15 +460,19 @@ function updateCssProperties() {
// Adjust scrollTop based on input height change // Adjust scrollTop based on input height change
if (chatInputHeight !== currentChatInputHeight) { if (chatInputHeight !== currentChatInputHeight) {
chatContainer.scrollTop += chatInputHeight > currentChatInputHeight ? chatInputHeight : -chatInputHeight + 40; if (!isScrolled && chatInputHeight < currentChatInputHeight) {
chatContainer.scrollTop = chatContainer.scrollHeight;
} else {
chatContainer.scrollTop += chatInputHeight - currentChatInputHeight;
}
currentChatInputHeight = chatInputHeight; currentChatInputHeight = chatInputHeight;
} }
} }
} }
// Observe textarea size changes and call update function // Observe textarea size changes and call update function
new ResizeObserver(updateCssProperties) new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-input textarea"));
.observe(document.querySelector("#chat-input textarea"));
// Handle changes in window size // Handle changes in window size
window.addEventListener("resize", updateCssProperties); window.addEventListener("resize", updateCssProperties);

View file

@ -72,8 +72,6 @@ def add_lora_autogptq(lora_names):
else: else:
if len(lora_names) > 1: if len(lora_names) > 1:
logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.') logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
if not shared.args.no_inject_fused_attention:
logger.warning('Fused Attention + AutoGPTQ may break Lora loading. Disable it.')
peft_config = GPTQLoraConfig( peft_config = GPTQLoraConfig(
inference_mode=True, inference_mode=True,

View file

@ -3,6 +3,7 @@ import io
import requests import requests
from modules import shared
from modules.logging_colors import logger from modules.logging_colors import logger
original_open = open original_open = open
@ -54,6 +55,7 @@ def my_open(*args, **kwargs):
'\n <script src="file/js/katex/auto-render.min.js"></script>' '\n <script src="file/js/katex/auto-render.min.js"></script>'
'\n <script src="file/js/highlightjs/highlight.min.js"></script>' '\n <script src="file/js/highlightjs/highlight.min.js"></script>'
'\n <script src="file/js/highlightjs/highlightjs-copy.min.js"></script>' '\n <script src="file/js/highlightjs/highlightjs-copy.min.js"></script>'
f'\n <link id="highlight-css" rel="stylesheet" href="file/css/highlightjs/{"github-dark" if shared.settings["dark_theme"] else "github"}.min.css">'
'\n <script>hljs.addPlugin(new CopyButtonPlugin());</script>' '\n <script>hljs.addPlugin(new CopyButtonPlugin());</script>'
'\n </head>' '\n </head>'
) )

View file

@ -17,7 +17,11 @@ from PIL import Image
import modules.shared as shared import modules.shared as shared
from modules import utils from modules import utils
from modules.extensions import apply_extensions from modules.extensions import apply_extensions
from modules.html_generator import chat_html_wrapper, make_thumbnail from modules.html_generator import (
chat_html_wrapper,
convert_to_markdown,
make_thumbnail
)
from modules.logging_colors import logger from modules.logging_colors import logger
from modules.text_generation import ( from modules.text_generation import (
generate_reply, generate_reply,
@ -88,8 +92,16 @@ def generate_chat_prompt(user_input, state, **kwargs):
chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2']) chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2'])
instruction_template = jinja_env.from_string(state['instruction_template_str']) instruction_template = jinja_env.from_string(state['instruction_template_str'])
instruct_renderer = partial(instruction_template.render, add_generation_prompt=False)
chat_template = jinja_env.from_string(chat_template_str) chat_template = jinja_env.from_string(chat_template_str)
instruct_renderer = partial(
instruction_template.render,
builtin_tools=None,
tools=None,
tools_in_user_message=False,
add_generation_prompt=False
)
chat_renderer = partial( chat_renderer = partial(
chat_template.render, chat_template.render,
add_generation_prompt=False, add_generation_prompt=False,
@ -367,7 +379,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
def impersonate_wrapper(text, state): def impersonate_wrapper(text, state):
static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
prompt = generate_chat_prompt('', state, impersonate=True) prompt = generate_chat_prompt('', state, impersonate=True)
@ -421,9 +432,12 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
send_dummy_message(text, state) send_dummy_message(text, state)
send_dummy_reply(state['start_with'], state) send_dummy_reply(state['start_with'], state)
history = state['history']
for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)): for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)):
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history
save_history(history, state['unique_id'], state['character_menu'], state['mode'])
def remove_last_message(history): def remove_last_message(history):
if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>': if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>':
@ -484,7 +498,7 @@ def start_new_chat(state):
greeting = replace_character_names(state['greeting'], state['name1'], state['name2']) greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
if greeting != '': if greeting != '':
history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]] history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]] history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]]
unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
save_history(history, unique_id, state['character_menu'], state['mode']) save_history(history, unique_id, state['character_menu'], state['mode'])
@ -995,3 +1009,207 @@ def my_yaml_output(data):
result += " " + line.rstrip(' ') + "\n" result += " " + line.rstrip(' ') + "\n"
return result return result
def handle_replace_last_reply_click(text, state):
    """Run `replace_last_reply`, save the resulting history and redraw the chat.

    Returns a list of `[history, chat HTML, ""]`.
    """
    new_history = replace_last_reply(text, state)
    save_history(new_history, state['unique_id'], state['character_menu'], state['mode'])

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(new_history, *[state[key] for key in render_keys])
    return [new_history, chat_html, ""]
def handle_send_dummy_message_click(text, state):
    """Run `send_dummy_message`, save the resulting history and redraw the chat.

    Returns a list of `[history, chat HTML, ""]`.
    """
    new_history = send_dummy_message(text, state)
    save_history(new_history, state['unique_id'], state['character_menu'], state['mode'])

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(new_history, *[state[key] for key in render_keys])
    return [new_history, chat_html, ""]
def handle_send_dummy_reply_click(text, state):
    """Run `send_dummy_reply`, save the resulting history and redraw the chat.

    Returns a list of `[history, chat HTML, ""]`.
    """
    new_history = send_dummy_reply(text, state)
    save_history(new_history, state['unique_id'], state['character_menu'], state['mode'])

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(new_history, *[state[key] for key in render_keys])
    return [new_history, chat_html, ""]
def handle_remove_last_click(state):
    """Run `remove_last_message` on the current history, save it and redraw the chat.

    Returns a list of `[history, chat HTML, last_input]`, where `last_input`
    is the first value returned by `remove_last_message`.
    """
    removed = remove_last_message(state['history'])
    last_input, trimmed_history = removed
    save_history(trimmed_history, state['unique_id'], state['character_menu'], state['mode'])

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(trimmed_history, *[state[key] for key in render_keys])
    return [trimmed_history, chat_html, last_input]
def handle_unique_id_select(state):
    """Load the history identified by `state['unique_id']` and redraw the chat.

    The `convert_to_markdown` cache is cleared after rendering.
    Returns a list of `[history, chat HTML]`.
    """
    loaded_history = load_history(state['unique_id'], state['character_menu'], state['mode'])

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(loaded_history, *[state[key] for key in render_keys])

    convert_to_markdown.cache_clear()
    return [loaded_history, chat_html]
def handle_start_new_chat_click(state):
    """Start a new chat, redraw it and refresh the list of past chats.

    The `convert_to_markdown` cache is cleared after rendering.
    Returns a list of `[history, chat HTML, dropdown update]`; the dropdown
    update selects the first entry of the refreshed list.
    """
    fresh_history = start_new_chat(state)
    past_chats = find_all_histories_with_first_prompts(state)

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(fresh_history, *[state[key] for key in render_keys])

    convert_to_markdown.cache_clear()

    dropdown_update = gr.update(choices=past_chats, value=past_chats[0][1])
    return [fresh_history, chat_html, dropdown_update]
def handle_delete_chat_confirm_click(state):
    """Delete the current chat, load a replacement and redraw it.

    The position of the deleted chat in `find_all_histories(state)` is passed
    (as a string) to `load_history_after_deletion`. The `convert_to_markdown`
    cache is cleared after rendering.

    Returns a list of `[history, chat HTML, unique_id]` followed by three
    `gr.update` visibility changes (hidden, visible, hidden).
    """
    known_ids = find_all_histories(state)
    position = known_ids.index(state['unique_id'])
    index = str(position)

    delete_history(state['unique_id'], state['character_menu'], state['mode'])
    next_history, next_unique_id = load_history_after_deletion(state, index)

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(next_history, *[state[key] for key in render_keys])

    convert_to_markdown.cache_clear()

    outputs = [next_history, chat_html, next_unique_id]
    for is_visible in (False, True, False):
        outputs.append(gr.update(visible=is_visible))

    return outputs
def handle_rename_chat_click():
    """Return three `gr.update` objects that make components visible.

    The first update also sets the value "My New Chat".
    """
    name_field_update = gr.update(visible=True, value="My New Chat")
    second_update = gr.update(visible=True)
    third_update = gr.update(visible=True)
    return [name_field_update, second_update, third_update]
def handle_rename_chat_confirm(rename_to, state):
    """Rename the current chat to `rename_to` and refresh the list of past chats.

    Returns four `gr.update` objects: the refreshed choices with `rename_to`
    selected, followed by three updates that hide components.
    """
    rename_history(state['unique_id'], rename_to, state['character_menu'], state['mode'])
    past_chats = find_all_histories_with_first_prompts(state)

    outputs = [gr.update(choices=past_chats, value=rename_to)]
    outputs.extend(gr.update(visible=False) for _ in range(3))
    return outputs
def handle_upload_chat_history(load_chat_history, state):
    """Start a new chat, fill it from an uploaded history, save and redraw it.

    The `convert_to_markdown` cache is cleared after rendering.
    Returns a list of `[history, chat HTML, dropdown update]`; the dropdown
    update selects the first entry of the refreshed list of past chats.
    """
    blank_history = start_new_chat(state)
    uploaded_history = load_history_json(load_chat_history, blank_history)
    past_chats = find_all_histories_with_first_prompts(state)

    save_history(uploaded_history, state['unique_id'], state['character_menu'], state['mode'])

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(uploaded_history, *[state[key] for key in render_keys])

    convert_to_markdown.cache_clear()

    dropdown_update = gr.update(choices=past_chats, value=past_chats[0][1])
    return [uploaded_history, chat_html, dropdown_update]
def handle_character_menu_change(state):
    """Load the selected character into `state`, then load and redraw its latest chat.

    `state` is updated in place with the five values returned by
    `load_character`. The `convert_to_markdown` cache is cleared after
    rendering.

    Returns a list of `[history, chat HTML, name1, name2, picture, greeting,
    context, dropdown update]`.
    """
    character = load_character(state['character_menu'], state['name1'], state['name2'])
    name1, name2, picture, greeting, context = character

    # Store the loaded values under the keys the rest of the handler reads.
    state_keys = ('name1', 'name2', 'character_picture', 'greeting', 'context')
    for key, value in zip(state_keys, (name1, name2, picture, greeting, context)):
        state[key] = value

    latest_history = load_latest_history(state)
    past_chats = find_all_histories_with_first_prompts(state)

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(latest_history, *[state[key] for key in render_keys])

    convert_to_markdown.cache_clear()

    dropdown_update = gr.update(choices=past_chats, value=past_chats[0][1])
    return [
        latest_history,
        chat_html,
        name1,
        name2,
        picture,
        greeting,
        context,
        dropdown_update,
    ]
def handle_mode_change(state):
    """Load and redraw the latest chat for the current mode.

    The `convert_to_markdown` cache is cleared after rendering.

    Returns a list of `[history, chat HTML]` followed by three `gr.update`
    objects: one visible unless the mode is 'instruct', one visible only in
    'chat-instruct' mode, and the refreshed list of past chats with its first
    entry selected.
    """
    latest_history = load_latest_history(state)
    past_chats = find_all_histories_with_first_prompts(state)

    render_keys = ('name1', 'name2', 'mode', 'chat_style', 'character_menu')
    chat_html = redraw_html(latest_history, *[state[key] for key in render_keys])

    convert_to_markdown.cache_clear()

    not_instruct_update = gr.update(visible=state['mode'] != 'instruct')
    chat_instruct_update = gr.update(visible=state['mode'] == 'chat-instruct')
    dropdown_update = gr.update(choices=past_chats, value=past_chats[0][1])
    return [latest_history, chat_html, not_instruct_update, chat_instruct_update, dropdown_update]
def handle_save_character_click(name2):
    """Return `name2` unchanged together with a `gr.update` that shows a component."""
    show_update = gr.update(visible=True)
    return [name2, show_update]
def handle_load_template_click(instruction_template):
    """Load an instruction template.

    Returns a list holding the result of `load_instruction_template` and the
    placeholder string "Select template to load...".
    """
    loaded_template = load_instruction_template(instruction_template)
    placeholder = "Select template to load..."
    return [loaded_template, placeholder]
def handle_save_template_click(instruction_template_str):
    """Build the values for saving an instruction template.

    Returns a list of: the default file name, the target folder, the YAML
    produced by `generate_instruction_template_yaml`, and a `gr.update`
    that shows a component.
    """
    yaml_contents = generate_instruction_template_yaml(instruction_template_str)
    show_update = gr.update(visible=True)
    return ["My Template.yaml", "instruction-templates/", yaml_contents, show_update]
def handle_delete_template_click(template):
    """Build the values for deleting an instruction template.

    Returns a list of: the file name `<template>.yaml`, the templates folder,
    and a `gr.update` that shows a component.
    """
    file_name = f"{template}.yaml"
    show_update = gr.update(visible=True)
    return [file_name, "instruction-templates/", show_update]
def handle_your_picture_change(picture, state):
    """Pass `picture` to `upload_your_profile_picture` and redraw the chat.

    The chat is redrawn from `state['history']` with `reset_cache=True`.
    Returns the redrawn chat HTML.
    """
    upload_your_profile_picture(picture)

    render_keys = ('history', 'name1', 'name2', 'mode', 'chat_style', 'character_menu')
    return redraw_html(*[state[key] for key in render_keys], reset_cache=True)
def handle_send_instruction_click(state):
    """Build a chat prompt for the placeholder input "Input" in instruct mode.

    `state` is modified in place: its mode is set to 'instruct' and its
    history is replaced with an empty one before the prompt is generated.
    """
    empty_history = {'internal': [], 'visible': []}
    state['mode'] = 'instruct'
    state['history'] = empty_history

    return generate_chat_prompt("Input", state)
def handle_send_chat_click(state):
    """Return the chat prompt generated for an empty input with `_continue=True`."""
    return generate_chat_prompt("", state, _continue=True)

View file

@ -42,13 +42,47 @@ def fix_newlines(string):
return string return string
def replace_quotes(text):
    """Wrap every quoted span in ``<q>...</q>``, keeping the quote marks themselves.

    Quotes are recognised in their HTML-entity form (the pairs listed below).
    A span may extend over several lines, and matching is non-greedy, so it
    ends at the first closing entity of the pair that opened it.

    Args:
        text: The string to process.

    Returns:
        ``text`` with each matched span, including its opening and closing
        entities, wrapped in a ``<q>`` element. Text without a complete quote
        pair is returned unchanged.
    """
    # Opening/closing quote pairs, written as HTML entities
    quote_pairs = [
        ('&quot;', '&quot;'),  # Double quotes
        ('&ldquo;', '&rdquo;'),  # Unicode left and right double quotation marks
        ('&lsquo;', '&rsquo;'),  # Unicode left and right single quotation marks
        ('&laquo;', '&raquo;'),  # French quotes
        ('&bdquo;', '&ldquo;'),  # German quotes
        ('&#8220;', '&#8221;'),  # Unicode quotes (numeric entities)
        ('&#x201C;', '&#x201D;'),  # Unicode quotes (hex entities)
    ]

    # One alternative per pair. No capture groups are used: with per-pair
    # groups, only the first alternative populates groups 1-3, so every other
    # pair would be rendered as the literal text "NoneNoneNone".
    pattern = '|'.join(f'{re.escape(open_q)}.*?{re.escape(close_q)}' for open_q, close_q in quote_pairs)

    # The whole match is opening quote + content + closing quote; DOTALL lets
    # the content span newlines.
    return re.sub(pattern, lambda m: f'<q>{m.group(0)}</q>', text, flags=re.DOTALL)
def replace_blockquote(m): def replace_blockquote(m):
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '') return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
@functools.lru_cache(maxsize=4096) @functools.lru_cache(maxsize=None)
def convert_to_markdown(string): def convert_to_markdown(string):
# Make \[ \] LaTeX equations inline
pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'
replacement = r'\\[ \1 \\]'
string = re.sub(pattern, replacement, string, flags=re.MULTILINE)
# Escape backslashes
string = string.replace('\\', '\\\\')
# Quote to <q></q>
string = replace_quotes(string)
# Blockquote # Blockquote
string = re.sub(r'(^|[\n])&gt;', r'\1>', string) string = re.sub(r'(^|[\n])&gt;', r'\1>', string)
pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL) pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
@ -69,12 +103,27 @@ def convert_to_markdown(string):
result = '' result = ''
is_code = False is_code = False
is_latex = False
for line in string.split('\n'): for line in string.split('\n'):
if line.lstrip(' ').startswith('```'): stripped_line = line.strip()
if stripped_line.startswith('```'):
is_code = not is_code is_code = not is_code
elif stripped_line.startswith('$$'):
is_latex = not is_latex
elif stripped_line.endswith('$$'):
is_latex = False
elif stripped_line.startswith('\\\\['):
is_latex = True
elif stripped_line.startswith('\\\\]'):
is_latex = False
elif stripped_line.endswith('\\\\]'):
is_latex = False
result += line result += line
if is_code or line.startswith('|'): # Don't add an extra \n for tables or code
# Don't add an extra \n for tables, code, or LaTeX
if is_code or is_latex or line.startswith('|'):
result += '\n' result += '\n'
else: else:
result += '\n\n' result += '\n\n'
@ -124,6 +173,7 @@ def convert_to_markdown_wrapped(string, use_cache=True):
def generate_basic_html(string): def generate_basic_html(string):
convert_to_markdown.cache_clear()
string = convert_to_markdown(string) string = convert_to_markdown(string)
string = f'<style>{readable_css}</style><div class="readable-container">{string}</div>' string = f'<style>{readable_css}</style><div class="readable-container">{string}</div>'
return string return string

View file

@ -127,15 +127,6 @@ loaders_and_params = OrderedDict({
'no_use_fast', 'no_use_fast',
'autogptq_info', 'autogptq_info',
], ],
'AutoAWQ': [
'cpu_memory',
'gpu_memory',
'auto_devices',
'max_seq_len',
'no_inject_fused_attention',
'trust_remote_code',
'no_use_fast',
],
'HQQ': [ 'HQQ': [
'hqq_backend', 'hqq_backend',
'trust_remote_code', 'trust_remote_code',
@ -200,7 +191,6 @@ def transformers_samplers():
loaders_samplers = { loaders_samplers = {
'Transformers': transformers_samplers(), 'Transformers': transformers_samplers(),
'AutoGPTQ': transformers_samplers(), 'AutoGPTQ': transformers_samplers(),
'AutoAWQ': transformers_samplers(),
'HQQ': transformers_samplers(), 'HQQ': transformers_samplers(),
'ExLlamav2': { 'ExLlamav2': {
'temperature', 'temperature',

View file

@ -13,8 +13,8 @@ global_scores = None
def get_next_logits(*args, **kwargs): def get_next_logits(*args, **kwargs):
if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']: if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
shared.model, shared.tokenizer = load_model(shared.previous_model_name) shared.model, shared.tokenizer = load_model(shared.model_name)
needs_lock = not args[2] # use_samplers needs_lock = not args[2] # use_samplers
if needs_lock: if needs_lock:

View file

@ -75,7 +75,6 @@ def load_model(model_name, loader=None):
'llamacpp_HF': llamacpp_HF_loader, 'llamacpp_HF': llamacpp_HF_loader,
'ExLlamav2': ExLlamav2_loader, 'ExLlamav2': ExLlamav2_loader,
'ExLlamav2_HF': ExLlamav2_HF_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader,
'AutoAWQ': AutoAWQ_loader,
'HQQ': HQQ_loader, 'HQQ': HQQ_loader,
'TensorRT-LLM': TensorRT_LLM_loader, 'TensorRT-LLM': TensorRT_LLM_loader,
} }
@ -99,7 +98,7 @@ def load_model(model_name, loader=None):
if model is None: if model is None:
return None, None return None, None
else: else:
tokenizer = load_tokenizer(model_name, model) tokenizer = load_tokenizer(model_name)
shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings}) shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings})
if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt'): if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt'):
@ -114,9 +113,13 @@ def load_model(model_name, loader=None):
return model, tokenizer return model, tokenizer
def load_tokenizer(model_name, model): def load_tokenizer(model_name, tokenizer_dir=None):
if tokenizer_dir:
path_to_model = Path(tokenizer_dir)
else:
path_to_model = Path(f"{shared.args.model_dir}/{model_name}/")
tokenizer = None tokenizer = None
path_to_model = Path(f"{shared.args.model_dir}/{model_name}/")
if path_to_model.exists(): if path_to_model.exists():
if shared.args.no_use_fast: if shared.args.no_use_fast:
logger.info('Loading the tokenizer with use_fast=False.') logger.info('Loading the tokenizer with use_fast=False.')
@ -279,35 +282,24 @@ def llamacpp_loader(model_name):
def llamacpp_HF_loader(model_name): def llamacpp_HF_loader(model_name):
from modules.llamacpp_hf import LlamacppHF from modules.llamacpp_hf import LlamacppHF
path = Path(f'{shared.args.model_dir}/{model_name}') if shared.args.tokenizer_dir:
logger.info(f'Using tokenizer from: \"{shared.args.tokenizer_dir}\"')
# Check if a HF tokenizer is available for the model
if all((path / file).exists() for file in ['tokenizer_config.json']):
logger.info(f'Using tokenizer from: \"{path}\"')
else: else:
logger.error("Could not load the model because a tokenizer in Transformers format was not found.") path = Path(f'{shared.args.model_dir}/{model_name}')
return None, None # Check if a HF tokenizer is available for the model
if all((path / file).exists() for file in ['tokenizer_config.json']):
logger.info(f'Using tokenizer from: \"{path}\"')
else:
logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
return None, None
model = LlamacppHF.from_pretrained(model_name) model = LlamacppHF.from_pretrained(model_name)
return model
if shared.args.tokenizer_dir:
def AutoAWQ_loader(model_name): tokenizer = load_tokenizer(model_name, tokenizer_dir=shared.args.tokenizer_dir)
from awq import AutoAWQForCausalLM return model, tokenizer
else:
model_dir = Path(f'{shared.args.model_dir}/{model_name}') return model
model = AutoAWQForCausalLM.from_quantized(
quant_path=model_dir,
max_new_tokens=shared.args.max_seq_len,
trust_remote_code=shared.args.trust_remote_code,
fuse_layers=not shared.args.no_inject_fused_attention,
max_memory=get_max_memory_dict(),
batch_size=1,
safetensors=any(model_dir.glob('*.safetensors')),
)
return model
def AutoGPTQ_loader(model_name): def AutoGPTQ_loader(model_name):
@ -387,14 +379,15 @@ def clear_torch_cache():
torch.cuda.empty_cache() torch.cuda.empty_cache()
def unload_model(): def unload_model(keep_model_name=False):
shared.model = shared.tokenizer = None shared.model = shared.tokenizer = None
shared.previous_model_name = shared.model_name
shared.model_name = 'None'
shared.lora_names = [] shared.lora_names = []
shared.model_dirty_from_training = False shared.model_dirty_from_training = False
clear_torch_cache() clear_torch_cache()
if not keep_model_name:
shared.model_name = 'None'
def reload_model(): def reload_model():
unload_model() unload_model()
@ -412,7 +405,7 @@ def unload_model_if_idle():
if time.time() - last_generation_time > shared.args.idle_timeout * 60: if time.time() - last_generation_time > shared.args.idle_timeout * 60:
if shared.model is not None: if shared.model is not None:
logger.info("Unloading the model for inactivity.") logger.info("Unloading the model for inactivity.")
unload_model() unload_model(keep_model_name=True)
finally: finally:
shared.generation_lock.release() shared.generation_lock.release()

View file

@ -180,8 +180,6 @@ def infer_loader(model_name, model_settings):
loader = None loader = None
elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0): elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
loader = 'ExLlamav2_HF' loader = 'ExLlamav2_HF'
elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
loader = 'AutoAWQ'
elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists(): elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists():
loader = 'llamacpp_HF' loader = 'llamacpp_HF'
elif len(list(path_to_model.glob('*.gguf'))) > 0: elif len(list(path_to_model.glob('*.gguf'))) > 0:

View file

@ -13,7 +13,6 @@ from modules.logging_colors import logger
model = None model = None
tokenizer = None tokenizer = None
model_name = 'None' model_name = 'None'
previous_model_name = 'None'
is_seq2seq = False is_seq2seq = False
model_dirty_from_training = False model_dirty_from_training = False
lora_names = [] lora_names = []
@ -33,7 +32,7 @@ settings = {
'dark_theme': True, 'dark_theme': True,
'show_controls': True, 'show_controls': True,
'start_with': '', 'start_with': '',
'mode': 'chat', 'mode': 'chat-instruct',
'chat_style': 'cai-chat', 'chat_style': 'cai-chat',
'prompt-default': 'QA', 'prompt-default': 'QA',
'prompt-notebook': 'QA', 'prompt-notebook': 'QA',
@ -44,8 +43,6 @@ settings = {
'negative_prompt': '', 'negative_prompt': '',
'seed': -1, 'seed': -1,
'truncation_length': 2048, 'truncation_length': 2048,
'truncation_length_min': 0,
'truncation_length_max': 200000,
'max_tokens_second': 0, 'max_tokens_second': 0,
'max_updates_second': 0, 'max_updates_second': 0,
'prompt_lookup_num_tokens': 0, 'prompt_lookup_num_tokens': 0,
@ -89,7 +86,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft
# Model loader # Model loader
group = parser.add_argument_group('Model loader') group = parser.add_argument_group('Model loader')
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ, AutoAWQ.') group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ.')
# Transformers/Accelerate # Transformers/Accelerate
group = parser.add_argument_group('Transformers/Accelerate') group = parser.add_argument_group('Transformers/Accelerate')
@ -118,7 +115,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for
# llama.cpp # llama.cpp
group = parser.add_argument_group('llama.cpp') group = parser.add_argument_group('llama.cpp')
group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
group.add_argument('--tensorcores', action='store_true', help='Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. NVIDIA only.') group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.')
group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
@ -127,7 +124,7 @@ group.add_argument('--n_batch', type=int, default=512, help='Maximum number of p
group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.') group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.') group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
group.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.') group.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.')
group.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17.') group.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')
group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.') group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
group.add_argument('--logits_all', action='store_true', help='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.') group.add_argument('--logits_all', action='store_true', help='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.')
group.add_argument('--no_offload_kqv', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.') group.add_argument('--no_offload_kqv', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
@ -135,6 +132,7 @@ group.add_argument('--cache-capacity', type=str, help='Maximum cache capacity (l
group.add_argument('--row_split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.') group.add_argument('--row_split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')
group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
group.add_argument('--attention-sink-size', type=int, default=5, help='StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt.') group.add_argument('--attention-sink-size', type=int, default=5, help='StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt.')
group.add_argument('--tokenizer-dir', type=str, help='Load the tokenizer from this folder. Meant to be used with llamacpp_HF through the command-line.')
# ExLlamaV2 # ExLlamaV2
group = parser.add_argument_group('ExLlamaV2') group = parser.add_argument_group('ExLlamaV2')
@ -160,10 +158,6 @@ group.add_argument('--disable_exllamav2', action='store_true', help='Disable ExL
group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.') group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
group.add_argument('--groupsize', type=int, default=-1, help='Group size.') group.add_argument('--groupsize', type=int, default=-1, help='Group size.')
# AutoAWQ
group = parser.add_argument_group('AutoAWQ')
group.add_argument('--no_inject_fused_attention', action='store_true', help='Disable the use of fused attention, which will use less VRAM at the cost of slower inference.')
# HQQ # HQQ
group = parser.add_argument_group('HQQ') group = parser.add_argument_group('HQQ')
group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.') group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
@ -195,6 +189,7 @@ group.add_argument('--gradio-auth', type=str, help='Set Gradio authentication pa
group.add_argument('--gradio-auth-path', type=str, help='Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.', default=None) group.add_argument('--gradio-auth-path', type=str, help='Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.', default=None)
group.add_argument('--ssl-keyfile', type=str, help='The path to the SSL certificate key file.', default=None) group.add_argument('--ssl-keyfile', type=str, help='The path to the SSL certificate key file.', default=None)
group.add_argument('--ssl-certfile', type=str, help='The path to the SSL certificate cert file.', default=None) group.add_argument('--ssl-certfile', type=str, help='The path to the SSL certificate cert file.', default=None)
group.add_argument('--subpath', type=str, help='Customize the subpath for gradio, use with reverse proxy')
# API # API
group = parser.add_argument_group('API') group = parser.add_argument_group('API')
@ -216,6 +211,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED')
group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED')
group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED')
group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED')
group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED')
args = parser.parse_args() args = parser.parse_args()
args_defaults = parser.parse_args([]) args_defaults = parser.parse_args([])
@ -266,8 +262,6 @@ def fix_loader_name(name):
return 'ExLlamav2' return 'ExLlamav2'
elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']: elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
return 'ExLlamav2_HF' return 'ExLlamav2_HF'
elif name in ['autoawq', 'awq', 'auto-awq']:
return 'AutoAWQ'
elif name in ['hqq']: elif name in ['hqq']:
return 'HQQ' return 'HQQ'
elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']: elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']:

View file

@ -32,8 +32,8 @@ from modules.models import clear_torch_cache, load_model
def generate_reply(*args, **kwargs): def generate_reply(*args, **kwargs):
if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']: if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
shared.model, shared.tokenizer = load_model(shared.previous_model_name) shared.model, shared.tokenizer = load_model(shared.model_name)
shared.generation_lock.acquire() shared.generation_lock.acquire()
try: try:

View file

@ -165,7 +165,7 @@ def create_ui():
stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.') stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
with gr.Column(): with gr.Column():
max_length = gr.Slider(label='max_length', minimum=0, maximum=shared.settings['truncation_length_max'], value=0, step=256, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.') max_length = gr.Number(label='max_length', precision=0, step=256, value=0, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
with gr.Row(): with gr.Row():
start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu) start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu)

View file

@ -15,8 +15,6 @@ with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
css += f.read() css += f.read()
with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f: with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f:
css += f.read() css += f.read()
with open(Path(__file__).resolve().parent / '../css/highlightjs/github-dark.min.css', 'r') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f: with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
css += f.read() css += f.read()
with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f: with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
@ -29,6 +27,8 @@ with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
show_controls_js = f.read() show_controls_js = f.read()
with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f: with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
update_big_picture_js = f.read() update_big_picture_js = f.read()
with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f:
dark_theme_js = f.read()
refresh_symbol = '🔄' refresh_symbol = '🔄'
delete_symbol = '🗑️' delete_symbol = '🗑️'
@ -78,7 +78,6 @@ def list_model_elements():
'groupsize', 'groupsize',
'triton', 'triton',
'desc_act', 'desc_act',
'no_inject_fused_attention',
'no_inject_fused_mlp', 'no_inject_fused_mlp',
'no_use_cuda_fp16', 'no_use_cuda_fp16',
'disable_exllama', 'disable_exllama',
@ -116,6 +115,7 @@ def list_model_elements():
'hqq_backend', 'hqq_backend',
'cpp_runner', 'cpp_runner',
] ]
if is_torch_xpu_available(): if is_torch_xpu_available():
for i in range(torch.xpu.device_count()): for i in range(torch.xpu.device_count()):
elements.append(f'gpu_memory_{i}') elements.append(f'gpu_memory_{i}')
@ -184,6 +184,7 @@ def list_interface_input_elements():
'start_with', 'start_with',
'character_menu', 'character_menu',
'history', 'history',
'unique_id',
'name1', 'name1',
'user_bio', 'user_bio',
'name2', 'name2',
@ -213,9 +214,11 @@ def list_interface_input_elements():
def gather_interface_values(*args): def gather_interface_values(*args):
interface_elements = list_interface_input_elements()
output = {} output = {}
for i, element in enumerate(list_interface_input_elements()): for element, value in zip(interface_elements, args):
output[element] = args[i] output[element] = value
if not shared.args.multi_user: if not shared.args.multi_user:
shared.persistent_interface_state = output shared.persistent_interface_state = output
@ -226,8 +229,14 @@ def gather_interface_values(*args):
def apply_interface_values(state, use_persistent=False): def apply_interface_values(state, use_persistent=False):
if use_persistent: if use_persistent:
state = shared.persistent_interface_state state = shared.persistent_interface_state
if 'textbox-default' in state:
state.pop('prompt_menu-default')
if 'textbox-notebook' in state:
state.pop('prompt_menu-notebook')
elements = list_interface_input_elements() elements = list_interface_input_elements()
if len(state) == 0: if len(state) == 0:
return [gr.update() for k in elements] # Dummy, do nothing return [gr.update() for k in elements] # Dummy, do nothing
else: else:
@ -236,7 +245,7 @@ def apply_interface_values(state, use_persistent=False):
def save_settings(state, preset, extensions_list, show_controls, theme_state): def save_settings(state, preset, extensions_list, show_controls, theme_state):
output = copy.deepcopy(shared.settings) output = copy.deepcopy(shared.settings)
exclude = ['name2', 'greeting', 'context', 'turn_template', 'truncation_length'] exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str']
for k in state: for k in state:
if k in shared.settings and k not in exclude: if k in shared.settings and k not in exclude:
output[k] = state[k] output[k] = state[k]
@ -268,7 +277,7 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
if key in shared.default_settings and output[key] == shared.default_settings[key]: if key in shared.default_settings and output[key] == shared.default_settings[key]:
output.pop(key) output.pop(key)
return yaml.dump(output, sort_keys=False, width=float("inf")) return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True)
def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True): def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):

View file

@ -85,13 +85,13 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row(): with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row(): with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
with gr.Row(): with gr.Row():
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar']) shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
def create_chat_settings_ui(): def create_chat_settings_ui():
@ -137,7 +137,7 @@ def create_chat_settings_ui():
shared.gradio['tavern_json'] = gr.State() shared.gradio['tavern_json'] = gr.State()
with gr.Column(): with gr.Column():
shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False) shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False) shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label='Description', interactive=False, elem_classes=['add_scrollbar'])
shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False) shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
@ -181,169 +181,112 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox'].submit( shared.gradio['textbox'].submit(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Regenerate'].click( shared.gradio['Regenerate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Continue'].click( shared.gradio['Continue'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Impersonate'].click( shared.gradio['Impersonate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then(
chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Replace last reply'].click( shared.gradio['Replace last reply'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.replace_last_reply, gradio('textbox', 'interface_state'), gradio('history')).then( chat.handle_replace_last_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
lambda: '', None, gradio('textbox'), show_progress=False).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
shared.gradio['Send dummy message'].click( shared.gradio['Send dummy message'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.send_dummy_message, gradio('textbox', 'interface_state'), gradio('history')).then( chat.handle_send_dummy_message_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
lambda: '', None, gradio('textbox'), show_progress=False).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
shared.gradio['Send dummy reply'].click( shared.gradio['Send dummy reply'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.send_dummy_reply, gradio('textbox', 'interface_state'), gradio('history')).then( chat.handle_send_dummy_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
lambda: '', None, gradio('textbox'), show_progress=False).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
shared.gradio['Remove last'].click( shared.gradio['Remove last'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.remove_last_message, gradio('history'), gradio('textbox', 'history'), show_progress=False).then( chat.handle_remove_last_click, gradio('interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
shared.gradio['Stop'].click( shared.gradio['Stop'].click(
stop_everything_event, None, None, queue=False).then( stop_everything_event, None, None, queue=False).then(
chat.redraw_html, gradio(reload_arr), gradio('display')) chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
if not shared.args.multi_user: if not shared.args.multi_user:
shared.gradio['unique_id'].select( shared.gradio['unique_id'].select(
chat.load_history, gradio('unique_id', 'character_menu', 'mode'), gradio('history')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.redraw_html, gradio(reload_arr), gradio('display')) chat.handle_unique_id_select, gradio('interface_state'), gradio('history', 'display'), show_progress=False)
shared.gradio['Start new chat'].click( shared.gradio['Start new chat'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.start_new_chat, gradio('interface_state'), gradio('history')).then( chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False)
shared.gradio['delete_chat'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr)) shared.gradio['delete_chat'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr))
shared.gradio['delete_chat-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr)) shared.gradio['delete_chat-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr))
shared.gradio['delete_chat-confirm'].click( shared.gradio['delete_chat-confirm'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x, y: str(chat.find_all_histories(x).index(y)), gradio('interface_state', 'unique_id'), gradio('temporary_text')).then( chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id') + gradio(clear_arr), show_progress=False)
chat.delete_history, gradio('unique_id', 'character_menu', 'mode'), None).then(
chat.load_history_after_deletion, gradio('interface_state', 'temporary_text'), gradio('history', 'unique_id'), show_progress=False).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr))
shared.gradio['rename_chat'].click(
lambda: "My New Chat", None, gradio('rename_to')).then(
lambda: [gr.update(visible=True)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
shared.gradio['rename_to-cancel'].click(
lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
shared.gradio['rename_to-cancel'].click(lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
shared.gradio['rename_to-confirm'].click( shared.gradio['rename_to-confirm'].click(
chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then( chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
lambda x, y: gr.update(choices=chat.find_all_histories_with_first_prompts(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id'))
shared.gradio['rename_to'].submit( shared.gradio['rename_to'].submit(
chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then( chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
lambda x, y: gr.update(choices=chat.find_all_histories_with_first_prompts(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id'))
shared.gradio['load_chat_history'].upload( shared.gradio['load_chat_history'].upload(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.start_new_chat, gradio('interface_state'), gradio('history')).then( chat.handle_upload_chat_history, gradio('load_chat_history', 'interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False).then(
chat.load_history_json, gradio('load_chat_history', 'history'), gradio('history')).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}')
shared.gradio['character_menu'].change( shared.gradio['character_menu'].change(
chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.load_latest_history, gradio('interface_state'), gradio('history')).then( chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
shared.gradio['mode'].change(None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
shared.gradio['mode'].change( shared.gradio['mode'].change(
lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.load_latest_history, gradio('interface_state'), gradio('history')).then( chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then( None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False)
shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display')) shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False)
# Save/delete a character # Save/delete a character
shared.gradio['save_character'].click( shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False)
lambda x: x, gradio('name2'), gradio('save_character_filename')).then( shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'), show_progress=False)
lambda: gr.update(visible=True), None, gradio('character_saver')) shared.gradio['load_template'].click(chat.handle_load_template_click, gradio('instruction_template'), gradio('instruction_template_str', 'instruction_template'), show_progress=False)
shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'))
shared.gradio['load_template'].click(
chat.load_instruction_template, gradio('instruction_template'), gradio('instruction_template_str')).then(
lambda: "Select template to load...", None, gradio('instruction_template'))
shared.gradio['save_template'].click( shared.gradio['save_template'].click(
lambda: 'My Template.yaml', None, gradio('save_filename')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda: 'instruction-templates/', None, gradio('save_root')).then( chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False)
chat.generate_instruction_template_yaml, gradio('instruction_template_str'), gradio('save_contents')).then(
lambda: gr.update(visible=True), None, gradio('file_saver'))
shared.gradio['delete_template'].click(
lambda x: f'{x}.yaml', gradio('instruction_template'), gradio('delete_filename')).then(
lambda: 'instruction-templates/', None, gradio('delete_root')).then(
lambda: gr.update(visible=True), None, gradio('file_deleter'))
shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['save_chat_history'].click( shared.gradio['save_chat_history'].click(
lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(
None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}')
shared.gradio['Submit character'].click( shared.gradio['Submit character'].click(
chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')
shared.gradio['Submit tavern character'].click( shared.gradio['Submit tavern character'].click(
chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')
shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character'))
@ -351,35 +294,32 @@ def create_event_handlers():
shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)
shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)
shared.gradio['your_picture'].change( shared.gradio['your_picture'].change(
chat.upload_your_profile_picture, gradio('your_picture'), None).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
partial(chat.redraw_html, reset_cache=True), gradio(reload_arr), gradio('display')) chat.handle_your_picture_change, gradio('your_picture', 'interface_state'), gradio('display'), show_progress=False)
shared.gradio['send_instruction_to_default'].click( shared.gradio['send_instruction_to_default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( chat.handle_send_instruction_click, gradio('interface_state'), gradio('textbox-default'), show_progress=False).then(
partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}')
shared.gradio['send_instruction_to_notebook'].click( shared.gradio['send_instruction_to_notebook'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( chat.handle_send_instruction_click, gradio('interface_state'), gradio('textbox-notebook'), show_progress=False).then(
partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
shared.gradio['send_instruction_to_negative_prompt'].click( shared.gradio['send_instruction_to_negative_prompt'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( chat.handle_send_instruction_click, gradio('interface_state'), gradio('negative_prompt'), show_progress=False).then(
partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}')
shared.gradio['send-chat-to-default'].click( shared.gradio['send-chat-to-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then( chat.handle_send_chat_click, gradio('interface_state'), gradio('textbox-default'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}')
shared.gradio['send-chat-to-notebook'].click( shared.gradio['send-chat-to-notebook'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then( chat.handle_send_chat_click, gradio('interface_state'), gradio('textbox-notebook'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')

View file

@ -64,38 +64,46 @@ def create_event_handlers():
shared.gradio['Generate-default'].click( shared.gradio['Generate-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox-default'].submit( shared.gradio['textbox-default'].submit(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False)
shared.gradio['Continue-default'].click( shared.gradio['Continue-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False)
shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False)
shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False)
shared.gradio['save_prompt-default'].click( shared.gradio['save_prompt-default'].click(handle_save_prompt, gradio('textbox-default'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
lambda x: x, gradio('textbox-default'), gradio('save_contents')).then( shared.gradio['delete_prompt-default'].click(handle_delete_prompt, gradio('prompt_menu-default'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
lambda: 'prompts/', None, gradio('save_root')).then(
lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then(
lambda: gr.update(visible=True), None, gradio('file_saver'))
shared.gradio['delete_prompt-default'].click(
lambda: 'prompts/', None, gradio('delete_root')).then(
lambda x: x + '.txt', gradio('prompt_menu-default'), gradio('delete_filename')).then(
lambda: gr.update(visible=True), None, gradio('file_deleter'))
shared.gradio['textbox-default'].change(lambda x: f"<span>{count_tokens(x)}</span>", gradio('textbox-default'), gradio('token-counter-default'), show_progress=False) shared.gradio['textbox-default'].change(lambda x: f"<span>{count_tokens(x)}</span>", gradio('textbox-default'), gradio('token-counter-default'), show_progress=False)
shared.gradio['get_logits-default'].click( shared.gradio['get_logits-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
logits.get_next_logits, gradio('textbox-default', 'interface_state', 'use_samplers-default', 'logits-default'), gradio('logits-default', 'logits-default-previous'), show_progress=False) logits.get_next_logits, gradio('textbox-default', 'interface_state', 'use_samplers-default', 'logits-default'), gradio('logits-default', 'logits-default-previous'), show_progress=False)
shared.gradio['get_tokens-default'].click(get_token_ids, gradio('textbox-default'), gradio('tokens-default'), show_progress=False) shared.gradio['get_tokens-default'].click(get_token_ids, gradio('textbox-default'), gradio('tokens-default'), show_progress=False)
def handle_save_prompt(text):
    """Prepare the generic file-saver dialog for saving the current prompt.

    The returned list is consumed positionally by the 'save_contents',
    'save_filename', 'save_root' and 'file_saver' components.

    Args:
        text: prompt text to be written to disk.

    Returns:
        A list ``[contents, filename, root, visibility_update]`` where the
        filename is the current timestamp with a ``.txt`` suffix and the
        last item makes the saver dialog visible.
    """
    timestamped_name = utils.current_time() + ".txt"
    show_dialog = gr.update(visible=True)
    return [text, timestamped_name, "prompts/", show_dialog]
def handle_delete_prompt(prompt):
    """Prepare the generic file-deleter dialog for the selected prompt.

    The returned list is consumed positionally by the 'delete_filename',
    'delete_root' and 'file_deleter' components.

    Args:
        prompt: name of the selected prompt, without extension.

    Returns:
        A list ``[filename, root, visibility_update]``; the last item makes
        the deleter dialog visible.
    """
    target_name = prompt + ".txt"
    return [target_name, "prompts/", gr.update(visible=True)]

View file

@ -1,3 +1,5 @@
import traceback
import gradio as gr import gradio as gr
from modules import chat, presets, shared, ui, utils from modules import chat, presets, shared, ui, utils
@ -47,57 +49,119 @@ def create_ui():
def create_event_handlers(): def create_event_handlers():
shared.gradio['save_confirm'].click(
lambda x, y, z: utils.save_file(x + y, z), gradio('save_root', 'save_filename', 'save_contents'), None).then(
lambda: gr.update(visible=False), None, gradio('file_saver'))
shared.gradio['delete_confirm'].click(
lambda x, y: utils.delete_file(x + y), gradio('delete_root', 'delete_filename'), None).then(
lambda: gr.update(visible=False), None, gradio('file_deleter'))
shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter'))
shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver'))
shared.gradio['save_character_confirm'].click(
chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then(
lambda: gr.update(visible=False), None, gradio('character_saver')).then(
lambda x: gr.update(choices=utils.get_available_characters(), value=x), gradio('save_character_filename'), gradio('character_menu'))
shared.gradio['delete_character_confirm'].click(
lambda x: str(utils.get_available_characters().index(x)), gradio('character_menu'), gradio('temporary_text')).then(
chat.delete_character, gradio('character_menu'), None).then(
chat.update_character_menu_after_deletion, gradio('temporary_text'), gradio('character_menu')).then(
lambda: gr.update(visible=False), None, gradio('character_deleter'))
shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'))
shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'))
shared.gradio['save_preset'].click( shared.gradio['save_preset'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.generate_preset_yaml, gradio('interface_state'), gradio('save_preset_contents')).then( handle_save_preset_click, gradio('interface_state'), gradio('save_preset_contents', 'save_preset_filename', 'preset_saver'), show_progress=False)
lambda: 'My Preset', None, gradio('save_preset_filename')).then(
lambda: gr.update(visible=True), None, gradio('preset_saver'))
shared.gradio['save_preset_confirm'].click( shared.gradio['delete_preset'].click(handle_delete_preset_click, gradio('preset_menu'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
lambda x, y: utils.save_file(f'presets/{x}.yaml', y), gradio('save_preset_filename', 'save_preset_contents'), None).then( shared.gradio['save_grammar'].click(handle_save_grammar_click, gradio('grammar_string'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
lambda: gr.update(visible=False), None, gradio('preset_saver')).then( shared.gradio['delete_grammar'].click(handle_delete_grammar_click, gradio('grammar_file'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
lambda x: gr.update(choices=utils.get_available_presets(), value=x), gradio('save_preset_filename'), gradio('preset_menu'))
shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver')) shared.gradio['save_preset_confirm'].click(handle_save_preset_confirm_click, gradio('save_preset_filename', 'save_preset_contents'), gradio('preset_menu', 'preset_saver'), show_progress=False)
shared.gradio['save_confirm'].click(handle_save_confirm_click, gradio('save_root', 'save_filename', 'save_contents'), gradio('file_saver'), show_progress=False)
shared.gradio['delete_confirm'].click(handle_delete_confirm_click, gradio('delete_root', 'delete_filename'), gradio('file_deleter'), show_progress=False)
shared.gradio['save_character_confirm'].click(handle_save_character_confirm_click, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), gradio('character_menu', 'character_saver'), show_progress=False)
shared.gradio['delete_character_confirm'].click(handle_delete_character_confirm_click, gradio('character_menu'), gradio('character_menu', 'character_deleter'), show_progress=False)
shared.gradio['delete_preset'].click( shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'), show_progress=False)
lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then( shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver'))
lambda: 'presets/', None, gradio('delete_root')).then( shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter'))
lambda: gr.update(visible=True), None, gradio('file_deleter')) shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'), show_progress=False)
shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'), show_progress=False)
shared.gradio['save_grammar'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: x, gradio('grammar_string'), gradio('save_contents')).then(
lambda: 'grammars/', None, gradio('save_root')).then(
lambda: 'My Fancy Grammar.gbnf', None, gradio('save_filename')).then(
lambda: gr.update(visible=True), None, gradio('file_saver'))
def handle_save_preset_confirm_click(filename, contents):
    """Save a preset to ``presets/<filename>.yaml`` and close the preset saver.

    The returned list is consumed positionally by the 'preset_menu' and
    'preset_saver' components.

    Args:
        filename: preset name, without the ``.yaml`` extension.
        contents: text to write into the preset file.

    Returns:
        A two-item list: an update for the preset menu (refreshed choices
        with the new preset selected, or a no-op update if saving failed)
        followed by an update that hides the preset saver dialog.
    """
    try:
        utils.save_file(f"presets/{filename}.yaml", contents)
        available_presets = utils.get_available_presets()
        # Must be a plain update object. A trailing comma here would wrap it
        # in a 1-tuple, which is not a valid value for the preset menu output.
        output = gr.update(choices=available_presets, value=filename)
    except Exception:
        # Best effort: leave the menu untouched, log the failure, and still
        # close the dialog below.
        output = gr.update()
        traceback.print_exc()

    return [
        output,
        gr.update(visible=False)
    ]
def handle_save_confirm_click(root, filename, contents):
    """Write ``contents`` to ``root + filename`` and hide the file saver.

    Any exception raised while saving is printed as a traceback and
    otherwise ignored, so the dialog is closed regardless of the outcome.

    Args:
        root: directory prefix that is concatenated with ``filename``.
        filename: name of the file to write.
        contents: text to store in the file.

    Returns:
        An update that hides the 'file_saver' component.
    """
    try:
        destination = root + filename
        utils.save_file(destination, contents)
    except Exception:
        traceback.print_exc()

    hide_dialog = gr.update(visible=False)
    return hide_dialog
def handle_delete_confirm_click(root, filename):
    """Delete ``root + filename`` and hide the file deleter.

    Any exception raised while deleting is printed as a traceback and
    otherwise ignored, so the dialog is closed regardless of the outcome.

    Args:
        root: directory prefix that is concatenated with ``filename``.
        filename: name of the file to delete.

    Returns:
        An update that hides the 'file_deleter' component.
    """
    try:
        target = root + filename
        utils.delete_file(target)
    except Exception:
        traceback.print_exc()

    hide_dialog = gr.update(visible=False)
    return hide_dialog
def handle_save_character_confirm_click(name2, greeting, context, character_picture, filename):
    """Save a character and close the character saver dialog.

    The returned list is consumed positionally by the 'character_menu' and
    'character_saver' components.

    Args:
        name2, greeting, context, character_picture: character fields,
            forwarded unchanged to ``chat.save_character``.
        filename: name under which the character is saved; it also becomes
            the selected value of the character menu on success.

    Returns:
        A two-item list: an update for the character menu (refreshed
        choices with ``filename`` selected, or a no-op update if saving
        failed) followed by an update that hides the saver dialog.
    """
    try:
        chat.save_character(name2, greeting, context, character_picture, filename)
        characters = utils.get_available_characters()
        menu_update = gr.update(choices=characters, value=filename)
    except Exception:
        # Keep the menu as it is; the failure is only logged.
        menu_update = gr.update()
        traceback.print_exc()

    return [menu_update, gr.update(visible=False)]
def handle_delete_character_confirm_click(character):
    """Delete a character and close the character deleter dialog.

    The position of the character in the list of available characters is
    captured (as a string) before deletion and handed to
    ``chat.update_character_menu_after_deletion`` to build the menu update.

    Args:
        character: name of the character selected in the character menu.

    Returns:
        A two-item list for the 'character_menu' and 'character_deleter'
        components: the menu update (a no-op update if anything failed)
        followed by an update that hides the deleter dialog.
    """
    try:
        characters = utils.get_available_characters()
        position = str(characters.index(character))
        chat.delete_character(character)
        menu_update = chat.update_character_menu_after_deletion(position)
    except Exception:
        # Keep the menu as it is; the failure is only logged.
        menu_update = gr.update()
        traceback.print_exc()

    return [menu_update, gr.update(visible=False)]
def handle_save_preset_click(state):
    """Open the preset saver pre-filled with the current settings.

    The returned list is consumed positionally by the
    'save_preset_contents', 'save_preset_filename' and 'preset_saver'
    components.

    Args:
        state: interface state passed to ``presets.generate_preset_yaml``.

    Returns:
        A list ``[yaml_text, default_name, visibility_update]``; the last
        item makes the preset saver dialog visible.
    """
    preset_yaml = presets.generate_preset_yaml(state)
    show_dialog = gr.update(visible=True)
    return [preset_yaml, "My Preset", show_dialog]
def handle_delete_preset_click(preset):
    """Prepare the generic file-deleter dialog for the selected preset.

    The returned list is consumed positionally by the 'delete_filename',
    'delete_root' and 'file_deleter' components.

    Args:
        preset: name of the selected preset, without extension.

    Returns:
        A list ``[filename, root, visibility_update]``; the last item makes
        the deleter dialog visible.
    """
    target_name = f"{preset}.yaml"
    return [target_name, "presets/", gr.update(visible=True)]
def handle_save_grammar_click(grammar_string):
    """Prepare the generic file-saver dialog for saving a grammar.

    The returned list is consumed positionally by the 'save_contents',
    'save_filename', 'save_root' and 'file_saver' components.

    Args:
        grammar_string: grammar text to be written to disk.

    Returns:
        A list ``[contents, default_filename, root, visibility_update]``;
        the last item makes the saver dialog visible.
    """
    show_dialog = gr.update(visible=True)
    return [grammar_string, "My Fancy Grammar.gbnf", "grammars/", show_dialog]
def handle_delete_grammar_click(grammar_file):
    """Prepare the generic file-deleter dialog for the selected grammar.

    The returned list is consumed positionally by the 'delete_filename',
    'delete_root' and 'file_deleter' components. The grammar file name is
    passed through unchanged (no extension is appended here).

    Args:
        grammar_file: value of the grammar file selector.

    Returns:
        A list ``[filename, root, visibility_update]``; the last item makes
        the deleter dialog visible.
    """
    show_dialog = gr.update(visible=True)
    return [grammar_file, "grammars/", show_dialog]

View file

@ -66,7 +66,6 @@ def create_ui():
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu) ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button("Load", visible=not shared.settings['autoload_model'], elem_classes='refresh-button', interactive=not mu) shared.gradio['load_model'] = gr.Button("Load", visible=not shared.settings['autoload_model'], elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu) shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['reload_model'] = gr.Button("Reload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu) shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
with gr.Column(): with gr.Column():
@ -94,19 +93,19 @@ def create_ui():
shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend) shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.') shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.') shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17') shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch) shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads) shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch) shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None") shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None")
shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None") shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len) shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. Try lowering this if you run out of memory while loading the model.')
with gr.Blocks(): with gr.Blocks():
shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.05, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Slider(label='rope_freq_base', minimum=0, maximum=20000000, step=1000, info='If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63)', value=shared.args.rope_freq_base) shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=0.1, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.', value=shared.args.compress_pos_emb) shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.') shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.')
@ -118,7 +117,7 @@ def create_ui():
shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.') shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.')
shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.') shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.') shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.')
shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.') shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
@ -128,7 +127,6 @@ def create_ui():
shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.') shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.') shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.')
shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton)
shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Fuses layers for AutoAWQ. Disable if running low on VRAM.')
shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. Disable if running low on VRAM.') shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. Disable if running low on VRAM.')
shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.') shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.')
shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.') shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.')
@ -188,39 +186,24 @@ def create_ui():
def create_event_handlers(): def create_event_handlers():
shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params()), show_progress=False)
# In this event handler, the interface state is read and updated # In this event handler, the interface state is read and updated
# with the model defaults (if any), and then the model is loaded # with the model defaults (if any), and then the model is loaded
# unless "autoload_model" is unchecked # unless "autoload_model" is unchecked
shared.gradio['model_menu'].change( shared.gradio['model_menu'].change(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
apply_model_settings_to_state, gradio('model_menu', 'interface_state'), gradio('interface_state')).then( handle_load_model_event_initial, gradio('model_menu', 'interface_state'), gradio(ui.list_interface_input_elements()) + gradio('interface_state'), show_progress=False).then(
ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then(
update_model_parameters, gradio('interface_state'), None).then(
load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False).success( load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False).success(
update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then( handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
lambda x: x, gradio('loader'), gradio('filter_by_loader'))
shared.gradio['load_model'].click( shared.gradio['load_model'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
update_model_parameters, gradio('interface_state'), None).then( update_model_parameters, gradio('interface_state'), None).then(
partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success( partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success(
update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then( handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
lambda x: x, gradio('loader'), gradio('filter_by_loader'))
shared.gradio['reload_model'].click(
unload_model, None, None).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
update_model_parameters, gradio('interface_state'), None).then(
partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success(
update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then(
lambda x: x, gradio('loader'), gradio('filter_by_loader'))
shared.gradio['unload_model'].click(
unload_model, None, None).then(
lambda: "Model unloaded", None, gradio('model_status'))
shared.gradio['unload_model'].click(handle_unload_model_click, None, gradio('model_status'), show_progress=False)
shared.gradio['save_model_settings'].click( shared.gradio['save_model_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False) save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False)
@ -353,3 +336,20 @@ def update_truncation_length(current_length, state):
return state['n_ctx'] return state['n_ctx']
return current_length return current_length
def handle_load_model_event_initial(model, state):
    """First stage of the model-menu change chain.

    Merges the selected model's settings into the interface state, derives
    the values for the interface input elements from that state, and then
    forwards the state to ``update_model_parameters``.

    Returns:
        The list produced by ``ui.apply_interface_values`` with the updated
        state appended as the last item. The event wiring maps this onto the
        interface input elements followed by ``interface_state``.
    """
    updated_state = apply_model_settings_to_state(model, state)

    # Component values are computed before update_model_parameters runs,
    # keeping the original call order.
    interface_values = ui.apply_interface_values(updated_state)
    update_model_parameters(updated_state)

    return interface_values + [updated_state]
def handle_load_model_event_final(truncation_length, loader, state):
    """Last stage of the load-model chains, run after a successful load.

    Returns:
        A two-item list: the truncation length returned by
        ``update_truncation_length`` and the loader name passed through
        unchanged. The event wiring maps these onto the ``truncation_length``
        and ``filter_by_loader`` components.
    """
    new_truncation_length = update_truncation_length(truncation_length, state)
    return [new_truncation_length, loader]
def handle_unload_model_click():
    """Unload the current model and return the text for the status component.

    The event wiring sends the returned string to ``model_status``.
    """
    unload_model()
    status_message = "Model unloaded"
    return status_message

View file

@ -7,6 +7,7 @@ from modules.text_generation import (
get_token_ids, get_token_ids,
stop_everything_event stop_everything_event
) )
from modules.ui_default import handle_delete_prompt, handle_save_prompt
from modules.utils import gradio from modules.utils import gradio
inputs = ('textbox-notebook', 'interface_state') inputs = ('textbox-notebook', 'interface_state')
@ -66,38 +67,32 @@ def create_event_handlers():
lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox-notebook'].submit( shared.gradio['textbox-notebook'].submit(
lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False)
shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False)
shared.gradio['Regenerate-notebook'].click( shared.gradio['Regenerate-notebook'].click(
lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then( lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}') None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Undo'].click(
lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(
lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None)
shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False)
shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False)
shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False)
shared.gradio['save_prompt-notebook'].click( shared.gradio['save_prompt-notebook'].click(handle_save_prompt, gradio('textbox-notebook'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
lambda x: x, gradio('textbox-notebook'), gradio('save_contents')).then( shared.gradio['delete_prompt-notebook'].click(handle_delete_prompt, gradio('prompt_menu-notebook'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
lambda: 'prompts/', None, gradio('save_root')).then(
lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then(
lambda: gr.update(visible=True), None, gradio('file_saver'))
shared.gradio['delete_prompt-notebook'].click(
lambda: 'prompts/', None, gradio('delete_root')).then(
lambda x: x + '.txt', gradio('prompt_menu-notebook'), gradio('delete_filename')).then(
lambda: gr.update(visible=True), None, gradio('file_deleter'))
shared.gradio['textbox-notebook'].input(lambda x: f"<span>{count_tokens(x)}</span>", gradio('textbox-notebook'), gradio('token-counter-notebook'), show_progress=False) shared.gradio['textbox-notebook'].input(lambda x: f"<span>{count_tokens(x)}</span>", gradio('textbox-notebook'), gradio('token-counter-notebook'), show_progress=False)
shared.gradio['get_logits-notebook'].click( shared.gradio['get_logits-notebook'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(

View file

@ -40,9 +40,9 @@ def create_ui(default_preset):
shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
with gr.Blocks(): with gr.Blocks():
shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to value > 0 to enable DRY. Controls the magnitude of the penalty for the shortest penalized sequences.') shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.') shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.') shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)") gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)")
@ -89,7 +89,7 @@ def create_ui(default_preset):
shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.') shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.')
with gr.Column(): with gr.Column():
shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
@ -102,10 +102,16 @@ def create_ui(default_preset):
def create_event_handlers(): def create_event_handlers():
shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader', 'dynamic_temperature'), gradio(loaders.list_all_samplers()), show_progress=False) shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader', 'dynamic_temperature'), gradio(loaders.list_all_samplers()), show_progress=False)
shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params())) shared.gradio['preset_menu'].change(
shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params())) ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string')) presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'))
shared.gradio['random_preset'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False)
shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False)
def get_truncation_length(): def get_truncation_length():

View file

@ -35,15 +35,22 @@ def create_ui():
None, None, None, js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;padding-top:20%;margin:0;height:100vh;color:lightgray;text-align:center;background:var(--body-background-fill)">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}') None, None, None, js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;padding-top:20%;margin:0;height:100vh;color:lightgray;text-align:center;background:var(--body-background-fill)">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
shared.gradio['toggle_dark_mode'].click( shared.gradio['toggle_dark_mode'].click(
None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')) None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}')
shared.gradio['save_settings'].click( shared.gradio['save_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
ui.save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents')).then( handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
lambda: './', None, gradio('save_root')).then(
lambda: 'settings.yaml', None, gradio('save_filename')).then(
lambda: gr.update(visible=True), None, gradio('file_saver')) def handle_save_settings(state, preset, extensions, show_controls, theme):
contents = ui.save_settings(state, preset, extensions, show_controls, theme)
return [
contents,
"settings.yaml",
"./",
gr.update(visible=True)
]
def set_interface_arguments(extensions, bool_active): def set_interface_arguments(extensions, bool_active):

View file

@ -95,11 +95,10 @@ def get_available_presets():
def get_available_prompts(): def get_available_prompts():
prompts = [] prompt_files = list(Path('prompts').glob('*.txt'))
files = set((k.stem for k in Path('prompts').glob('*.txt'))) sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True)
prompts += sorted([k for k in files if re.match('^[0-9]', k)], key=natural_keys, reverse=True) prompts = [file.stem for file in sorted_files]
prompts += sorted([k for k in files if re.match('^[^0-9]', k)], key=natural_keys) prompts.append('None')
prompts += ['None']
return prompts return prompts

View file

@ -388,7 +388,12 @@ def update_requirements(initial_installation=False, pull=True):
# Prepare the requirements file # Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines() textgen_requirements = open(requirements_file).read().splitlines()
if is_cuda118: if is_cuda118:
textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] textgen_requirements = [
req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
for req in textgen_requirements
if "auto-gptq" not in req.lower() and "autoawq" not in req.lower()
]
if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11
textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req]

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1 auto-gptq==0.7.1
bitsandbytes==0.43.* bitsandbytes==0.43.*
@ -14,7 +14,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -24,7 +24,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -37,31 +37,38 @@ soundfile
openai-whisper openai-whisper
# llama-cpp-python (CPU only, AVX2) # llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores) # llama-cpp-python (CUDA, no tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores) # llama-cpp-python (CUDA, tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels # CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
colorama colorama
datasets datasets
einops einops
@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -32,16 +32,18 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, AVX2) # llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels # AMD wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
colorama colorama
datasets datasets
einops einops
@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -32,14 +32,16 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, no AVX2) # llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels # AMD wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
colorama colorama
datasets datasets
einops einops
@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -32,8 +32,8 @@ sse-starlette==1.6.5
tiktoken tiktoken
# Mac wheels # Mac wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
colorama colorama
datasets datasets
einops einops
@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -32,10 +32,10 @@ sse-starlette==1.6.5
tiktoken tiktoken
# Mac wheels # Mac wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
colorama colorama
datasets datasets
einops einops
@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, AVX2) # llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
colorama colorama
datasets datasets
einops einops
@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, no AVX2) # llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1 auto-gptq==0.7.1
bitsandbytes==0.43.* bitsandbytes==0.43.*
@ -14,7 +14,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -24,7 +24,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb
@ -35,31 +35,38 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, no AVX2) # llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores) # llama-cpp-python (CUDA, no tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores) # llama-cpp-python (CUDA, tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels # CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

View file

@ -1,4 +1,4 @@
accelerate==0.32.* accelerate==0.33.*
colorama colorama
datasets datasets
einops einops
@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.* numpy==1.26.*
optimum==1.17.* optimum==1.17.*
pandas pandas
peft==0.8.* peft==0.12.*
Pillow>=9.5.0 Pillow>=9.5.0
psutil psutil
pyyaml pyyaml
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy scipy
sentencepiece sentencepiece
tensorboard tensorboard
transformers==4.42.* transformers==4.44.*
tqdm tqdm
wandb wandb

View file

@ -90,7 +90,7 @@ def create_interface():
# Force some events to be triggered on page load # Force some events to be triggered on page load
shared.persistent_interface_state.update({ shared.persistent_interface_state.update({
'loader': shared.args.loader or 'Transformers', 'loader': shared.args.loader or 'Transformers',
'mode': shared.settings['mode'], 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(),
'character_menu': shared.args.character or shared.settings['character'], 'character_menu': shared.args.character or shared.settings['character'],
'instruction_template_str': shared.settings['instruction_template_str'], 'instruction_template_str': shared.settings['instruction_template_str'],
'prompt_menu-default': shared.settings['prompt-default'], 'prompt_menu-default': shared.settings['prompt-default'],
@ -146,11 +146,21 @@ def create_interface():
ui_model_menu.create_event_handlers() ui_model_menu.create_event_handlers()
# Interface launch events # Interface launch events
shared.gradio['interface'].load(None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}") shared.gradio['interface'].load(
shared.gradio['interface'].load(None, None, None, js=f"() => {{{js}}}") None,
shared.gradio['interface'].load(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') gradio('show_controls'),
None,
js=f"""(x) => {{
if ({str(shared.settings['dark_theme']).lower()}) {{
document.getElementsByTagName('body')[0].classList.add('dark');
}}
{js}
{ui.show_controls_js}
toggle_controls(x);
}}"""
)
shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False)
shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
extensions_module.create_extensions_tabs() # Extensions tabs extensions_module.create_extensions_tabs() # Extensions tabs
extensions_module.create_extensions_block() # Extensions block extensions_module.create_extensions_block() # Extensions block
@ -169,6 +179,7 @@ def create_interface():
ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True,
ssl_keyfile=shared.args.ssl_keyfile, ssl_keyfile=shared.args.ssl_keyfile,
ssl_certfile=shared.args.ssl_certfile, ssl_certfile=shared.args.ssl_certfile,
root_path=shared.args.subpath,
allowed_paths=["cache", "css", "extensions", "js"] allowed_paths=["cache", "css", "extensions", "js"]
) )

View file

@ -1,7 +1,7 @@
dark_theme: true dark_theme: true
show_controls: true show_controls: true
start_with: '' start_with: ''
mode: chat mode: chat-instruct
chat_style: cai-chat chat_style: cai-chat
prompt-default: QA prompt-default: QA
prompt-notebook: QA prompt-notebook: QA
@ -12,8 +12,6 @@ max_new_tokens_max: 4096
negative_prompt: '' negative_prompt: ''
seed: -1 seed: -1
truncation_length: 2048 truncation_length: 2048
truncation_length_min: 0
truncation_length_max: 200000
max_tokens_second: 0 max_tokens_second: 0
max_updates_second: 0 max_updates_second: 0
prompt_lookup_num_tokens: 0 prompt_lookup_num_tokens: 0

View file

@ -1,4 +1,4 @@
#!/bin/bash #!/usr/bin/env bash
cd "$(dirname "${BASH_SOURCE[0]}")" cd "$(dirname "${BASH_SOURCE[0]}")"

View file

@ -0,0 +1,4 @@
{
"instruction,output": "<|im_start|>system\n<|im_end|>\n<|im_start|>user\n%instruction%<|im_end|>\n<|im_start|>assistant\n%output%<|im_end|>",
"instruction,input,output": "<|im_start|>system\n<|im_end|>\n<|im_start|>user\n%instruction%: %input%<|im_end|>\n<|im_start|>assistant\n%output%<|im_end|>"
}

View file

@ -1,4 +1,4 @@
#!/bin/bash #!/usr/bin/env bash
cd "$(dirname "${BASH_SOURCE[0]}")" cd "$(dirname "${BASH_SOURCE[0]}")"