Merge branch 'main' into dandm1-raw_string_processing

oobabooga 2023-08-27 09:18:58 -07:00
commit 4318c4cc18
136 changed files with 4302 additions and 2273 deletions

.github/pull_request_template.md (new file, +3)

@ -0,0 +1,3 @@
## Checklist:
- [ ] I have read the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).


@ -13,8 +13,8 @@ jobs:
- uses: actions/stale@v5
with:
stale-issue-message: ""
close-issue-message: "This issue has been closed due to inactivity for 30 days. If you believe it is still relevant, please leave a comment below."
days-before-issue-stale: 30
close-issue-message: "This issue has been closed due to inactivity for 6 weeks. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment."
days-before-issue-stale: 42
days-before-issue-close: 0
stale-issue-label: "stale"
days-before-pr-stale: -1

README.md (160 lines changed)

@ -1,29 +1,27 @@
# Text generation web UI
A gradio web UI for running Large Language Models like LLaMA, llama.cpp, GPT-J, OPT, and GALACTICA.
A Gradio web UI for Large Language Models.
Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.
|![Image1](https://github.com/oobabooga/screenshots/raw/main/qa.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/cai3.png) |
|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) |
|:---:|:---:|
|![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png) | ![Image4](https://github.com/oobabooga/screenshots/raw/main/galactica.png) |
|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) |
## Features
* 3 interface modes: default, notebook, and chat
* Multiple model backends: transformers, llama.cpp, ExLlama, AutoGPTQ, GPTQ-for-LLaMa
* 3 interface modes: default (two columns), notebook, and chat
* Multiple model backends: [transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlama](https://github.com/turboderp/exllama), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [ctransformers](https://github.com/marella/ctransformers)
* Dropdown menu for quickly switching between different models
* LoRA: load and unload LoRAs on the fly, train a new LoRA
* Precise instruction templates for chat mode, including Llama 2, Alpaca, Vicuna, WizardLM, StableLM, and many others
* LoRA: load and unload LoRAs on the fly, train a new LoRA using QLoRA
* Precise instruction templates for chat mode, including Llama-2-chat, Alpaca, Vicuna, WizardLM, StableLM, and many others
* 4-bit, 8-bit, and CPU inference through the transformers library
* Use llama.cpp models with transformers samplers (`llamacpp_HF` loader)
* [Multimodal pipelines, including LLaVA and MiniGPT-4](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal)
* 8-bit and 4-bit inference through bitsandbytes
* CPU mode for transformers models
* [DeepSpeed ZeRO-3 inference](docs/DeepSpeed.md)
* [Extensions](docs/Extensions.md)
* [Extensions framework](docs/Extensions.md)
* [Custom chat characters](docs/Chat-mode.md)
* Very efficient text streaming
* Markdown output with LaTeX rendering, to use for instance with [GALACTICA](https://github.com/paperswithcode/galai)
* Nice HTML output for GPT-4chan
* API, including endpoints for websocket streaming ([see the examples](https://github.com/oobabooga/text-generation-webui/blob/main/api-examples))
To learn how to use the various features, check out the Documentation: https://github.com/oobabooga/text-generation-webui/tree/main/docs
@ -38,26 +36,24 @@ To learn how to use the various features, check out the Documentation: https://g
Just download the zip above, extract it, and double-click on "start". The web UI and all its dependencies will be installed in the same folder.
* The source codes are here: https://github.com/oobabooga/one-click-installers
* The source codes and more information can be found here: https://github.com/oobabooga/one-click-installers
* There is no need to run the installers as admin.
* AMD doesn't work on Windows.
* Huge thanks to [@jllllll](https://github.com/jllllll), [@ClayShoaf](https://github.com/ClayShoaf), and [@xNul](https://github.com/xNul) for their contributions to these installers.
### Manual installation using Conda
Recommended if you have some experience with the command line.
Recommended if you have some experience with the command-line.
#### 0. Install Conda
https://docs.conda.io/en/latest/miniconda.html
On Linux or WSL, it can be automatically installed with these two commands:
On Linux or WSL, it can be automatically installed with these two commands ([source](https://educe-ubc.github.io/conda.html)):
```
curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh"
bash Miniconda3.sh
```
Source: https://educe-ubc.github.io/conda.html
#### 1. Create a new conda environment
@ -79,7 +75,7 @@ conda activate textgen
The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
#### 2.1 Special instructions
#### 2.1 Additional information
* MacOS users: https://github.com/oobabooga/text-generation-webui/pull/393
* AMD users: https://rentry.org/eq3hg
@ -92,9 +88,21 @@ cd text-generation-webui
pip install -r requirements.txt
```
#### bitsandbytes
#### llama.cpp on AMD, Metal, and some specific CPUs
bitsandbytes >= 0.39 may not work on older NVIDIA GPUs. In that case, to use `--load-in-8bit`, you may have to downgrade like this:
Precompiled wheels are included for CPU-only and NVIDIA GPUs (cuBLAS). For AMD, Metal, and some specific CPUs, you need to uninstall those wheels and compile llama-cpp-python yourself.
To uninstall:
```
pip uninstall -y llama-cpp-python llama-cpp-python-cuda
```
To compile: https://github.com/abetlen/llama-cpp-python#installation-with-openblas--cublas--clblast--metal
#### bitsandbytes on older NVIDIA GPUs
bitsandbytes >= 0.39 may not work. In that case, to use `--load-in-8bit`, you may have to downgrade like this:
* Linux: `pip install bitsandbytes==0.38.1`
* Windows: `pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl`
@ -113,37 +121,50 @@ docker compose up --build
### Updating the requirements
From time to time, the `requirements.txt` changes. To update, use this command:
From time to time, the `requirements.txt` changes. To update, use these commands:
```
conda activate textgen
cd text-generation-webui
pip install -r requirements.txt --upgrade
```
## Downloading models
Models should be placed inside the `models/` folder.
Models should be placed in the `text-generation-webui/models` folder. They are usually downloaded from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads).
[Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) is the main place to download models. These are some examples:
* Transformers or GPTQ models are made of several files and must be placed in a subfolder. Example:
* [Pythia](https://huggingface.co/models?sort=downloads&search=eleutherai%2Fpythia+deduped)
* [OPT](https://huggingface.co/models?search=facebook/opt)
* [GALACTICA](https://huggingface.co/models?search=facebook/galactica)
* [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main)
```
text-generation-webui
├── models
│   ├── lmsys_vicuna-33b-v1.3
│   │   ├── config.json
│   │   ├── generation_config.json
│   │   ├── pytorch_model-00001-of-00007.bin
│   │   ├── pytorch_model-00002-of-00007.bin
│   │   ├── pytorch_model-00003-of-00007.bin
│   │   ├── pytorch_model-00004-of-00007.bin
│   │   ├── pytorch_model-00005-of-00007.bin
│   │   ├── pytorch_model-00006-of-00007.bin
│   │   ├── pytorch_model-00007-of-00007.bin
│   │   ├── pytorch_model.bin.index.json
│   │   ├── special_tokens_map.json
│   │   ├── tokenizer_config.json
│   │   └── tokenizer.model
```
You can automatically download a model from HF using the script `download-model.py`:
In the "Model" tab of the UI, those models can be automatically downloaded from Hugging Face. You can also download them via the command-line with `python download-model.py organization/model`.
python download-model.py organization/model
* GGML/GGUF models are a single file and should be placed directly into `models`. Example:
For example:
```
text-generation-webui
├── models
│   ├── llama-13b.ggmlv3.q4_K_M.bin
```
python download-model.py facebook/opt-1.3b
To download a protected model, set env vars `HF_USER` and `HF_PASS` to your Hugging Face username and password (or [User Access Token](https://huggingface.co/settings/tokens)). The model's terms must first be accepted on the HF website.
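A minimal sketch of that flow, run from the repository root (the model name is only an illustration of a gated repository, and the token value is a placeholder):

```
import os
import subprocess

# Set the credentials the downloader reads via os.getenv() before launching it.
os.environ["HF_USER"] = "your-username"
os.environ["HF_PASS"] = "hf_your_access_token"  # password or User Access Token

# Accept the model's terms on the Hugging Face website first.
subprocess.run(["python", "download-model.py", "meta-llama/Llama-2-7b-hf"], check=True)
```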
#### GGML models
You can drop these directly into the `models/` folder, making sure that the file name contains `ggml` somewhere and ends in `.bin`.
Those models must be downloaded manually, as they are not currently supported by the automated downloader.
#### GPT-4chan
@ -169,7 +190,10 @@ After downloading the model, follow these steps:
python download-model.py EleutherAI/gpt-j-6B --text-only
```
When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format.
When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format:
![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png)
</details>
## Starting the web UI
@ -189,8 +213,6 @@ Optionally, you can use the following command-line flags:
| Flag | Description |
|--------------------------------------------|-------------|
| `-h`, `--help` | Show this help message and exit. |
| `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. |
| `--chat` | Launch the web UI in chat mode. |
| `--multi-user` | Multi-user mode. Chat histories are not saved or automatically loaded. WARNING: this is highly experimental. |
| `--character CHARACTER` | The name of the character to load in chat mode by default. |
| `--model MODEL` | Name of the model to load by default. |
@ -198,7 +220,6 @@ Optionally, you can use the following command-line flags:
| `--model-dir MODEL_DIR` | Path to directory with all the models. |
| `--lora-dir LORA_DIR` | Path to directory with all the loras. |
| `--model-menu` | Show a model menu in the terminal when the web UI is first launched. |
| `--no-stream` | Don't stream the text output in real time. |
| `--settings SETTINGS_FILE` | Load the default interface settings from this yaml file. See `settings-template.yaml` for an example. If you create a file called `settings.yaml`, this file will be loaded by default without the need to use the `--settings` flag. |
| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. |
| `--verbose` | Print the prompts to the terminal. |
@ -207,7 +228,7 @@ Optionally, you can use the following command-line flags:
| Flag | Description |
|--------------------------------------------|-------------|
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv |
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv, ctransformers |
#### Accelerate/transformers
@ -237,20 +258,35 @@ Optionally, you can use the following command-line flags:
| `--quant_type QUANT_TYPE` | quant_type for 4-bit. Valid options: nf4, fp4. |
| `--use_double_quant` | use_double_quant for 4-bit. |
#### llama.cpp
#### GGML/GGUF (for llama.cpp and ctransformers)
| Flag | Description |
|-------------|-------------|
| `--threads` | Number of threads to use. |
| `--n_batch` | Maximum number of prompt tokens to batch together when calling llama_eval. |
| `--no-mmap` | Prevent mmap from being used. |
| `--mlock` | Force the system to keep the model in RAM. |
| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. |
| `--n_ctx N_CTX` | Size of the prompt context. |
| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
| `--n_gqa N_GQA` | grouped-query attention. Must be 8 for llama2 70b. |
| `--rms_norm_eps RMS_NORM_EPS` | Must be 1e-5 for llama2 70b. |
#### llama.cpp
| Flag | Description |
|---------------|---------------|
| `--no-mmap` | Prevent mmap from being used. |
| `--mlock` | Force the system to keep the model in RAM. |
| `--mul_mat_q` | Activate new mulmat kernels. |
| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
| `--n_gqa N_GQA` | GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b. |
| `--rms_norm_eps RMS_NORM_EPS` | GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models. |
| `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. |
|`--cfg-cache` | llamacpp_HF: Create an additional cache for CFG negative prompts. |
#### ctransformers
| Flag | Description |
|-------------|-------------|
| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently gpt2, gptj, gptneox, falcon, llama, mpt, starcoder (gptbigcode), dollyv2, and replit are supported. |
#### AutoGPTQ
@ -261,6 +297,7 @@ Optionally, you can use the following command-line flags:
| `--no_inject_fused_mlp` | Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference. |
| `--no_use_cuda_fp16` | This can make models faster on some systems. |
| `--desc_act` | For models that don't have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. |
| `--disable_exllama` | Disable ExLlama kernel, which can improve inference speed on some systems. |
#### ExLlama
@ -268,6 +305,7 @@ Optionally, you can use the following command-line flags:
|------------------|-------------|
|`--gpu-split` | Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. `20,7,7` |
|`--max_seq_len MAX_SEQ_LEN` | Maximum sequence length. |
|`--cfg-cache` | ExLlama_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader, but not necessary for CFG with base ExLlama. |
#### GPTQ-for-LLaMa
@ -279,9 +317,6 @@ Optionally, you can use the following command-line flags:
| `--pre_layer PRE_LAYER [PRE_LAYER ...]` | The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg `--pre_layer 30 60`. |
| `--checkpoint CHECKPOINT` | The path to the quantized checkpoint file. If not specified, it will be automatically detected. |
| `--monkey-patch` | Apply the monkey patch for using LoRAs with quantized models. |
| `--quant_attn` | (triton) Enable quant attention. |
| `--warmup_autotune` | (triton) Enable warmup autotune. |
| `--fused_mlp` | (triton) Enable fused mlp. |
#### DeepSpeed
@ -298,12 +333,13 @@ Optionally, you can use the following command-line flags:
| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
#### RoPE (for llama.cpp and ExLlama only)
#### RoPE (for llama.cpp, ExLlama, and transformers)
| Flag | Description |
|------------------|-------------|
|`--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should typically be set to max_seq_len / 2048. |
|`--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both. |
| `--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
| `--rope_freq_base ROPE_FREQ_BASE` | If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63). |
| `--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale. |
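As a quick numeric illustration of the two relations above (the context lengths and alpha value are arbitrary examples, not recommendations):

```
# Illustrative numbers only; substitute your own model and target context.
original_ctx = 4096                                 # model's original context length
target_ctx = 8192                                   # desired context length

compress_pos_emb = target_ctx / original_ctx        # 2.0, i.e. 1 / rope_freq_scale
alpha_value = 2.5                                   # user-chosen NTK scaling factor
rope_freq_base = 10000 * alpha_value ** (64 / 63)   # ~25365, used instead of alpha_value when > 0
```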
#### Gradio
@ -316,6 +352,8 @@ Optionally, you can use the following command-line flags:
| `--auto-launch` | Open the web UI in the default browser upon launch. |
| `--gradio-auth USER:PWD` | set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3" |
| `--gradio-auth-path GRADIO_AUTH_PATH` | Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3" |
| `--ssl-keyfile SSL_KEYFILE` | The path to the SSL certificate key file. |
| `--ssl-certfile SSL_CERTFILE` | The path to the SSL certificate cert file. |
#### API
@ -323,6 +361,7 @@ Optionally, you can use the following command-line flags:
|---------------------------------------|-------------|
| `--api` | Enable the API extension. |
| `--public-api` | Create a public URL for the API using Cloudflare. |
| `--public-api-id PUBLIC_API_ID` | Tunnel ID for named Cloudflare Tunnel. Use together with public-api option. |
| `--api-blocking-port BLOCKING_PORT` | The listening port for the blocking API. |
| `--api-streaming-port STREAMING_PORT` | The listening port for the streaming API. |
@ -340,12 +379,13 @@ The presets that are included by default are the result of a contest that receiv
## Contributing
* Pull requests, suggestions, and issue reports are welcome.
* Make sure to carefully [search](https://github.com/oobabooga/text-generation-webui/issues) existing issues before starting a new one.
* If you have some experience with git, testing an open pull request and leaving a comment on whether it works as expected or not is immensely helpful.
* A simple way to contribute, even if you are not a programmer, is to leave a 👍 on an issue or pull request that you find relevant.
If you would like to contribute to the project, check out the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).
## Community
* Subreddit: https://www.reddit.com/r/oobaboogazz/
* Subreddit: https://www.reddit.com/r/oobabooga/
* Discord: https://discord.gg/jwZCF2dPQN
## Acknowledgment
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition, which will allow me to dedicate more time towards realizing the full potential of text-generation-webui.


@ -20,18 +20,23 @@ async def run(user_input, history):
request = {
'user_input': user_input,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'history': history,
'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
'character': 'Example',
'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
# 'context_instruct': '', # Optional
'your_name': 'You',
# 'name1': 'name of user', # Optional
# 'name2': 'name of character', # Optional
# 'context': 'character context', # Optional
# 'greeting': 'greeting', # Optional
# 'name1_instruct': 'You', # Optional
# 'name2_instruct': 'Assistant', # Optional
# 'context_instruct': 'context_instruct', # Optional
# 'turn_template': 'turn_template', # Optional
'regenerate': False,
'_continue': False,
'stop_at_newline': False,
'chat_generation_attempts': 1,
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -56,6 +61,8 @@ async def run(user_input, history):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,
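For reference, a minimal client sketch that sends a request like the one above to the streaming endpoint (assumes the server was started with `--api` and uses the default streaming port 5005):

```
import asyncio
import json

import websockets


async def print_response_stream(request):
    async with websockets.connect('ws://localhost:5005/api/v1/chat-stream', ping_interval=None) as websocket:
        await websocket.send(json.dumps(request))
        async for message in websocket:
            incoming = json.loads(message)
            if incoming['event'] == 'stream_end':
                break
            # Each event carries the updated chat history; print the latest bot reply.
            print(incoming['history']['visible'][-1][1])
```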


@ -14,18 +14,23 @@ def run(user_input, history):
request = {
'user_input': user_input,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'history': history,
'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
'character': 'Example',
'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
# 'context_instruct': '', # Optional
'your_name': 'You',
# 'name1': 'name of user', # Optional
# 'name2': 'name of character', # Optional
# 'context': 'character context', # Optional
# 'greeting': 'greeting', # Optional
# 'name1_instruct': 'You', # Optional
# 'name2_instruct': 'Assistant', # Optional
# 'context_instruct': 'context_instruct', # Optional
# 'turn_template': 'turn_template', # Optional
'regenerate': False,
'_continue': False,
'stop_at_newline': False,
'chat_generation_attempts': 1,
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -50,6 +55,8 @@ def run(user_input, history):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,
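A matching sketch for the blocking API (assumes the server was started with `--api` and uses the default blocking port 5000):

```
import requests


def send_chat_request(request):
    response = requests.post('http://localhost:5000/api/v1/chat', json=request)
    if response.status_code == 200:
        history = response.json()['results'][0]['history']
        # The last visible message pair holds the bot's newest reply.
        print(history['visible'][-1][1])
```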


@ -20,6 +20,7 @@ async def run(context):
request = {
'prompt': context,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -44,6 +45,8 @@ async def run(context):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,


@ -12,6 +12,7 @@ def run(prompt):
request = {
'prompt': prompt,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -36,6 +37,8 @@ def run(prompt):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,


@ -1,4 +0,0 @@
user: ""
bot: "### Response:"
turn_template: "<|user-message|>\n\n<|bot|><|bot-message|>\n\n</s>"
context: ""


@ -1,126 +0,0 @@
.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
height: 66.67vh
}
.gradio-container {
margin-left: auto !important;
margin-right: auto !important;
}
.w-screen {
width: unset
}
div.svelte-362y77>*, div.svelte-362y77>.form>* {
flex-wrap: nowrap
}
/* fixes the API documentation in chat mode */
.api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h {
display: grid;
}
.pending.svelte-1ed2p3z {
opacity: 1;
}
#extensions {
padding: 0;
padding: 0;
}
#gradio-chatbot {
height: 66.67vh;
}
.wrap.svelte-6roggh.svelte-6roggh {
max-height: 92.5%;
}
/* This is for the microphone button in the whisper extension */
.sm.svelte-1ipelgc {
width: 100%;
}
#main button {
min-width: 0 !important;
}
/*****************************************************/
/*************** Chat box declarations ***************/
/*****************************************************/
.chat {
margin-left: auto;
margin-right: auto;
max-width: 800px;
height: calc(100vh - 286px);
overflow-y: auto;
padding-right: 20px;
display: flex;
flex-direction: column-reverse;
word-break: break-word;
overflow-wrap: anywhere;
padding-top: 1px;
}
.message-body li {
margin-top: 0.5em !important;
margin-bottom: 0.5em !important;
}
.message-body li > p {
display: inline !important;
}
.message-body ul, .message-body ol {
font-size: 15px !important;
}
.message-body ul {
list-style-type: disc !important;
}
.message-body pre {
margin-bottom: 1.25em !important;
}
.message-body code {
white-space: pre-wrap !important;
word-wrap: break-word !important;
}
.message-body :not(pre) > code {
white-space: normal !important;
}
@media print {
body {
visibility: hidden;
}
.chat {
visibility: visible;
position: absolute;
left: 0;
top: 0;
max-width: none;
max-height: none;
width: 100%;
height: fit-content;
display: flex;
flex-direction: column-reverse;
}
.message {
break-inside: avoid;
}
.gradio-container {
overflow: visible;
}
.tab-nav {
display: none !important;
}
}


@ -1,4 +0,0 @@
document.getElementById("main").childNodes[0].style = "max-width: 800px; margin-left: auto; margin-right: auto";
document.getElementById("extensions").style.setProperty("max-width", "800px");
document.getElementById("extensions").style.setProperty("margin-left", "auto");
document.getElementById("extensions").style.setProperty("margin-right", "auto");


@ -10,17 +10,10 @@
line-height: 1.428571429;
}
.circle-you {
background-color: gray;
border-radius: 1rem;
/*Change color to any you like to be the border of your image*/
border: 2px solid white;
}
.circle-you,
.circle-bot {
background-color: gray;
border-radius: 1rem;
/*Change color to any you like to be the border of the bot's image*/
border: 2px solid white;
}
@ -105,3 +98,39 @@
.message-body p em {
color: rgb(110, 110, 110) !important;
}
@media screen and (max-width: 688px) {
.message {
display: grid;
grid-template-columns: 60px minmax(0, 1fr);
padding-bottom: 25px;
font-size: 15px;
font-family: Helvetica, Arial, sans-serif;
line-height: 1.428571429;
}
.circle-you, .circle-bot {
width: 50px;
height: 73px;
border-radius: 0.5rem;
}
.circle-bot img,
.circle-you img {
width: 100%;
height: 100%;
object-fit: cover;
}
.text {
padding-left: 0px;
}
.message-body p {
font-size: 16px !important;
}
.username {
font-size: 20px;
}
}


@ -98,7 +98,7 @@
margin-right: 40px !important;
}
#parent #container .message {
#parent #container .message_4chan {
color: black;
border: none;
}


@ -43,6 +43,10 @@
margin-bottom: 9px !important;
}
.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
margin-bottom: 0px !important;
}
.dark .chat .assistant-message {
background-color: #3741519e;
border: 1px solid #4b5563;
@ -58,5 +62,5 @@ code {
}
.dark code {
background-color: #1a212f !important;
background-color: #0e1321 !important;
}


@ -26,4 +26,8 @@
.container :not(pre) > code {
white-space: normal !important;
}
.container .hoverable {
font-size: 14px;
}


@ -7,6 +7,7 @@
}
.small-button {
min-width: 0 !important;
max-width: 171px;
height: 39.594px;
align-self: end;
@ -26,6 +27,10 @@
max-width: 2.2em;
}
.button_nowrap {
white-space: nowrap;
}
#slim-column {
flex: none !important;
min-width: 0 !important;
@ -41,9 +46,6 @@
min-height: 0
}
#accordion {
}
.dark svg {
fill: white;
}
@ -56,7 +58,7 @@ ol li p, ul li p {
display: inline-block;
}
#main, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab {
#chat-tab, #default-tab, #notebook-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab {
border: 0;
}
@ -70,7 +72,6 @@ ol li p, ul li p {
}
#extensions {
padding: 15px;
margin-bottom: 35px;
}
@ -90,6 +91,8 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
.header_bar {
background-color: #f7f7f7;
margin-bottom: 20px;
display: inline !important;
overflow-x: scroll;
}
.dark .header_bar {
@ -97,19 +100,36 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
background-color: #8080802b;
}
.header_bar button.selected {
border-radius: 0;
}
.textbox_default textarea {
height: calc(100vh - 380px);
height: calc(100dvh - 280px);
}
.textbox_default_output textarea {
height: calc(100vh - 190px);
height: calc(100dvh - 190px);
}
.textbox textarea {
height: calc(100vh - 241px);
height: calc(100dvh - 241px);
}
.textbox_default textarea, .textbox_default_output textarea, .textbox textarea {
.textbox_logits textarea {
height: calc(100dvh - 241px);
}
.textbox_logits_notebook textarea {
height: calc(100dvh - 292px);
}
.textbox_default textarea,
.textbox_default_output textarea,
.textbox_logits textarea,
.textbox_logits_notebook textarea,
.textbox textarea
{
font-size: 16px !important;
color: #46464A !important;
}
@ -118,6 +138,16 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
color: #efefef !important;
}
@media screen and (max-width: 711px) {
.textbox_default textarea {
height: calc(100dvh - 271px);
}
div .default-token-counter {
top: calc( 0.5 * (100dvh - 245px) ) !important;
}
}
/* Hide the gradio footer*/
footer {
display: none !important;
@ -157,7 +187,7 @@ button {
}
.pretty_scrollbar::-webkit-scrollbar {
width: 10px;
width: 5px;
}
.pretty_scrollbar::-webkit-scrollbar-track {
@ -167,13 +197,11 @@ button {
.pretty_scrollbar::-webkit-scrollbar-thumb,
.pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: #c5c5d2;
border-radius: 10px;
}
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: #374151;
border-radius: 10px;
}
.pretty_scrollbar::-webkit-resizer {
@ -183,3 +211,207 @@ button {
.dark .pretty_scrollbar::-webkit-resizer {
background: #374151;
}
audio {
max-width: 100%;
}
/* Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui */
.token-counter {
position: absolute !important;
top: calc( 0.5 * (100dvh - 215px) ) !important;
right: 2px;
z-index: 100;
background: var(--input-background-fill) !important;
min-height: 0 !important;
}
.default-token-counter {
top: calc( 0.5 * (100dvh - 255px) ) !important;
}
.token-counter span {
padding: 1px;
box-shadow: 0 0 0 0.3em rgba(192,192,192,0.15), inset 0 0 0.6em rgba(192,192,192,0.075);
border: 2px solid rgba(192,192,192,0.4) !important;
border-radius: 0.4em;
}
.no-background {
background: var(--background-fill-primary) !important;
padding: 0px !important;
}
/*****************************************************/
/*************** Chat UI declarations ****************/
/*****************************************************/
.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
height: 66.67vh
}
.gradio-container {
margin-left: auto !important;
margin-right: auto !important;
}
.w-screen {
width: unset
}
div.svelte-362y77>*, div.svelte-362y77>.form>* {
flex-wrap: nowrap
}
.pending.svelte-1ed2p3z {
opacity: 1;
}
.wrap.svelte-6roggh.svelte-6roggh {
max-height: 92.5%;
}
/* This is for the microphone button in the whisper extension */
.sm.svelte-1ipelgc {
width: 100%;
}
#chat-tab button, #notebook-tab button, #default-tab button {
min-width: 0 !important;
}
#chat-tab > :first-child, #extensions {
max-width: 800px;
margin-left: auto;
margin-right: auto;
}
@media screen and (max-width: 688px) {
#chat-tab {
padding: 0px;
}
#chat {
height: calc(100dvh - 262px) !important;
}
.bigchat #chat {
height: calc(100dvh - 180px) !important;
}
.chat {
flex-direction: column-reverse !important;
}
}
.chat {
margin-left: auto;
margin-right: auto;
max-width: 800px;
height: 100%;
overflow-y: auto;
padding-right: 15px;
display: flex;
flex-direction: column;
word-break: break-word;
overflow-wrap: anywhere;
padding-top: 6px;
}
#chat {
height: calc(100dvh - 272px);
}
.bigchat #chat {
height: calc(100dvh - 200px);
}
#show-controls {
position: absolute;
background-color: transparent;
left: calc(100% - 130px);
}
.chat > .messages {
display: flex;
flex-direction: column;
}
.chat .message:last-child {
margin-bottom: 0px !important;
padding-bottom: 0px !important;
}
.message-body li {
margin-top: 0.5em !important;
margin-bottom: 0.5em !important;
}
.message-body li > p {
display: inline !important;
}
.message-body ul, .message-body ol {
font-size: 15px !important;
}
.message-body ul {
list-style-type: disc !important;
}
.message-body pre {
margin-bottom: 1.25em !important;
}
.message-body code {
white-space: pre-wrap !important;
word-wrap: break-word !important;
}
.message-body :not(pre) > code {
white-space: normal !important;
}
#chat-input {
padding: 0;
padding-top: 18px;
background: var(--background-fill-primary);
border: none;
}
#chat-input textarea:focus {
box-shadow: none !important;
}
@media print {
body {
visibility: hidden;
}
.chat {
visibility: visible;
position: absolute;
left: 0;
top: 0;
max-width: unset;
max-height: unset;
width: 100%;
overflow-y: visible;
}
.message {
break-inside: avoid;
}
.gradio-container {
overflow: visible;
}
.tab-nav {
display: none !important;
}
#chat-tab > :first-child {
max-width: unset;
}
}


@ -1,25 +0,0 @@
document.getElementById("main").parentNode.childNodes[0].classList.add("header_bar");
document.getElementById("main").parentNode.style = "padding: 0; margin: 0";
document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0";
// Get references to the elements
let main = document.getElementById('main');
let main_parent = main.parentNode;
let extensions = document.getElementById('extensions');
// Add an event listener to the main element
main_parent.addEventListener('click', function(e) {
// Check if the main element is visible
if (main.offsetHeight > 0 && main.offsetWidth > 0) {
extensions.style.display = 'flex';
} else {
extensions.style.display = 'none';
}
});
const textareaElements = document.querySelectorAll('.add_scrollbar textarea');
for(i = 0; i < textareaElements.length; i++) {
textareaElements[i].classList.remove('scroll-hide');
textareaElements[i].classList.add('pretty_scrollbar');
textareaElements[i].style.resize = "none";
}


@ -16,7 +16,7 @@ RUN . /build/venv/bin/activate && \
# https://developer.nvidia.com/cuda-gpus
# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
RUN . /build/venv/bin/activate && \
python3 setup_cuda.py bdist_wheel -d .
@ -26,7 +26,7 @@ LABEL maintainer="Your Name <your.email@example.com>"
LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
RUN apt-get update && \
apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ && \
apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ ffmpeg && \
rm -rf /var/lib/apt/lists/*
RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
@ -51,11 +51,15 @@ COPY extensions/elevenlabs_tts/requirements.txt /app/extensions/elevenlabs_tts/r
COPY extensions/google_translate/requirements.txt /app/extensions/google_translate/requirements.txt
COPY extensions/silero_tts/requirements.txt /app/extensions/silero_tts/requirements.txt
COPY extensions/whisper_stt/requirements.txt /app/extensions/whisper_stt/requirements.txt
COPY extensions/superbooga/requirements.txt /app/extensions/superbooga/requirements.txt
COPY extensions/openai/requirements.txt /app/extensions/openai/requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/superbooga && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/openai && pip3 install -r requirements.txt
COPY requirements.txt /app/requirements.txt
RUN . /app/venv/bin/activate && \


@ -23,6 +23,7 @@ services:
- ./prompts:/app/prompts
- ./softprompts:/app/softprompts
- ./training:/app/training
- ./cloudflared:/etc/cloudflared
deploy:
resources:
reservations:


@ -39,8 +39,8 @@ The extensions framework is based on special functions and variables that you ca
| `def ui()` | Creates custom gradio elements when the UI is launched. |
| `def custom_css()` | Returns custom CSS as a string. It is applied whenever the web UI is loaded. |
| `def custom_js()` | Same as above but for javascript. |
| `def input_modifier(string, state)` | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. |
| `def output_modifier(string, state)` | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. |
| `def input_modifier(string, state, is_chat=False)` | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. |
| `def output_modifier(string, state, is_chat=False)` | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. |
| `def chat_input_modifier(text, visible_text, state)` | Modifies both the visible and internal inputs in chat mode. Can be used to hijack the chat input with custom content. |
| `def bot_prefix_modifier(string, state)` | Applied in chat mode to the prefix for the bot's reply. |
| `def state_modifier(state)` | Modifies the dictionary containing the UI input parameters before it is used by the text generation functions. |
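A minimal extension sketch using the new signatures from the table above (the function bodies are illustrative, not part of the framework):

```
# script.py of a hypothetical extension; only the names and signatures come
# from the table above, the behavior is made up for illustration.

def input_modifier(string, state, is_chat=False):
    # Prefix user chat messages; leave default/notebook prompts untouched.
    if is_chat:
        return '[user] ' + string
    return string


def output_modifier(string, state, is_chat=False):
    # Trim trailing whitespace from the reply before it reaches the UI.
    return string.rstrip()
```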
@ -163,7 +163,7 @@ def chat_input_modifier(text, visible_text, state):
"""
return text, visible_text
def input_modifier(string, state):
def input_modifier(string, state, is_chat=False):
"""
In default/notebook modes, modifies the whole prompt.
@ -196,7 +196,7 @@ def logits_processor_modifier(processor_list, input_ids):
processor_list.append(MyLogits())
return processor_list
def output_modifier(string, state):
def output_modifier(string, state, is_chat=False):
"""
Modifies the LLM output before it gets presented.


@ -64,59 +64,19 @@ python server.py --autogptq --gpu-memory 3000MiB 6000MiB --model model_name
### Using LoRAs with AutoGPTQ
Not supported yet.
Works fine for a single LoRA.
## GPTQ-for-LLaMa
GPTQ-for-LLaMa is the original adaptation of GPTQ for the LLaMA model. It was made possible by [@qwopqwop200](https://github.com/qwopqwop200/GPTQ-for-LLaMa): https://github.com/qwopqwop200/GPTQ-for-LLaMa
Different branches of GPTQ-for-LLaMa are currently available, including:
| Branch | Comment |
|----|----|
| [Old CUDA branch (recommended)](https://github.com/oobabooga/GPTQ-for-LLaMa/) | The fastest branch, works on Windows and Linux. |
| [Up-to-date triton branch](https://github.com/qwopqwop200/GPTQ-for-LLaMa) | Slightly more precise than the old CUDA branch from 13b upwards, significantly more precise for 7b. 2x slower for small context size and only works on Linux. |
| [Up-to-date CUDA branch](https://github.com/qwopqwop200/GPTQ-for-LLaMa/tree/cuda) | As precise as the up-to-date triton branch, 10x slower than the old cuda branch for small context size. |
Overall, I recommend using the old CUDA branch. It is included by default in the one-click-installer for this web UI.
### Installation
Start by cloning GPTQ-for-LLaMa into your `text-generation-webui/repositories` folder:
```
mkdir repositories
cd repositories
git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda
```
If you want to use the up-to-date CUDA or triton branches instead of the old CUDA branch, use these commands:
```
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git -b cuda
```
```
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git -b triton
```
Next you need to install the CUDA extensions. You can do that either by installing the precompiled wheels, or by compiling the wheels yourself.
A Python package containing both major CUDA versions of GPTQ-for-LLaMa is used to simplify installation and compatibility: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA
### Precompiled wheels
Kindly provided by our friend jllllll: https://github.com/jllllll/GPTQ-for-LLaMa-Wheels
Kindly provided by our friend jllllll: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases
Windows:
```
pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/main/quant_cuda-0.0.0-cp310-cp310-win_amd64.whl
```
Linux:
```
pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/Linux-x64/quant_cuda-0.0.0-cp310-cp310-linux_x86_64.whl
```
Wheels are included in requirements.txt and are installed with the webui on supported systems.
### Manual installation
@ -124,20 +84,19 @@ pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/Linux-x64/quant
```
conda activate textgen
conda install -c conda-forge cudatoolkit-dev
conda install cuda -c nvidia/label/cuda-11.7.1
```
The command above takes some 10 minutes to run and shows no progress bar or updates along the way.
You are also going to need to have a C++ compiler installed. On Linux, `sudo apt install build-essential` or equivalent is enough.
You are also going to need to have a C++ compiler installed. On Linux, `sudo apt install build-essential` or equivalent is enough. On Windows, Visual Studio or Visual Studio Build Tools is required.
If you're using an older version of CUDA toolkit (e.g. 11.7) but the latest version of `gcc` and `g++` (12.0+), you should downgrade with: `conda install -c conda-forge gxx==11.3.0`. Kernel compilation will fail otherwise.
If you're using an older version of CUDA toolkit (e.g. 11.7) but the latest version of `gcc` and `g++` (12.0+) on Linux, you should downgrade with: `conda install -c conda-forge gxx==11.3.0`. Kernel compilation will fail otherwise.
#### Step 2: compile the CUDA extensions
```
cd repositories/GPTQ-for-LLaMa
python setup_cuda.py install
python -m pip install git+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA -v
```
### Getting pre-converted LLaMA weights


@ -24,14 +24,14 @@ from tqdm.contrib.concurrent import thread_map
class ModelDownloader:
def __init__(self, max_retries=5):
self.s = requests.Session()
self.session = requests.Session()
if max_retries:
self.s.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
self.s.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
self.s.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
if os.getenv('HF_TOKEN') is not None:
self.s.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
self.session.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
def sanitize_model_and_branch_names(self, model, branch):
if model[-1] == '/':
@ -57,12 +57,13 @@ class ModelDownloader:
classifications = []
has_pytorch = False
has_pt = False
# has_ggml = False
has_gguf = False
has_ggml = False
has_safetensors = False
is_lora = False
while True:
url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
r = self.s.get(url, timeout=10)
r = self.session.get(url, timeout=10)
r.raise_for_status()
content = r.content
@ -75,13 +76,14 @@ class ModelDownloader:
if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):
is_lora = True
is_pytorch = re.match("(pytorch|adapter|gptq)_model.*\.bin", fname)
is_safetensors = re.match(".*\.safetensors", fname)
is_pt = re.match(".*\.pt", fname)
is_ggml = re.match(".*ggml.*\.bin", fname)
is_tokenizer = re.match("(tokenizer|ice|spiece).*\.model", fname)
is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer
if any((is_pytorch, is_safetensors, is_pt, is_ggml, is_tokenizer, is_text)):
is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname)
is_safetensors = re.match(r".*\.safetensors", fname)
is_pt = re.match(r".*\.pt", fname)
is_gguf = re.match(r'.*\.gguf', fname)
is_ggml = re.match(r".*ggml.*\.bin", fname)
is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname)
is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
if any((is_pytorch, is_safetensors, is_pt, is_gguf, is_ggml, is_tokenizer, is_text)):
if 'lfs' in dict[i]:
sha256.append([fname, dict[i]['lfs']['oid']])
@ -101,8 +103,11 @@ class ModelDownloader:
elif is_pt:
has_pt = True
classifications.append('pt')
elif is_gguf:
has_gguf = True
classifications.append('gguf')
elif is_ggml:
# has_ggml = True
has_ggml = True
classifications.append('ggml')
cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50'
@ -115,6 +120,12 @@ class ModelDownloader:
if classifications[i] in ['pytorch', 'pt']:
links.pop(i)
# If both GGML and GGUF are available, download GGUF only
if has_ggml and has_gguf:
for i in range(len(classifications) - 1, -1, -1):
if classifications[i] == 'ggml':
links.pop(i)
return links, sha256, is_lora
def get_output_folder(self, model, branch, is_lora, base_folder=None):
@ -136,7 +147,7 @@ class ModelDownloader:
if output_path.exists() and not start_from_scratch:
# Check if the file has already been downloaded completely
r = self.s.get(url, stream=True, timeout=10)
r = self.session.get(url, stream=True, timeout=10)
total_size = int(r.headers.get('content-length', 0))
if output_path.stat().st_size >= total_size:
return
@ -145,7 +156,7 @@ class ModelDownloader:
headers = {'Range': f'bytes={output_path.stat().st_size}-'}
mode = 'ab'
with self.s.get(url, stream=True, headers=headers, timeout=10) as r:
with self.session.get(url, stream=True, headers=headers, timeout=10) as r:
r.raise_for_status() # Do not continue the download if the request was unsuccessful
total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 1024 # 1MB
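A standalone sketch of the new classification rules and the GGUF-over-GGML preference (regexes copied from the diff above; the file list is made up):

```
import re

patterns = {
    'pytorch': re.compile(r"(pytorch|adapter|gptq)_model.*\.bin"),
    'safetensors': re.compile(r".*\.safetensors"),
    'gguf': re.compile(r".*\.gguf"),
    'ggml': re.compile(r".*ggml.*\.bin"),
}

files = ['llama-13b.Q4_K_M.gguf', 'llama-13b.ggmlv3.q4_K_M.bin', 'tokenizer.model']
kinds = {f: next((k for k, p in patterns.items() if p.match(f)), 'other') for f in files}

# If a repository ships both formats, keep only the GGUF file, mirroring the new logic.
if 'gguf' in kinds.values() and 'ggml' in kinds.values():
    files = [f for f in files if kinds[f] != 'ggml']

print(files)  # ['llama-13b.Q4_K_M.gguf', 'tokenizer.model']
```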


@ -200,7 +200,7 @@ class Handler(BaseHTTPRequestHandler):
super().end_headers()
def _run_server(port: int, share: bool = False):
def _run_server(port: int, share: bool = False, tunnel_id=str):
address = '0.0.0.0' if shared.args.listen else '127.0.0.1'
server = ThreadingHTTPServer((address, port), Handler)
@ -210,7 +210,7 @@ def _run_server(port: int, share: bool = False):
if share:
try:
try_start_cloudflared(port, max_attempts=3, on_start=on_start)
try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start)
except Exception:
pass
else:
@ -220,5 +220,5 @@ def _run_server(port: int, share: bool = False):
server.serve_forever()
def start_server(port: int, share: bool = False):
Thread(target=_run_server, args=[port, share], daemon=True).start()
def start_server(port: int, share: bool = False, tunnel_id=str):
Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start()


@ -1,2 +1,2 @@
flask_cloudflared==0.0.12
flask_cloudflared==0.0.14
websockets==11.0.2


@ -4,5 +4,5 @@ from modules import shared
def setup():
blocking_api.start_server(shared.args.api_blocking_port, share=shared.args.public_api)
streaming_api.start_server(shared.args.api_streaming_port, share=shared.args.public_api)
blocking_api.start_server(shared.args.api_blocking_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id)
streaming_api.start_server(shared.args.api_streaming_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id)


@ -102,7 +102,7 @@ async def _run(host: str, port: int):
await asyncio.Future() # run forever
def _run_server(port: int, share: bool = False):
def _run_server(port: int, share: bool = False, tunnel_id=str):
address = '0.0.0.0' if shared.args.listen else '127.0.0.1'
def on_start(public_url: str):
@ -111,7 +111,7 @@ def _run_server(port: int, share: bool = False):
if share:
try:
try_start_cloudflared(port, max_attempts=3, on_start=on_start)
try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start)
except Exception as e:
print(e)
else:
@ -120,5 +120,5 @@ def _run_server(port: int, share: bool = False):
asyncio.run(_run(host=address, port=port))
def start_server(port: int, share: bool = False):
Thread(target=_run_server, args=[port, share], daemon=True).start()
def start_server(port: int, share: bool = False, tunnel_id=str):
Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start()


@ -21,6 +21,7 @@ def build_parameters(body, chat=False):
generate_params = {
'max_new_tokens': int(body.get('max_new_tokens', body.get('max_length', 200))),
'auto_max_new_tokens': bool(body.get('auto_max_new_tokens', False)),
'do_sample': bool(body.get('do_sample', True)),
'temperature': float(body.get('temperature', 0.5)),
'top_p': float(body.get('top_p', 1)),
@ -42,6 +43,8 @@ def build_parameters(body, chat=False):
'mirostat_mode': int(body.get('mirostat_mode', 0)),
'mirostat_tau': float(body.get('mirostat_tau', 5)),
'mirostat_eta': float(body.get('mirostat_eta', 0.1)),
'guidance_scale': float(body.get('guidance_scale', 1)),
'negative_prompt': str(body.get('negative_prompt', '')),
'seed': int(body.get('seed', -1)),
'add_bos_token': bool(body.get('add_bos_token', True)),
'truncation_length': int(body.get('truncation_length', body.get('max_context_length', 2048))),
@ -65,30 +68,28 @@ def build_parameters(body, chat=False):
name1, name2, _, greeting, context, _ = load_character_memoized(character, str(body.get('your_name', shared.settings['name1'])), shared.settings['name2'], instruct=False)
name1_instruct, name2_instruct, _, _, context_instruct, turn_template = load_character_memoized(instruction_template, '', '', instruct=True)
generate_params.update({
'stop_at_newline': bool(body.get('stop_at_newline', shared.settings['stop_at_newline'])),
'chat_generation_attempts': int(body.get('chat_generation_attempts', shared.settings['chat_generation_attempts'])),
'mode': str(body.get('mode', 'chat')),
'name1': name1,
'name2': name2,
'context': context,
'greeting': greeting,
'name1_instruct': name1_instruct,
'name2_instruct': name2_instruct,
'context_instruct': body.get('context_instruct', context_instruct),
'turn_template': turn_template,
'chat-instruct_command': str(body.get('chat-instruct_command', shared.settings['chat-instruct_command'])),
'name1': str(body.get('name1', name1)),
'name2': str(body.get('name2', name2)),
'context': str(body.get('context', context)),
'greeting': str(body.get('greeting', greeting)),
'name1_instruct': str(body.get('name1_instruct', name1_instruct)),
'name2_instruct': str(body.get('name2_instruct', name2_instruct)),
'context_instruct': str(body.get('context_instruct', context_instruct)),
'turn_template': str(body.get('turn_template', turn_template)),
'chat-instruct_command': str(body.get('chat_instruct_command', body.get('chat-instruct_command', shared.settings['chat-instruct_command']))),
'history': body.get('history', {'internal': [], 'visible': []})
})
return generate_params
def try_start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
def try_start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
Thread(target=_start_cloudflared, args=[
port, max_attempts, on_start], daemon=True).start()
port, tunnel_id, max_attempts, on_start], daemon=True).start()
def _start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
def _start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
try:
from flask_cloudflared import _run_cloudflared
except ImportError:
@ -98,7 +99,10 @@ def _start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Call
for _ in range(max_attempts):
try:
public_url = _run_cloudflared(port, port + 1)
if tunnel_id is not None:
public_url = _run_cloudflared(port, port + 1, tunnel_id=tunnel_id)
else:
public_url = _run_cloudflared(port, port + 1)
if on_start:
on_start(public_url)
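A small usage sketch of the updated helper (ports and the tunnel ID are placeholders; assumes `flask_cloudflared` is installed and the module is importable from the repository root):

```
from extensions.api.util import try_start_cloudflared


def announce(public_url):
    print(f'API available at {public_url}')


# Anonymous quick tunnel, same behavior as before this change:
try_start_cloudflared(5000, tunnel_id=None, max_attempts=3, on_start=announce)

# Named Cloudflare Tunnel, as enabled by the new --public-api-id flag:
try_start_cloudflared(5000, tunnel_id='your-tunnel-id', max_attempts=3, on_start=announce)
```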


@ -1,12 +1,13 @@
import html
import re
from pathlib import Path
import elevenlabs
import gradio as gr
from modules import chat, shared
from modules.utils import gradio
from modules import chat, shared, ui_chat
from modules.logging_colors import logger
from modules.utils import gradio
params = {
'activate': True,
@ -111,7 +112,7 @@ def output_modifier(string):
output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3'.format(wav_idx))
print(f'Outputting audio to {str(output_file)}')
try:
audio = elevenlabs.generate(text=string, voice=params['selected_voice'], model=params['model'])
audio = elevenlabs.generate(text=html.unescape(string), voice=params['selected_voice'], model=params['model'])
elevenlabs.save(audio, str(output_file))
autoplay = 'autoplay' if params['autoplay'] else ''
@ -167,24 +168,23 @@ def ui():
convert_cancel = gr.Button('Cancel', visible=False)
convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
if shared.is_chat():
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
convert_confirm.click(
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
remove_tts_from_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
convert_confirm.click(
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
remove_tts_from_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
# Toggle message text in history
show_text.change(
lambda x: params.update({"show_text": x}), show_text, None).then(
toggle_text_in_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
# Toggle message text in history
show_text.change(
lambda x: params.update({"show_text": x}), show_text, None).then(
toggle_text_in_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
# Event functions to update the parameters in the backend
activate.change(lambda x: params.update({'activate': x}), activate, None)


@ -59,7 +59,7 @@ def chat_input_modifier(text, visible_text, state):
"""
return text, visible_text
def input_modifier(string, state):
def input_modifier(string, state, is_chat=False):
"""
In default/notebook modes, modifies the whole prompt.
@ -92,7 +92,7 @@ def logits_processor_modifier(processor_list, input_ids):
processor_list.append(MyLogits())
return processor_list
def output_modifier(string, state):
def output_modifier(string, state, is_chat=False):
"""
Modifies the LLM output before it gets presented.

View file

@ -0,0 +1,33 @@
let gallery_element = document.getElementById('gallery-extension');
let chat_mode_element = document.getElementById('chat-mode');
let extensions_block = document.getElementById('extensions');
let extensions_block_size = extensions_block.childNodes.length;
let gallery_only = (extensions_block_size == 5);
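// Heuristic: presumably the extensions block contains exactly 5 child nodes when the character
// gallery is the only extension loaded, so the whole block can be hidden outside the Chat tab.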
document.querySelector('.header_bar').addEventListener('click', function(event) {
if (event.target.tagName === 'BUTTON') {
const buttonText = event.target.textContent.trim();
let chat_visible = (buttonText == 'Chat');
let default_visible = (buttonText == 'Default');
let notebook_visible = (buttonText == 'Notebook');
let chat_mode_visible = (chat_mode_element.offsetHeight > 0 && chat_mode_element.offsetWidth > 0);
// Only show this extension in the Chat tab
if (chat_visible) {
if (chat_mode_visible) {
gallery_element.style.display = 'block';
extensions_block.style.display = '';
} else {
gallery_element.style.display = 'none';
extensions_block.style.display = 'none';
}
} else {
gallery_element.style.display = 'none';
if (gallery_only) {
extensions_block.style.display = 'none';
}
}
}
});

View file

@ -82,8 +82,13 @@ def select_character(evt: gr.SelectData):
return (evt.value[1])
def custom_js():
path_to_js = Path(__file__).parent.resolve() / 'script.js'
return open(path_to_js, 'r').read()
def ui():
with gr.Accordion("Character gallery", open=False):
with gr.Accordion("Character gallery", open=False, elem_id='gallery-extension'):
update = gr.Button("Refresh")
gr.HTML(value="<style>" + generate_css() + "</style>")
gallery = gr.Dataset(components=[gr.HTML(visible=False)],

View file

@ -28,7 +28,7 @@ class MyLogits(LogitsProcessor):
def __call__(self, input_ids, scores):
if input_ids.shape[-1] - initial_size < params["min_length"]:
scores[...,self.newline_id] = -1000
scores[...,shared.tokenizer.eos_token_id] = -1000
# scores[...,shared.tokenizer.eos_token_id] = -1000
# probs = torch.softmax(scores, dim=-1, dtype=torch.float)
# probs[0] /= probs[0].sum()

View file

@ -165,7 +165,7 @@ def messages_to_prompt(body: dict, req_params: dict, max_tokens):
# Instruct models can be much better
if shared.settings['instruction_template']:
try:
instruct = yaml.safe_load(open(f"characters/instruction-following/{shared.settings['instruction_template']}.yaml", 'r'))
instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r'))
template = instruct['turn_template']
system_message_template = "{message}"
@ -193,7 +193,7 @@ def messages_to_prompt(body: dict, req_params: dict, max_tokens):
except Exception as e:
req_params['stopping_strings'].extend(['\nUser:', 'User:']) # XXX User: prompt here also
print(f"Exception: When loading characters/instruction-following/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print("Warning: Loaded default instruction-following template for model.")
else:

View file

@ -4,6 +4,7 @@ import copy
# Data type is important, e.g. use 0.0 for a float 0
default_req_params = {
'max_new_tokens': 16, # 'Inf' for chat
'auto_max_new_tokens': False,
'temperature': 1.0,
'top_p': 1.0,
'top_k': 1, # choose 20 for chat in absence of another default
@ -32,6 +33,8 @@ default_req_params = {
'mirostat_mode': 0,
'mirostat_tau': 5.0,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'ban_eos_token': False,
'skip_special_tokens': True,
'custom_stopping_strings': '',

View file

@ -31,7 +31,7 @@ def edits(instruction: str, input: str, temperature=1.0, top_p=1.0) -> dict:
stopping_strings.extend(['\n###'])
else:
try:
instruct = yaml.safe_load(open(f"characters/instruction-following/{shared.settings['instruction_template']}.yaml", 'r'))
instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r'))
template = instruct['turn_template']
template = template\
@ -45,7 +45,7 @@ def edits(instruction: str, input: str, temperature=1.0, top_p=1.0) -> dict:
except Exception as e:
instruction_template = default_template
print(f"Exception: When loading characters/instruction-following/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print("Warning: Loaded default instruction-following template (Alpaca) for model.")
else:
stopping_strings.extend(['\n###'])

View file

@ -67,10 +67,13 @@ class Handler(BaseHTTPRequestHandler):
self.send_response(code)
self.send_access_control_headers()
self.send_header('Content-Type', 'application/json')
self.end_headers()
response = json.dumps(ret)
r_utf8 = response.encode('utf-8')
self.send_header('Content-Length', str(len(r_utf8)))
self.end_headers()
self.wfile.write(r_utf8)
if not no_debug:
debug_msg(r_utf8)

View file

@ -1,17 +1,22 @@
import time
import gradio
import numpy as np
import torch
from transformers import LogitsProcessor
import numpy as np
from modules import shared
from modules import html_generator, shared
params = {
'active': True,
'color_by_perplexity': False,
'color_by_probability': False,
'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often.
#'probability_dropdown': False
'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often.
'probability_dropdown': False,
'verbose': False # For debugging mostly
}
class PerplexityLogits(LogitsProcessor):
def __init__(self, verbose=False):
self.generated_token_ids = []
@ -23,9 +28,10 @@ class PerplexityLogits(LogitsProcessor):
self.verbose = verbose
def __call__(self, input_ids, scores):
# t0 = time.time()
probs = torch.softmax(scores, dim=-1, dtype=torch.float)
log_probs = torch.nan_to_num(torch.log(probs))
entropy = -torch.sum(probs*log_probs)
log_probs = torch.nan_to_num(torch.log(probs)) # Note: This is to convert log(0) nan to 0, but probs*log_probs makes this 0 not affect the perplexity.
entropy = -torch.sum(probs * log_probs)
entropy = entropy.cpu().numpy()
perplexity = round(float(np.exp(entropy)), 4)
self.perplexities_list.append(perplexity)
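# The per-step perplexity recorded above is simply exp(entropy) of the next-token distribution.
# A minimal standalone sketch of the same computation on a toy distribution, assuming only numpy:
#
#     probs = np.array([0.7, 0.2, 0.1])           # toy next-token distribution
#     entropy = -np.sum(probs * np.log(probs))    # Shannon entropy in nats, ~0.80
#     perplexity = float(np.exp(entropy))         # ~2.23
#
# A uniform distribution over N tokens gives perplexity N; a one-hot distribution gives 1.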
@ -36,25 +42,25 @@ class PerplexityLogits(LogitsProcessor):
if len(self.selected_probs) > 0:
# Is the selected token in the top tokens?
if self.verbose:
print(shared.tokenizer.decode(last_token_id))
print([shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1]])
print(self.top_probs_list[-1])
if last_token_id in self.top_token_ids_list[-1]:
idx = self.top_token_ids_list[-1].index(last_token_id)
self.selected_probs.append(self.top_probs_list[-1][idx])
print('Probs: Token after', shared.tokenizer.decode(last_token_id))
print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
if last_token_id in self.top_token_ids_list[-1][0]:
idx = self.top_token_ids_list[-1][0].index(last_token_id)
self.selected_probs.append(self.top_probs_list[-1][0][idx])
else:
self.top_token_ids_list[-1].append(last_token_id)
self.top_token_ids_list[-1][0].append(last_token_id)
last_prob = round(float(self.last_probs[last_token_id]), 4)
self.top_probs_list[-1].append(last_prob)
self.top_probs_list[-1][0].append(last_prob)
self.selected_probs.append(last_prob)
else:
self.selected_probs.append(1.0) # Placeholder for the last token of the prompt
self.selected_probs.append(1.0) # Placeholder for the last token of the prompt
if self.verbose:
pplbar = "-"
if not np.isnan(perplexity):
pplbar = "*"*round(perplexity)
print(f"{last_token}\t{perplexity:.2f}\t{pplbar}")
pplbar = "*" * round(perplexity)
print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")
# Get top 5 probabilities
top_tokens_and_probs = torch.topk(probs, 5)
@ -63,153 +69,241 @@ class PerplexityLogits(LogitsProcessor):
self.top_token_ids_list.append(top_token_ids)
self.top_probs_list.append(top_probs)
probs = probs.cpu().numpy().flatten()
self.last_probs = probs # Need to keep this as a reference for top probs
probs = probs.cpu().numpy().flatten()
self.last_probs = probs # Need to keep this as a reference for top probs
# t1 = time.time()
# print(f"PPL Processor: {(t1-t0):.3f} s")
# About 1 ms, though occasionally up to around 100 ms, not sure why...
# Doesn't actually modify the logits!
return scores
# Stores the perplexity and top probabilities
ppl_logits_processor = None
def logits_processor_modifier(logits_processor_list, input_ids):
global ppl_logits_processor
ppl_logits_processor = PerplexityLogits()
logits_processor_list.append(ppl_logits_processor)
if params['active']:
ppl_logits_processor = PerplexityLogits(verbose=params['verbose'])
logits_processor_list.append(ppl_logits_processor)
def output_modifier(text):
global ppl_logits_processor
# t0 = time.time()
if not params['active']:
return text
# TODO: It's probably more efficient to do this above rather than modifying all these lists
# Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation
perplexities = ppl_logits_processor.perplexities_list[:-1]
top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1]
top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids] for top_token_ids in top_token_ids_list]
top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list]
top_probs_list = ppl_logits_processor.top_probs_list[:-1]
# Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt
gen_token_ids = ppl_logits_processor.generated_token_ids[1:]
gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids]
sel_probs = ppl_logits_processor.selected_probs[1:]
end_part = '</span>' # Helps with finding the index after replacing part of the text.
in_code = False # Since the <span> tags mess up code blocks, avoid coloring while inside a code block, based on finding tokens with '`' in them
end_part = '</div></div>' if params['probability_dropdown'] else '</span>' # Helps with finding the index after replacing part of the text.
if params['color_by_probability'] and params['color_by_perplexity']:
i = 0
for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
if '`' in token:
in_code = not in_code
continue
if in_code:
continue
i = 0
for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
color = 'ffffff'
if params['color_by_probability'] and params['color_by_perplexity']:
color = probability_perplexity_color_scale(prob, ppl)
if token in text[i:]:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
i += text[i:].find(end_part) + len(end_part)
elif params['color_by_perplexity']:
i = 0
for token, ppl, top_tokens, top_probs in zip(gen_tokens, perplexities, top_tokens_list, top_probs_list):
if '`' in token:
in_code = not in_code
continue
if in_code:
continue
elif params['color_by_perplexity']:
color = perplexity_color_scale(ppl)
if token in text[i:]:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
i += text[i:].find(end_part) + len(end_part)
elif params['color_by_probability']:
i = 0
for token, prob, top_tokens, top_probs in zip(gen_tokens, sel_probs, top_tokens_list, top_probs_list):
if '`' in token:
in_code = not in_code
continue
if in_code:
continue
elif params['color_by_probability']:
color = probability_color_scale(prob)
if token in text[i:]:
if token in text[i:]:
if params['probability_dropdown']:
text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], ppl), 1)
else:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
i += text[i:].find(end_part) + len(end_part)
i += text[i:].find(end_part) + len(end_part)
print('Average perplexity:', round(np.mean(perplexities), 4))
# Use full perplexity list for calculating the average here.
print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
# t1 = time.time()
# print(f"Modifier: {(t1-t0):.3f} s")
# About 50 ms
return text
# Green-yellow-red color scale
def probability_color_scale(prob):
'''
Green-yellow-red color scale
'''
rv = 0
gv = 0
if prob <= 0.5:
rv = 'ff'
gv = hex(int(255*prob*2))[2:]
gv = hex(int(255 * prob * 2))[2:]
if len(gv) < 2:
gv = '0'*(2 - len(gv)) + gv
gv = '0' * (2 - len(gv)) + gv
else:
rv = hex(int(255 - 255*(prob - 0.5)*2))[2:]
rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
gv = 'ff'
if len(rv) < 2:
rv = '0'*(2 - len(rv)) + rv
rv = '0' * (2 - len(rv)) + rv
return rv + gv + '00'
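# For reference, the scale above maps prob=0.0 -> 'ff0000' (red), prob=0.5 -> 'ffff00' (yellow)
# and prob=1.0 -> '00ff00' (green), interpolating linearly in between.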
# Red component only, white for 0 perplexity (sorry if you're not in dark mode)
def perplexity_color_scale(ppl):
value = hex(max(int(255.0 - params['ppl_scale']*(float(ppl)-1.0)), 0))[2:]
'''
Red component only, white for a perplexity of 1 (sorry if you're not in dark mode)
'''
value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
if len(value) < 2:
value = '0'*(2 - len(value)) + value
value = '0' * (2 - len(value)) + value
return 'ff' + value + value
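# For reference, with the default ppl_scale of 15 this maps perplexity 1.0 -> 'ffffff' (white)
# and perplexity >= 18 -> 'ff0000' (pure red), fading through lighter reds in between.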
# Green-yellow-red for probability and blue component for perplexity
def probability_perplexity_color_scale(prob, ppl):
'''
Green-yellow-red for probability and blue component for perplexity
'''
rv = 0
gv = 0
bv = hex(min(max(int(params['ppl_scale']*(float(ppl)-1.0)), 0), 255))[2:]
bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
if len(bv) < 2:
bv = '0'*(2 - len(bv)) + bv
bv = '0' * (2 - len(bv)) + bv
if prob <= 0.5:
rv = 'ff'
gv = hex(int(255*prob*2))[2:]
gv = hex(int(255 * prob * 2))[2:]
if len(gv) < 2:
gv = '0'*(2 - len(gv)) + gv
gv = '0' * (2 - len(gv)) + gv
else:
rv = hex(int(255 - 255*(prob - 0.5)*2))[2:]
rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
gv = 'ff'
if len(rv) < 2:
rv = '0'*(2 - len(rv)) + rv
rv = '0' * (2 - len(rv)) + rv
return rv + gv + bv
def add_color_html(token, color):
return f'<span style="color: #{color}">{token}</span>'
"""
# This is still very broken at the moment, needs CSS too but I'm not very good at CSS (and neither is GPT-4 apparently) so I still need to figure that out.
def add_dropdown_html(token, color, top_tokens, top_probs):
html = f'<span class="hoverable" style="color: #{color}">{token}<div class="dropdown"><table class="dropdown-content">'
for token, prob in zip(top_tokens, top_probs):
# TODO: Background color? Bold for selected token?
# Bigger issue: Why is there a newline after the first token, and the dropdown fails there?
# The HTML ends up like <p><span>word</span></p><div>...</div>,
# even though for all other tokens it shows up correctly.
# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
# I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
# A potential solution might be to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
# I wonder if we can also avoid using deepcopy here.
def add_dropdown_html(token, color, top_tokens, top_probs, perplexity=0):
html = f'<div class="hoverable"><span style="color: #{color}">{token}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
for token_option, prob in zip(top_tokens, top_probs):
# TODO: Bold for selected token?
# Using divs avoids the problems caused by nesting divs inside spans.
# The remaining problem is that the divs render a single space of whitespace between every token.
# There is probably some way to fix this in CSS that I don't know about.
row_color = probability_color_scale(prob)
html += f'<tr><td style="color: #{row_color}">{token}</td><td style="color: #{row_color}">{prob}</td></tr>'
html += '</table></div></span>'
return html
"""
row_class = ' class="selected"' if token_option == token else ''
html += f'<tr{row_class}><td style="color: #{row_color}">{token_option}</td><td style="color: #{row_color}">{prob:.4f}</td></tr>'
if perplexity != 0:
ppl_color = perplexity_color_scale(perplexity)
html += f'<tr><td>Perplexity:</td><td style="color: #{ppl_color}">{perplexity:.4f}</td></tr>'
html += '</tbody></table></div></div>'
return html # About 750 characters per token...
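# Roughly, a single token renders as something like this (colors and probabilities are illustrative):
#
#     <div class="hoverable"><span style="color: #7fff00">the</span>
#       <div class="dropdown"><table class="dropdown-content"><tbody>
#         <tr class="selected"><td style="color: #7fff00">the</td><td style="color: #7fff00">0.7512</td></tr>
#         <tr><td style="color: #ffcc00">a</td><td style="color: #ffcc00">0.1021</td></tr>
#         <tr><td>Perplexity:</td><td style="color: #ff8080">1.9340</td></tr>
#       </tbody></table></div></div>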
def custom_css():
return """
.dropdown {
display: none;
position: absolute;
z-index: 50;
background-color: var(--block-background-fill);
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
width: max-content;
overflow: visible;
padding: 5px;
border-radius: 10px;
border: 1px solid var(--border-color-primary);
}
.dropdown-content {
border: none;
z-index: 50;
}
.dropdown-content tr.selected {
background-color: var(--block-label-background-fill);
}
.dropdown-content td {
color: var(--body-text-color);
}
.hoverable {
color: var(--body-text-color);
position: relative;
display: inline-block;
overflow: visible;
font-size: 15px;
line-height: 1.75;
margin: 0;
padding: 0;
}
.hoverable:hover .dropdown {
display: block;
}
pre {
white-space: pre-wrap;
}
/* TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
However, it also makes the scrollbar disappear, which is bad.
The scrollbar still needs to be present, so for now dropdowns that extend past the edge of the chat area are cut off.
.chat {
    overflow-y: auto;
}
*/
"""
# Monkeypatch applied to html_generator.py
# We simply don't render markdown into HTML. We wrap everything in <pre> tags to preserve whitespace
# formatting. If you're coloring tokens by perplexity or probability, or especially if you're using
# the probability dropdown, you probably care more about seeing the tokens the model actually outputted
# rather than rendering ```code blocks``` or *italics*.
def convert_to_markdown(string):
return '<pre>' + string + '</pre>'
html_generator.convert_to_markdown = convert_to_markdown
def ui():
color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
def update_active_check(x):
params.update({'active': x})
def update_color_by_ppl_check(x):
params.update({'color_by_perplexity': x})
color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
def update_color_by_prob_check(x):
params.update({'color_by_probability': x})
color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
# Doesn't work yet...
"""
prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown")
def update_prob_dropdown_check(x):
params.update({'probability_dropdown': x})
active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")
active_check.change(update_active_check, active_check, None)
color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
prob_dropdown_check.change(update_prob_dropdown_check, prob_dropdown_check, None)
"""

View file

@ -133,6 +133,9 @@ def get_SD_pictures(description, character):
if params['manage_VRAM']:
give_VRAM_priority('SD')
description = re.sub('<audio.*?</audio>', ' ', description)
description = f"({description}:1)"
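# Wrapping the description in '(...:1)' uses Automatic1111's prompt-attention syntax with a
# neutral weight of 1, presumably so the whole description is treated as a single grouped unit
# after the prompt prefix.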
payload = {
"prompt": params['prompt_prefix'] + description,
"seed": params['seed'],

View file

@ -5,7 +5,7 @@ import gradio as gr
import torch
from transformers import BlipForConditionalGeneration, BlipProcessor
from modules import chat, shared
from modules import chat, shared, ui_chat
from modules.ui import gather_interface_values
from modules.utils import gradio
@ -54,5 +54,5 @@ def ui():
"value": generate_chat_picture(picture, name1, name2)
}), [picture_select, shared.gradio['name1'], shared.gradio['name2']], None).then(
gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then(
chat.generate_chat_reply_wrapper, gradio(ui_chat.inputs), gradio('display', 'history'), show_progress=False).then(
lambda: None, None, picture_select, show_progress=False)

View file

@ -0,0 +1,720 @@
The birch canoe slid on the smooth planks.
Glue the sheet to the dark blue background.
It's easy to tell the depth of a well.
These days a chicken leg is a rare dish.
Rice is often served in round bowls.
The juice of lemons makes fine punch.
The box was thrown beside the parked truck.
The hogs were fed chopped corn and garbage.
Four hours of steady work faced us.
A large size in stockings is hard to sell.
The boy was there when the sun rose.
A rod is used to catch pink salmon.
The source of the huge river is the clear spring.
Kick the ball straight and follow through.
Help the woman get back to her feet.
A pot of tea helps to pass the evening.
Smoky fires lack flame and heat.
The soft cushion broke the man's fall.
The salt breeze came across from the sea.
The girl at the booth sold fifty bonds.
The small pup gnawed a hole in the sock.
The fish twisted and turned on the bent hook.
Press the pants and sew a button on the vest.
The swan dive was far short of perfect.
The beauty of the view stunned the young boy.
Two blue fish swam in the tank.
Her purse was full of useless trash.
The colt reared and threw the tall rider.
It snowed, rained, and hailed the same morning.
Read verse out loud for pleasure.
Hoist the load to your left shoulder.
Take the winding path to reach the lake.
Note closely the size of the gas tank.
Wipe the grease off his dirty face.
Mend the coat before you go out.
The wrist was badly strained and hung limp.
The stray cat gave birth to kittens.
The young girl gave no clear response.
The meal was cooked before the bell rang.
What joy there is in living.
A king ruled the state in the early days.
The ship was torn apart on the sharp reef.
Sickness kept him home the third week.
The wide road shimmered in the hot sun.
The lazy cow lay in the cool grass.
Lift the square stone over the fence.
The rope will bind the seven books at once.
Hop over the fence and plunge in.
The friendly gang left the drug store.
Mesh wire keeps chicks inside.
The frosty air passed through the coat.
The crooked maze failed to fool the mouse.
Adding fast leads to wrong sums.
The show was a flop from the very start.
A saw is a tool used for making boards.
The wagon moved on well oiled wheels.
March the soldiers past the next hill.
A cup of sugar makes sweet fudge.
Place a rosebush near the porch steps.
Both lost their lives in the raging storm.
We talked of the side show in the circus.
Use a pencil to write the first draft.
He ran half way to the hardware store.
The clock struck to mark the third period.
A small creek cut across the field.
Cars and busses stalled in snow drifts.
The set of china hit the floor with a crash.
This is a grand season for hikes on the road.
The dune rose from the edge of the water.
Those words were the cue for the actor to leave.
A yacht slid around the point into the bay.
The two met while playing on the sand.
The ink stain dried on the finished page.
The walled town was seized without a fight.
The lease ran out in sixteen weeks.
A tame squirrel makes a nice pet.
The horn of the car woke the sleeping cop.
The heart beat strongly and with firm strokes.
The pearl was worn in a thin silver ring.
The fruit peel was cut in thick slices.
The Navy attacked the big task force.
See the cat glaring at the scared mouse.
There are more than two factors here.
The hat brim was wide and too droopy.
The lawyer tried to lose his case.
The grass curled around the fence post.
Cut the pie into large parts.
Men strive but seldom get rich.
Always close the barn door tight.
He lay prone and hardly moved a limb.
The slush lay deep along the street.
A wisp of cloud hung in the blue air.
A pound of sugar costs more than eggs.
The fin was sharp and cut the clear water.
The play seems dull and quite stupid.
Bail the boat to stop it from sinking.
The term ended in late June that year.
A tusk is used to make costly gifts.
Ten pins were set in order.
The bill was paid every third week.
Oak is strong and also gives shade.
Cats and dogs each hate the other.
The pipe began to rust while new.
Open the crate but don't break the glass.
Add the sum to the product of these three.
Thieves who rob friends deserve jail.
The ripe taste of cheese improves with age.
Act on these orders with great speed.
The hog crawled under the high fence.
Move the vat over the hot fire.
The bark of the pine tree was shiny and dark.
Leaves turn brown and yellow in the fall.
The pennant waved when the wind blew.
Split the log with a quick, sharp blow.
Burn peat after the logs give out.
He ordered peach pie with ice cream.
Weave the carpet on the right hand side.
Hemp is a weed found in parts of the tropics.
A lame back kept his score low.
We find joy in the simplest things.
Type out three lists of orders.
The harder he tried the less he got done.
The boss ran the show with a watchful eye.
The cup cracked and spilled its contents.
Paste can cleanse the most dirty brass.
The slang word for raw whiskey is booze.
It caught its hind paw in a rusty trap.
The wharf could be seen at the farther shore.
Feel the heat of the weak dying flame.
The tiny girl took off her hat.
A cramp is no small danger on a swim.
He said the same phrase thirty times.
Pluck the bright rose without leaves.
Two plus seven is less than ten.
The glow deepened in the eyes of the sweet girl.
Bring your problems to the wise chief.
Write a fond note to the friend you cherish.
Clothes and lodging are free to new men.
We frown when events take a bad turn.
Port is a strong wine with a smoky taste.
The young kid jumped the rusty gate.
Guess the results from the first scores.
A salt pickle tastes fine with ham.
The just claim got the right verdict.
These thistles bend in a high wind.
Pure bred poodles have curls.
The tree top waved in a graceful way.
The spot on the blotter was made by green ink.
Mud was spattered on the front of his white shirt.
The cigar burned a hole in the desk top.
The empty flask stood on the tin tray.
A speedy man can beat this track mark.
He broke a new shoelace that day.
The coffee stand is too high for the couch.
The urge to write short stories is rare.
The pencils have all been used.
The pirates seized the crew of the lost ship.
We tried to replace the coin but failed.
She sewed the torn coat quite neatly.
The sofa cushion is red and of light weight.
The jacket hung on the back of the wide chair.
At that high level the air is pure.
Drop the two when you add the figures.
A filing case is now hard to buy.
An abrupt start does not win the prize.
Wood is best for making toys and blocks.
The office paint was a dull, sad tan.
He knew the skill of the great young actress.
A rag will soak up spilled water.
A shower of dirt fell from the hot pipes.
Steam hissed from the broken valve.
The child almost hurt the small dog.
There was a sound of dry leaves outside.
The sky that morning was clear and bright blue.
Torn scraps littered the stone floor.
Sunday is the best part of the week.
The doctor cured him with these pills.
The new girl was fired today at noon.
They felt gay when the ship arrived in port.
Add the store's account to the last cent.
Acid burns holes in wool cloth.
Fairy tales should be fun to write.
Eight miles of woodland burned to waste.
The third act was dull and tired the players.
A young child should not suffer fright.
Add the column and put the sum here.
We admire and love a good cook.
There the flood mark is ten inches.
He carved a head from the round block of marble.
She has a smart way of wearing clothes.
The fruit of a fig tree is apple-shaped.
Corn cobs can be used to kindle a fire.
Where were they when the noise started.
The paper box is full of thumb tacks.
Sell your gift to a buyer at a good gain.
The tongs lay beside the ice pail.
The petals fall with the next puff of wind.
Bring your best compass to the third class.
They could laugh although they were sad.
Farmers came in to thresh the oat crop.
The brown house was on fire to the attic.
The lure is used to catch trout and flounder.
Float the soap on top of the bath water.
A blue crane is a tall wading bird.
A fresh start will work such wonders.
The club rented the rink for the fifth night.
After the dance, they went straight home.
The hostess taught the new maid to serve.
He wrote his last novel there at the inn.
Even the worst will beat his low score.
The cement had dried when he moved it.
The loss of the second ship was hard to take.
The fly made its way along the wall.
Do that with a wooden stick.
Live wires should be kept covered.
The large house had hot water taps.
It is hard to erase blue or red ink.
Write at once or you may forget it.
The doorknob was made of bright clean brass.
The wreck occurred by the bank on Main Street.
A pencil with black lead writes best.
Coax a young calf to drink from a bucket.
Schools for ladies teach charm and grace.
The lamp shone with a steady green flame.
They took the axe and the saw to the forest.
The ancient coin was quite dull and worn.
The shaky barn fell with a loud crash.
Jazz and swing fans like fast music.
Rake the rubbish up and then burn it.
Slash the gold cloth into fine ribbons.
Try to have the court decide the case.
They are pushed back each time they attack.
He broke his ties with groups of former friends.
They floated on the raft to sun their white backs.
The map had an X that meant nothing.
Whitings are small fish caught in nets.
Some ads serve to cheat buyers.
Jerk the rope and the bell rings weakly.
A waxed floor makes us lose balance.
Madam, this is the best brand of corn.
On the islands the sea breeze is soft and mild.
The play began as soon as we sat down.
This will lead the world to more sound and fury.
Add salt before you fry the egg.
The rush for funds reached its peak Tuesday.
The birch looked stark white and lonesome.
The box is held by a bright red snapper.
To make pure ice, you freeze water.
The first worm gets snapped early.
Jump the fence and hurry up the bank.
Yell and clap as the curtain slides back.
They are men who walk the middle of the road.
Both brothers wear the same size.
In some form or other we need fun.
The prince ordered his head chopped off.
The houses are built of red clay bricks.
Ducks fly north but lack a compass.
Fruit flavors are used in fizz drinks.
These pills do less good than others.
Canned pears lack full flavor.
The dark pot hung in the front closet.
Carry the pail to the wall and spill it there.
The train brought our hero to the big town.
We are sure that one war is enough.
Gray paint stretched for miles around.
The rude laugh filled the empty room.
High seats are best for football fans.
Tea served from the brown jug is tasty.
A dash of pepper spoils beef stew.
A zestful food is the hot-cross bun.
The horse trotted around the field at a brisk pace.
Find the twin who stole the pearl necklace.
Cut the cord that binds the box tightly.
The red tape bound the smuggled food.
Look in the corner to find the tan shirt.
The cold drizzle will halt the bond drive.
Nine men were hired to dig the ruins.
The junk yard had a mouldy smell.
The flint sputtered and lit a pine torch.
Soak the cloth and drown the sharp odor.
The shelves were bare of both jam or crackers.
A joy to every child is the swan boat.
All sat frozen and watched the screen.
A cloud of dust stung his tender eyes.
To reach the end he needs much courage.
Shape the clay gently into block form.
A ridge on a smooth surface is a bump or flaw.
Hedge apples may stain your hands green.
Quench your thirst, then eat the crackers.
Tight curls get limp on rainy days.
The mute muffled the high tones of the horn.
The gold ring fits only a pierced ear.
The old pan was covered with hard fudge.
Watch the log float in the wide river.
The node on the stalk of wheat grew daily.
The heap of fallen leaves was set on fire.
Write fast if you want to finish early.
His shirt was clean but one button was gone.
The barrel of beer was a brew of malt and hops.
Tin cans are absent from store shelves.
Slide the box into that empty space.
The plant grew large and green in the window.
The beam dropped down on the workmen's head.
Pink clouds floated with the breeze.
She danced like a swan, tall and graceful.
The tube was blown and the tire flat and useless.
It is late morning on the old wall clock.
Let's all join as we sing the last chorus.
The last switch cannot be turned off.
The fight will end in just six minutes.
The store walls were lined with colored frocks.
The peace league met to discuss their plans.
The rise to fame of a person takes luck.
Paper is scarce, so write with much care.
The quick fox jumped on the sleeping cat.
The nozzle of the fire hose was bright brass.
Screw the round cap on as tight as needed.
Time brings us many changes.
The purple tie was ten years old.
Men think and plan and sometimes act.
Fill the ink jar with sticky glue.
He smoke a big pipe with strong contents.
We need grain to keep our mules healthy.
Pack the records in a neat thin case.
The crunch of feet in the snow was the only sound.
The copper bowl shone in the sun's rays.
Boards will warp unless kept dry.
The plush chair leaned against the wall.
Glass will clink when struck by metal.
Bathe and relax in the cool green grass.
Nine rows of soldiers stood in line.
The beach is dry and shallow at low tide.
The idea is to sew both edges straight.
The kitten chased the dog down the street.
Pages bound in cloth make a book.
Try to trace the fine lines of the painting.
Women form less than half of the group.
The zones merge in the central part of town.
A gem in the rough needs work to polish.
Code is used when secrets are sent.
Most of the news is easy for us to hear.
He used the lathe to make brass objects.
The vane on top of the pole revolved in the wind.
Mince pie is a dish served to children.
The clan gathered on each dull night.
Let it burn, it gives us warmth and comfort.
A castle built from sand fails to endure.
A child's wit saved the day for us.
Tack the strip of carpet to the worn floor.
Next Tuesday we must vote.
Pour the stew from the pot into the plate.
Each penny shone like new.
The man went to the woods to gather sticks.
The dirt piles were lines along the road.
The logs fell and tumbled into the clear stream.
Just hoist it up and take it away.
A ripe plum is fit for a king's palate.
Our plans right now are hazy.
Brass rings are sold by these natives.
It takes a good trap to capture a bear.
Feed the white mouse some flower seeds.
The thaw came early and freed the stream.
He took the lead and kept it the whole distance.
The key you designed will fit the lock.
Plead to the council to free the poor thief.
Better hash is made of rare beef.
This plank was made for walking on.
The lake sparkled in the red hot sun.
He crawled with care along the ledge.
Tend the sheep while the dog wanders.
It takes a lot of help to finish these.
Mark the spot with a sign painted red.
Take two shares as a fair profit.
The fur of cats goes by many names.
North winds bring colds and fevers.
He asks no person to vouch for him.
Go now and come here later.
A sash of gold silk will trim her dress.
Soap can wash most dirt away.
That move means the game is over.
He wrote down a long list of items.
A siege will crack the strong defense.
Grape juice and water mix well.
Roads are paved with sticky tar.
Fake stones shine but cost little.
The drip of the rain made a pleasant sound.
Smoke poured out of every crack.
Serve the hot rum to the tired heroes.
Much of the story makes good sense.
The sun came up to light the eastern sky.
Heave the line over the port side.
A lathe cuts and trims any wood.
It's a dense crowd in two distinct ways.
His hip struck the knee of the next player.
The stale smell of old beer lingers.
The desk was firm on the shaky floor.
It takes heat to bring out the odor.
Beef is scarcer than some lamb.
Raise the sail and steer the ship northward.
A cone costs five cents on Mondays.
A pod is what peas always grow in.
Jerk the dart from the cork target.
No cement will hold hard wood.
We now have a new base for shipping.
A list of names is carved around the base.
The sheep were led home by a dog.
Three for a dime, the young peddler cried.
The sense of smell is better than that of touch.
No hardship seemed to keep him sad.
Grace makes up for lack of beauty.
Nudge gently but wake her now.
The news struck doubt into restless minds.
Once we stood beside the shore.
A chink in the wall allowed a draft to blow.
Fasten two pins on each side.
A cold dip restores health and zest.
He takes the oath of office each March.
The sand drifts over the sill of the old house.
The point of the steel pen was bent and twisted.
There is a lag between thought and act.
Seed is needed to plant the spring corn.
Draw the chart with heavy black lines.
The boy owed his pal thirty cents.
The chap slipped into the crowd and was lost.
Hats are worn to tea and not to dinner.
The ramp led up to the wide highway.
Beat the dust from the rug onto the lawn.
Say it slowly but make it ring clear.
The straw nest housed five robins.
Screen the porch with woven straw mats.
This horse will nose his way to the finish.
The dry wax protects the deep scratch.
He picked up the dice for a second roll.
These coins will be needed to pay his debt.
The nag pulled the frail cart along.
Twist the valve and release hot steam.
The vamp of the shoe had a gold buckle.
The smell of burned rags itches my nose.
New pants lack cuffs and pockets.
The marsh will freeze when cold enough.
They slice the sausage thin with a knife.
The bloom of the rose lasts a few days.
A gray mare walked before the colt.
Breakfast buns are fine with a hot drink.
Bottles hold four kinds of rum.
The man wore a feather in his felt hat.
He wheeled the bike past the winding road.
Drop the ashes on the worn old rug.
The desk and both chairs were painted tan.
Throw out the used paper cup and plate.
A clean neck means a neat collar.
The couch cover and hall drapes were blue.
The stems of the tall glasses cracked and broke.
The wall phone rang loud and often.
The clothes dried on a thin wooden rack.
Turn on the lantern which gives us light.
The cleat sank deeply into the soft turf.
The bills were mailed promptly on the tenth of the month.
To have is better than to wait and hope.
The price is fair for a good antique clock.
The music played on while they talked.
Dispense with a vest on a day like this.
The bunch of grapes was pressed into wine.
He sent the figs, but kept the ripe cherries.
The hinge on the door creaked with old age.
The screen before the fire kept in the sparks.
Fly by night, and you waste little time.
Thick glasses helped him read the print.
Birth and death mark the limits of life.
The chair looked strong but had no bottom.
The kite flew wildly in the high wind.
A fur muff is stylish once more.
The tin box held priceless stones.
We need an end of all such matter.
The case was puzzling to the old and wise.
The bright lanterns were gay on the dark lawn.
We don't get much money but we have fun.
The youth drove with zest, but little skill.
Five years he lived with a shaggy dog.
A fence cuts through the corner lot.
The way to save money is not to spend much.
Shut the hatch before the waves push it in.
The odor of spring makes young hearts jump.
Crack the walnut with your sharp side teeth.
He offered proof in the form of a large chart.
Send the stuff in a thick paper bag.
A quart of milk is water for the most part.
They told wild tales to frighten him.
The three story house was built of stone.
In the rear of the ground floor was a large passage.
A man in a blue sweater sat at the desk.
Oats are a food eaten by horse and man.
Their eyelids droop for want of sleep.
A sip of tea revives his tired friend.
There are many ways to do these things.
Tuck the sheet under the edge of the mat.
A force equal to that would move the earth.
We like to see clear weather.
The work of the tailor is seen on each side.
Take a chance and win a china doll.
Shake the dust from your shoes, stranger.
She was kind to sick old people.
The square wooden crate was packed to be shipped.
The dusty bench stood by the stone wall.
We dress to suit the weather of most days.
Smile when you say nasty words.
A bowl of rice is free with chicken stew.
The water in this well is a source of good health.
Take shelter in this tent, but keep still.
That guy is the writer of a few banned books.
The little tales they tell are false.
The door was barred, locked, and bolted as well.
Ripe pears are fit for a queen's table.
A big wet stain was on the round carpet.
The kite dipped and swayed, but stayed aloft.
The pleasant hours fly by much too soon.
The room was crowded with a wild mob.
This strong arm shall shield your honor.
She blushed when he gave her a white orchid.
The beetle droned in the hot June sun.
Press the pedal with your left foot.
Neat plans fail without luck.
The black trunk fell from the landing.
The bank pressed for payment of the debt.
The theft of the pearl pin was kept secret.
Shake hands with this friendly child.
The vast space stretched into the far distance.
A rich farm is rare in this sandy waste.
His wide grin earned many friends.
Flax makes a fine brand of paper.
Hurdle the pit with the aid of a long pole.
A strong bid may scare your partner stiff.
Even a just cause needs power to win.
Peep under the tent and see the clowns.
The leaf drifts along with a slow spin.
Cheap clothes are flashy but don't last.
A thing of small note can cause despair.
Flood the mails with requests for this book.
A thick coat of black paint covered all.
The pencil was cut to be sharp at both ends.
Those last words were a strong statement.
He wrote his name boldly at the top of the sheet.
Dill pickles are sour but taste fine.
Down that road is the way to the grain farmer.
Either mud or dust are found at all times.
The best method is to fix it in place with clips.
If you mumble your speech will be lost.
At night the alarm roused him from a deep sleep.
Read just what the meter says.
Fill your pack with bright trinkets for the poor.
The small red neon lamp went out.
Clams are small, round, soft, and tasty.
The fan whirled its round blades softly.
The line where the edges join was clean.
Breathe deep and smell the piny air.
It matters not if he reads these words or those.
A brown leather bag hung from its strap.
A toad and a frog are hard to tell apart.
A white silk jacket goes with any shoes.
A break in the dam almost caused a flood.
Paint the sockets in the wall dull green.
The child crawled into the dense grass.
Bribes fail where honest men work.
Trample the spark, else the flames will spread.
The hilt of the sword was carved with fine designs.
A round hole was drilled through the thin board.
Footprints showed the path he took up the beach.
She was waiting at my front lawn.
A vent near the edge brought in fresh air.
Prod the old mule with a crooked stick.
It is a band of steel three inches wide.
The pipe ran almost the length of the ditch.
It was hidden from sight by a mass of leaves and shrubs.
The weight of the package was seen on the high scale.
Wake and rise, and step into the green outdoors.
The green light in the brown box flickered.
The brass tube circled the high wall.
The lobes of her ears were pierced to hold rings.
Hold the hammer near the end to drive the nail.
Next Sunday is the twelfth of the month.
Every word and phrase he speaks is true.
He put his last cartridge into the gun and fired.
They took their kids from the public school.
Drive the screw straight into the wood.
Keep the hatch tight and the watch constant.
Sever the twine with a quick snip of the knife.
Paper will dry out when wet.
Slide the catch back and open the desk.
Help the weak to preserve their strength.
A sullen smile gets few friends.
Stop whistling and watch the boys march.
Jerk the cord, and out tumbles the gold.
Slide the tray across the glass top.
The cloud moved in a stately way and was gone.
Light maple makes for a swell room.
Set the piece here and say nothing.
Dull stories make her laugh.
A stiff cord will do to fasten your shoe.
Get the trust fund to the bank early.
Choose between the high road and the low.
A plea for funds seems to come again.
He lent his coat to the tall gaunt stranger.
There is a strong chance it will happen once more.
The duke left the park in a silver coach.
Greet the new guests and leave quickly.
When the frost has come it is time for turkey.
Sweet words work better than fierce.
A thin stripe runs down the middle.
A six comes up more often than a ten.
Lush fern grow on the lofty rocks.
The ram scared the school children off.
The team with the best timing looks good.
The farmer swapped his horse for a brown ox.
Sit on the perch and tell the others what to do.
A steep trail is painful for our feet.
The early phase of life moves fast.
Green moss grows on the northern side.
Tea in thin china has a sweet taste.
Pitch the straw through the door of the stable.
The latch on the back gate needed a nail.
The goose was brought straight from the old market.
The sink is the thing in which we pile dishes.
A whiff of it will cure the most stubborn cold.
The facts don't always show who is right.
She flaps her cape as she parades the street.
The loss of the cruiser was a blow to the fleet.
Loop the braid to the left and then over.
Plead with the lawyer to drop the lost cause.
Calves thrive on tender spring grass.
Post no bills on this office wall.
Tear a thin sheet from the yellow pad.
A cruise in warm waters in a sleek yacht is fun.
A streak of color ran down the left edge.
It was done before the boy could see it.
Crouch before you jump or miss the mark.
Pack the kits and don't forget the salt.
The square peg will settle in the round hole.
Fine soap saves tender skin.
Poached eggs and tea must suffice.
Bad nerves are jangled by a door slam.
Ship maps are different from those for planes.
Dimes showered down from all sides.
They sang the same tunes at each party.
The sky in the west is tinged with orange red.
The pods of peas ferment in bare fields.
The horse balked and threw the tall rider.
The hitch between the horse and cart broke.
Pile the coal high in the shed corner.
A gold vase is both rare and costly.
The knife was hung inside its bright sheath.
The rarest spice comes from the far East.
The roof should be tilted at a sharp slant.
A smatter of French is worse than none.
The mule trod the treadmill day and night.
The aim of the contest is to raise a great fund.
To send it now in large amounts is bad.
There is a fine hard tang in salty air.
Cod is the main business of the north shore.
The slab was hewn from heavy blocks of slate.
Dunk the stale biscuits into strong drink.
Hang tinsel from both branches.
Cap the jar with a tight brass cover.
The poor boy missed the boat again.
Be sure to set the lamp firmly in the hole.
Pick a card and slip it under the pack.
A round mat will cover the dull spot.
The first part of the plan needs changing.
A good book informs of what we ought to know.
The mail comes in three batches per day.
You cannot brew tea in a cold pot.
Dots of light betrayed the black cat.
Put the chart on the mantel and tack it down.
The night shift men rate extra pay.
The red paper brightened the dim stage.
See the player scoot to third base.
Slide the bill between the two leaves.
Many hands help get the job done.
We don't like to admit our small faults.
No doubt about the way the wind blows.
Dig deep in the earth for pirate's gold.
The steady drip is worse than a drenching rain.
A flat pack takes less luggage space.
Green ice frosted the punch bowl.
A stuffed chair slipped from the moving van.
The stitch will serve but needs to be shortened.
A thin book fits in the side pocket.
The gloss on top made it unfit to read.
The hail pattered on the burnt brown grass.
Seven seals were stamped on great sheets.
Our troops are set to strike heavy blows.
The store was jammed before the sale could start.
It was a bad error on the part of the new judge.
One step more and the board will collapse.
Take the match and strike it against your shoe.
The pot boiled, but the contents failed to jell.
The baby puts his right foot in his mouth.
The bombs left most of the town in ruins.
Stop and stare at the hard working man.
The streets are narrow and full of sharp turns.
The pup jerked the leash as he saw a feline shape.
Open your book to the first page.
Fish evade the net and swim off.
Dip the pail once and let it settle.
Will you please answer that phone.
The big red apple fell to the ground.
The curtain rose and the show was on.
The young prince became heir to the throne.
He sent the boy on a short errand.
Leave now and you will arrive on time.
The corner store was robbed last night.
A gold ring will please most any girl.
The long journey home took a year.
She saw a cat in the neighbor's house.
A pink shell was found on the sandy beach.
Small children came to see him.
The grass and bushes were wet with dew.
The blind man counted his old coins.
A severe storm tore down the barn.
She called his name many times.
When you hear the bell, come quickly.

View file

@ -1,3 +1,5 @@
import html
import random
import time
from pathlib import Path
@ -5,7 +7,7 @@ import gradio as gr
import torch
from extensions.silero_tts import tts_preprocessor
from modules import chat, shared
from modules import chat, shared, ui_chat
from modules.utils import gradio
torch._C._jit_set_profiling_mode(False)
@ -106,6 +108,7 @@ def history_modifier(history):
def output_modifier(string, state):
global model, current_params, streaming_state
for i in params:
if params[i] != current_params[i]:
model = load_model()
@ -116,7 +119,7 @@ def output_modifier(string, state):
return string
original_string = string
string = tts_preprocessor.preprocess(string)
string = tts_preprocessor.preprocess(html.unescape(string))
if string == '':
string = '*Empty reply, try regenerating*'
@ -140,6 +143,35 @@ def setup():
model = load_model()
def random_sentence():
with open(Path("extensions/silero_tts/harvard_sentences.txt")) as f:
return random.choice(list(f))
def voice_preview(preview_text):
global model, current_params, streaming_state
for i in params:
if params[i] != current_params[i]:
model = load_model()
current_params = params.copy()
break
string = tts_preprocessor.preprocess(preview_text or random_sentence())
output_file = Path('extensions/silero_tts/outputs/voice_preview.wav')
prosody = f"<prosody rate=\"{params['voice_speed']}\" pitch=\"{params['voice_pitch']}\">"
silero_input = f'<speak>{prosody}{xmlesc(string)}</prosody></speak>'
model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
return f'<audio src="file/{output_file.as_posix()}?{int(time.time())}" controls autoplay></audio>'
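# With pitch and speed both set to 'medium' and a preview text of "Hello there", silero_input
# comes out roughly as:
#     <speak><prosody rate="medium" pitch="medium">Hello there</prosody></speak>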
def custom_css():
path_to_css = Path(__file__).parent.resolve() / 'style.css'
return open(path_to_css, 'r').read()
def ui():
# Gradio elements
with gr.Accordion("Silero TTS"):
@ -153,31 +185,33 @@ def ui():
v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch')
v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed')
with gr.Row():
preview_text = gr.Text(show_label=False, placeholder="Preview text", elem_id="silero_preview_text")
preview_play = gr.Button("Preview")
preview_audio = gr.HTML(visible=False)
with gr.Row():
convert = gr.Button('Permanently replace audios with the message texts')
convert_cancel = gr.Button('Cancel', visible=False)
convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
gr.Markdown('[Click here for Silero audio samples](https://oobabooga.github.io/silero-samples/index.html)')
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
convert_confirm.click(
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
remove_tts_from_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
if shared.is_chat():
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
convert_confirm.click(
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
remove_tts_from_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
# Toggle message text in history
show_text.change(
lambda x: params.update({"show_text": x}), show_text, None).then(
toggle_text_in_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
# Toggle message text in history
show_text.change(
lambda x: params.update({"show_text": x}), show_text, None).then(
toggle_text_in_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
# Event functions to update the parameters in the backend
activate.change(lambda x: params.update({"activate": x}), activate, None)
@ -185,3 +219,7 @@ def ui():
voice.change(lambda x: params.update({"speaker": x}), voice, None)
v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None)
v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None)
# Play preview
preview_text.submit(voice_preview, preview_text, preview_audio)
preview_play.click(voice_preview, preview_text, preview_audio)

View file

@ -0,0 +1,8 @@
.SDAP .hires_opts input[type="number"] {
width: 6em !important;
}
/* silero_tts preview */
.form:has(> #silero_preview_text) {
min-width: 75%
}

View file

@ -4,7 +4,7 @@ import textwrap
import gradio as gr
from bs4 import BeautifulSoup
from modules import chat, shared
from modules import chat
from modules.logging_colors import logger
from .chromadb import add_chunks_to_collector, make_collector
@ -96,7 +96,8 @@ def apply_settings(chunk_count, chunk_count_initial, time_weight):
def custom_generate_chat_prompt(user_input, state, **kwargs):
global chat_collector
history = state['history']
# Get the history as it is being modified when using regenerate.
history = kwargs['history']
if state['mode'] == 'instruct':
results = collector.get_sorted(user_input, n_results=params['chunk_count'])
@ -142,8 +143,8 @@ def remove_special_tokens(string):
return re.sub(pattern, '', string)
def input_modifier(string):
if shared.is_chat():
def input_modifier(string, state, is_chat=False):
if is_chat:
return string
# Find the user input
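The hunk above reflects an extensions-API change: input_modifier now receives the state dict and an is_chat flag instead of calling shared.is_chat(). A minimal sketch of a third-party extension adopting the new signature follows; the extension name and the uppercasing behaviour are invented for illustration.

# Hypothetical extensions/my_extension/script.py
def input_modifier(string, state, is_chat=False):
    # In chat modes, leave the message untouched, as in the diff above.
    if is_chat:
        return string

    # In default/notebook mode, rewrite the prompt before generation.
    return string.upper()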

View file

@ -0,0 +1,4 @@
user: "GPT4 User:"
bot: "GPT4 Assistant:"
turn_template: "<|user|> <|user-message|><|end_of_turn|><|bot|> <|bot-message|><|end_of_turn|>"
context: ""

View file

@ -0,0 +1,4 @@
user: "### Instruction:"
bot: "### Response:"
turn_template: "<|user|> <|user-message|>\n\n<|bot|> <|bot-message|>\n\n"
context: ""

View file

@ -0,0 +1,4 @@
user: "### User:"
bot: "### Assistant:"
turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
context: "### System:\nThis is a system prompt, please behave and help the user.\n\n"

93
js/main.js Normal file
View file

@ -0,0 +1,93 @@
let main_parent = document.getElementById('chat-tab').parentNode;
let extensions = document.getElementById('extensions');
main_parent.childNodes[0].classList.add("header_bar");
main_parent.style = "padding: 0; margin: 0";
main_parent.parentNode.parentNode.style = "padding: 0";
document.querySelector('.header_bar').addEventListener('click', function(event) {
if (event.target.tagName === 'BUTTON') {
const buttonText = event.target.textContent.trim();
let chat_visible = (buttonText == 'Chat');
let default_visible = (buttonText == 'Default');
let notebook_visible = (buttonText == 'Notebook');
// Check if one of the generation tabs is visible
if (chat_visible || notebook_visible || default_visible) {
extensions.style.display = 'flex';
if (chat_visible) {
extensions.style.maxWidth = "800px";
extensions.style.padding = "0px";
} else {
extensions.style.maxWidth = "none";
extensions.style.padding = "15px";
}
} else {
extensions.style.display = 'none';
}
}
});
//------------------------------------------------
// Add some scrollbars
//------------------------------------------------
const textareaElements = document.querySelectorAll('.add_scrollbar textarea');
for (let i = 0; i < textareaElements.length; i++) {
textareaElements[i].classList.remove('scroll-hide');
textareaElements[i].classList.add('pretty_scrollbar');
textareaElements[i].style.resize = "none";
}
//------------------------------------------------
// Stop generation on Esc pressed
//------------------------------------------------
document.addEventListener("keydown", function(event) {
if (event.key === "Escape") {
// Find the element with id 'stop' and click it
var stopButton = document.getElementById("stop");
if (stopButton) {
stopButton.click();
}
}
});
//------------------------------------------------
// Chat scrolling
//------------------------------------------------
const targetElement = document.getElementById('chat').parentNode.parentNode.parentNode;
// Create a MutationObserver instance
const observer = new MutationObserver(function(mutations) {
mutations.forEach(function(mutation) {
let childElement = targetElement.childNodes[2].childNodes[0].childNodes[1];
childElement.scrollTop = childElement.scrollHeight;
});
});
// Configure the observer to watch for changes in the subtree and attributes
const config = {
childList: true,
subtree: true,
characterData: true,
attributeOldValue: true,
characterDataOldValue: true
};
// Start observing the target element
observer.observe(targetElement, config);
//------------------------------------------------
// Improve the looks of the chat input field
//------------------------------------------------
document.getElementById('chat-input').parentNode.style.background = 'transparent';
document.getElementById('chat-input').parentNode.style.border = 'none';
//------------------------------------------------
// Remove some backgrounds
//------------------------------------------------
const noBackgroundelements = document.querySelectorAll('.no-background');
for (let i = 0; i < noBackgroundelements.length; i++) {
noBackgroundelements[i].parentNode.style.border = 'none';
noBackgroundelements[i].parentNode.parentNode.parentNode.style.alignItems = 'center';
}

40
js/save_files.js Normal file
View file

@ -0,0 +1,40 @@
// Functions for downloading JSON files
function getCurrentTimestamp() {
const now = new Date();
const timezoneOffset = now.getTimezoneOffset() * 60000; // Convert to milliseconds
const localTime = new Date(now.getTime() - timezoneOffset);
const formattedTimestamp = localTime.toISOString().replace(/[-:]/g, '').slice(0, 15);
return formattedTimestamp;
}
function saveFile(contents, filename) {
const element = document.createElement('a');
element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(contents));
element.setAttribute('download', filename);
element.style.display = 'none';
document.body.appendChild(element);
element.click();
document.body.removeChild(element);
}
function saveHistory(history, character, mode) {
let path = null;
if (['chat', 'chat-instruct'].includes(mode) && character && character.trim() !== '') {
path = `history_${character}_${getCurrentTimestamp()}.json`;
} else {
try {
path = `history_${mode}_${getCurrentTimestamp()}.json`;
} catch (error) {
path = `history_${getCurrentTimestamp()}.json`;
}
}
saveFile(history, path);
}
function saveSession(session) {
let path = null;
path = `session_${getCurrentTimestamp()}.json`;
saveFile(session, path);
}

18
js/show_controls.js Normal file
View file

@ -0,0 +1,18 @@
const belowChatInput = document.querySelectorAll("#chat-tab > div > :nth-child(n+3), #extensions");
const chatParent = document.getElementById("chat").parentNode;
function toggle_controls(value) {
if (value) {
belowChatInput.forEach(element => {
element.style.display = "inherit";
});
chatParent.classList.remove("bigchat");
} else {
belowChatInput.forEach(element => {
element.style.display = "none";
});
chatParent.classList.add("bigchat");
}
}

43
js/switch_tabs.js Normal file
View file

@ -0,0 +1,43 @@
let chat_tab = document.getElementById('chat-tab');
let main_parent = chat_tab.parentNode;
function scrollToTop() {
window.scrollTo({
top: 0,
// behavior: 'smooth'
});
}
function switch_to_chat() {
let chat_tab_button = main_parent.childNodes[0].childNodes[1];
chat_tab_button.click();
scrollToTop();
}
function switch_to_default() {
let default_tab_button = main_parent.childNodes[0].childNodes[4];
default_tab_button.click();
scrollToTop();
}
function switch_to_notebook() {
let notebook_tab_button = main_parent.childNodes[0].childNodes[7];
notebook_tab_button.click();
scrollToTop();
}
function switch_to_generation_parameters() {
let parameters_tab_button = main_parent.childNodes[0].childNodes[10];
let generation_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[1];
parameters_tab_button.click();
generation_tab_button.click();
scrollToTop();
}
function switch_to_character() {
let parameters_tab_button = main_parent.childNodes[0].childNodes[10];
let character_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[4];
parameters_tab_button.click();
character_tab_button.click();
scrollToTop();
}

Some files were not shown because too many files have changed in this diff.