diff --git a/api-example-stream.py b/api-example-stream.py
index 49058776..ad8f7bf8 100644
--- a/api-example-stream.py
+++ b/api-example-stream.py
@@ -5,7 +5,7 @@ import sys
 try:
     import websockets
 except ImportError:
-    print("Websockets package not found. Make sure it's installed.")
+    print("Websockets package not found. Make sure it's installed.")
 
 # For local streaming, the websockets are hosted without ssl - ws://
 HOST = 'localhost:5005'
@@ -14,6 +14,7 @@ URI = f'ws://{HOST}/api/v1/stream'
 # For reverse-proxied streaming, the remote will likely host with ssl - wss://
 # URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'
 
+
 async def run(context):
     # Note: the selected defaults change from time to time.
     request = {
@@ -42,7 +43,7 @@ async def run(context):
     async with websockets.connect(URI, ping_interval=None) as websocket:
         await websocket.send(json.dumps(request))
 
-        yield context # Remove this if you just want to see the reply
+        yield context  # Remove this if you just want to see the reply
 
         while True:
             incoming_data = await websocket.recv()
@@ -58,7 +59,7 @@ async def run(context):
 async def print_response_stream(prompt):
     async for response in run(prompt):
         print(response, end='')
-        sys.stdout.flush() # If we don't flush, we won't see tokens in realtime.
+        sys.stdout.flush()  # If we don't flush, we won't see tokens in realtime.
 
 
 if __name__ == '__main__':
diff --git a/api-example.py b/api-example.py
index d6053fda..f35ea1db 100644
--- a/api-example.py
+++ b/api-example.py
@@ -7,6 +7,7 @@ URI = f'http://{HOST}/api/v1/generate'
 # For reverse-proxied streaming, the remote will likely host with ssl - https://
 # URI = 'https://your-uri-here.trycloudflare.com/api/v1/generate'
 
+
 def run(prompt):
     request = {
         'prompt': prompt,
@@ -37,6 +38,7 @@ def run(prompt):
         result = response.json()['results'][0]['text']
         print(prompt + result)
 
+
 if __name__ == '__main__':
     prompt = "In order to make homemade bread, follow these steps:\n1)"
     run(prompt)
diff --git a/extensions/api/blocking_api.py b/extensions/api/blocking_api.py
index 2c72d789..57cc0b9e 100644
--- a/extensions/api/blocking_api.py
+++ b/extensions/api/blocking_api.py
@@ -2,11 +2,10 @@ import json
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from threading import Thread
 
+from extensions.api.util import build_parameters, try_start_cloudflared
 from modules import shared
 from modules.text_generation import encode, generate_reply
 
-from extensions.api.util import build_parameters, try_start_cloudflared
-
 
 class Handler(BaseHTTPRequestHandler):
     def do_GET(self):
diff --git a/extensions/api/script.py b/extensions/api/script.py
index efeed71f..3911b106 100644
--- a/extensions/api/script.py
+++ b/extensions/api/script.py
@@ -5,6 +5,7 @@ from modules import shared
 BLOCKING_PORT = 5000
 STREAMING_PORT = 5005
 
+
 def setup():
     blocking_api.start_server(BLOCKING_PORT, share=shared.args.public_api)
     streaming_api.start_server(STREAMING_PORT, share=shared.args.public_api)
diff --git a/extensions/api/streaming_api.py b/extensions/api/streaming_api.py
index 42570c94..e847178a 100644
--- a/extensions/api/streaming_api.py
+++ b/extensions/api/streaming_api.py
@@ -1,12 +1,12 @@
-import json
 import asyncio
-from websockets.server import serve
+import json
 from threading import Thread
 
-from modules import shared
-from modules.text_generation import generate_reply
+from websockets.server import serve
 
 from extensions.api.util import build_parameters, try_start_cloudflared
+from modules import shared
+from modules.text_generation import generate_reply
 PATH = '/api/v1/stream'
diff --git a/extensions/character_bias/script.py b/extensions/character_bias/script.py
index 614d9ce3..ff12f3af 100644
--- a/extensions/character_bias/script.py
+++ b/extensions/character_bias/script.py
@@ -1,6 +1,7 @@
-import gradio as gr
 import os
 
+import gradio as gr
+
 # get the current directory of the script
 current_dir = os.path.dirname(os.path.abspath(__file__))
diff --git a/extensions/llava/script.py b/extensions/llava/script.py
index eaf6b313..3f6c73a2 100644
--- a/extensions/llava/script.py
+++ b/extensions/llava/script.py
@@ -1,6 +1,8 @@
-import gradio as gr
 import logging
 
+import gradio as gr
+
+
 def ui():
     gr.Markdown("### This extension is deprecated, use \"multimodal\" extension instead")
     logging.error("LLaVA extension is deprecated, use \"multimodal\" extension instead")
diff --git a/extensions/multimodal/multimodal_embedder.py b/extensions/multimodal/multimodal_embedder.py
index 816e3866..62e99ca7 100644
--- a/extensions/multimodal/multimodal_embedder.py
+++ b/extensions/multimodal/multimodal_embedder.py
@@ -6,10 +6,11 @@ from io import BytesIO
 from typing import Any, List, Optional
 
 import torch
+from PIL import Image
+
 from extensions.multimodal.pipeline_loader import load_pipeline
 from modules import shared
 from modules.text_generation import encode, get_max_prompt_length
-from PIL import Image
 
 
 @dataclass
diff --git a/extensions/multimodal/script.py b/extensions/multimodal/script.py
index aeaadffd..2ca11bf5 100644
--- a/extensions/multimodal/script.py
+++ b/extensions/multimodal/script.py
@@ -7,6 +7,7 @@ from io import BytesIO
 
 import gradio as gr
 import torch
+
 from extensions.multimodal.multimodal_embedder import MultimodalEmbedder
 from modules import shared
diff --git a/extensions/openai/script.py b/extensions/openai/script.py
index 9eb35a46..c46dbe04 100644
--- a/extensions/openai/script.py
+++ b/extensions/openai/script.py
@@ -1,11 +1,12 @@
 import base64
 import json
-import numpy as np
 import os
 import time
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from threading import Thread
 
+import numpy as np
+
 from modules import shared
 from modules.text_generation import encode, generate_reply
 
@@ -61,6 +62,7 @@ def float_list_to_base64(float_list):
     ascii_string = encoded_bytes.decode('ascii')
     return ascii_string
 
+
 class Handler(BaseHTTPRequestHandler):
     def do_GET(self):
         if self.path.startswith('/v1/models'):
@@ -387,8 +389,8 @@ class Handler(BaseHTTPRequestHandler):
                "created": created_time,
                "model": model,  # TODO: add Lora info?
                resp_list: [{
-                    "index": 0,
-                    "finish_reason": "stop",
+                    "index": 0,
+                    "finish_reason": "stop",
                }],
                "usage": {
                    "prompt_tokens": token_count,
diff --git a/extensions/sd_api_pictures/script.py b/extensions/sd_api_pictures/script.py
index 2d4e39dc..2c054242 100644
--- a/extensions/sd_api_pictures/script.py
+++ b/extensions/sd_api_pictures/script.py
@@ -6,12 +6,13 @@ from datetime import date
 from pathlib import Path
 
 import gradio as gr
-import modules.shared as shared
 import requests
 import torch
-from modules.models import reload_model, unload_model
 from PIL import Image
 
+import modules.shared as shared
+from modules.models import reload_model, unload_model
+
 torch._C._jit_set_profiling_mode(False)
 
 # parameters which can be customized in settings.json of webui
@@ -77,6 +78,7 @@ SD_models = ['NeverEndingDream']  # TODO: get with http://{address}}/sdapi/v1/sd
 
 picture_response = False  # specifies if the next model response should appear as a picture
 
+
 def remove_surrounded_chars(string):
     # this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR
     # 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
@@ -122,7 +124,6 @@ def input_modifier(string):
 
 # Get and save the Stable Diffusion-generated picture
 def get_SD_pictures(description):
-
     global params
 
     if params['manage_VRAM']:
@@ -259,6 +260,7 @@ def SD_api_address_update(address):
 
     return gr.Textbox.update(label=msg)
 
+
 def ui():
 
     # Gradio elements
@@ -290,12 +292,11 @@ def ui():
             cfg_scale = gr.Number(label="CFG Scale", value=params['cfg_scale'], elem_id="cfg_box")
             with gr.Column() as hr_options:
                 restore_faces = gr.Checkbox(value=params['restore_faces'], label='Restore faces')
-                enable_hr = gr.Checkbox(value=params['enable_hr'], label='Hires. fix')
+                enable_hr = gr.Checkbox(value=params['enable_hr'], label='Hires. fix')
             with gr.Row(visible=params['enable_hr'], elem_classes="hires_opts") as hr_options:
-                hr_scale = gr.Slider(1, 4, value=params['hr_scale'], step=0.1, label='Upscale by')
-                denoising_strength = gr.Slider(0, 1, value=params['denoising_strength'], step=0.01, label='Denoising strength')
-                hr_upscaler = gr.Textbox(placeholder=params['hr_upscaler'], value=params['hr_upscaler'], label='Upscaler')
-
+                hr_scale = gr.Slider(1, 4, value=params['hr_scale'], step=0.1, label='Upscale by')
+                denoising_strength = gr.Slider(0, 1, value=params['denoising_strength'], step=0.01, label='Denoising strength')
+                hr_upscaler = gr.Textbox(placeholder=params['hr_upscaler'], value=params['hr_upscaler'], label='Upscaler')
 
     # Event functions to update the parameters in the backend
     address.change(lambda x: params.update({"address": filter_address(x)}), address, None)
diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index 345e3821..3166bb63 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -4,6 +4,7 @@ from pathlib import Path
 
 import gradio as gr
 import torch
+
 from extensions.silero_tts import tts_preprocessor
 from modules import chat, shared
 from modules.html_generator import chat_html_wrapper
@@ -216,4 +217,4 @@ def ui():
 
     # Play preview
     preview_text.submit(voice_preview, preview_text, preview_audio)
-    preview_play.click(voice_preview, preview_text, preview_audio)
\ No newline at end of file
+    preview_play.click(voice_preview, preview_text, preview_audio)
diff --git a/extensions/silero_tts/test_tts.py b/extensions/silero_tts/test_tts.py
index ad8ee764..ebc2c102 100644
--- a/extensions/silero_tts/test_tts.py
+++ b/extensions/silero_tts/test_tts.py
@@ -2,7 +2,6 @@ import time
 from pathlib import Path
 
 import torch
-
 import tts_preprocessor
 
 torch._C._jit_set_profiling_mode(False)
diff --git a/extensions/silero_tts/tts_preprocessor.py b/extensions/silero_tts/tts_preprocessor.py
index eb2ca41b..daefdcbd 100644
--- a/extensions/silero_tts/tts_preprocessor.py
+++ b/extensions/silero_tts/tts_preprocessor.py
@@ -69,7 +69,7 @@ def remove_surrounded_chars(string):
     # first this expression will check if there is a string nested exclusively between a alt=
    # and a style= string. This would correspond to only a the alt text of an embedded image
    # If it matches it will only keep that part as the string, and rend it for further processing
-    # Afterwards this expression matches to 'as few symbols as possible (0 upwards) between any
+    # Afterwards this expression matches to 'as few symbols as possible (0 upwards) between any
     # asterisks' OR' as few symbols as possible (0 upwards) between an asterisk and the end of the string'
     if re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL):
         m = re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL)
diff --git a/extensions/superbooga/script.py b/extensions/superbooga/script.py
index 5b98128e..e239c58a 100644
--- a/extensions/superbooga/script.py
+++ b/extensions/superbooga/script.py
@@ -59,7 +59,7 @@ class ChromaCollector(Collecter):
     def get_ids(self, search_strings: list[str], n_results: int) -> list[str]:
         n_results = min(len(self.ids), n_results)
         result = self.collection.query(query_texts=search_strings, n_results=n_results, include=['documents'])['ids'][0]
-        return list(map(lambda x : int(x[2:]), result))
+        return list(map(lambda x: int(x[2:]), result))
 
     def clear(self):
         self.collection.delete(ids=self.ids)
@@ -162,13 +162,13 @@ def input_modifier(string):
 def custom_generate_chat_prompt(user_input, state, **kwargs):
     if len(shared.history['internal']) > 2 and user_input != '':
         chunks = []
-        for i in range(len(shared.history['internal'])-1):
+        for i in range(len(shared.history['internal']) - 1):
             chunks.append('\n'.join(shared.history['internal'][i]))
 
         add_chunks_to_collector(chunks)
         query = '\n'.join(shared.history['internal'][-1] + [user_input])
         try:
-            best_ids = collector.get_ids(query, n_results=len(shared.history['internal'])-1)
+            best_ids = collector.get_ids(query, n_results=len(shared.history['internal']) - 1)
 
             # Sort the history by relevance instead of by chronological order,
             # except for the latest message
@@ -226,7 +226,7 @@ def ui():
 
     ## Chat mode
 
-    In chat mode, the extension automatically sorts the history by relevance instead of chronologically, except for the very latest input/reply pair.
+    In chat mode, the extension automatically sorts the history by relevance instead of chronologically, except for the very latest input/reply pair.
     That is, the prompt will include (starting from the end):
diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py
index 9daee7be..32226404 100644
--- a/extensions/whisper_stt/script.py
+++ b/extensions/whisper_stt/script.py
@@ -1,5 +1,6 @@
 import gradio as gr
 import speech_recognition as sr
+
 from modules import shared
 
 input_hijack = {
diff --git a/modules/RWKV.py b/modules/RWKV.py
index 35d650e1..bb6bab50 100644
--- a/modules/RWKV.py
+++ b/modules/RWKV.py
@@ -24,13 +24,12 @@ class RWKVModel:
     @classmethod
     def from_pretrained(self, path, dtype="fp16", device="cuda"):
         tokenizer_path = Path(f"{path.parent}/20B_tokenizer.json")
-
         if shared.args.rwkv_strategy is None:
             model = RWKV(model=str(path), strategy=f'{device} {dtype}')
         else:
             model = RWKV(model=str(path), strategy=shared.args.rwkv_strategy)
-        pipeline = PIPELINE(model, str(tokenizer_path))
+        pipeline = PIPELINE(model, str(tokenizer_path))
 
         result = self()
         result.pipeline = pipeline
         result.model = model
@@ -83,7 +82,6 @@ class RWKVModel:
         out = self.cached_output_logits
 
         for i in range(token_count):
-
             # forward
             tokens = self.pipeline.encode(ctx) if i == 0 else [token]
             while len(tokens) > 0:
@@ -91,35 +89,38 @@ class RWKVModel:
                 tokens = tokens[args.chunk_len:]
 
             # cache the model state after scanning the context
-            # we don't cache the state after processing our own generated tokens because
-            # the output string might be post-processed arbitrarily. Therefore, what's fed into the model
+            # we don't cache the state after processing our own generated tokens because
+            # the output string might be post-processed arbitrarily. Therefore, what's fed into the model
             # on the next round of chat might be slightly different what what it output on the previous round
             if i == 0:
                 self.cached_context += ctx
                 self.cached_model_state = copy.deepcopy(state)
                 self.cached_output_logits = copy.deepcopy(out)
-
+
             # adjust probabilities
             for n in args.token_ban:
                 out[n] = -float('inf')
+
             for n in occurrence:
                 out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
-
+
             # sampler
             token = self.pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k)
             if token in args.token_stop:
                 break
+
             all_tokens += [token]
             if token not in occurrence:
                 occurrence[token] = 1
             else:
                 occurrence[token] += 1
-
+
             # output
             tmp = self.pipeline.decode([token])
-            if '\ufffd' not in tmp: # is valid utf-8 string?
+            if '\ufffd' not in tmp:  # is valid utf-8 string?
                 if callback:
                     callback(tmp)
+
                 out_str += tmp
 
         return out_str
@@ -133,7 +134,6 @@ class RWKVTokenizer:
     def from_pretrained(self, path):
         tokenizer_path = path / "20B_tokenizer.json"
         tokenizer = Tokenizer.from_file(str(tokenizer_path))
-
         result = self()
         result.tokenizer = tokenizer
         return result
diff --git a/modules/deepspeed_parameters.py b/modules/deepspeed_parameters.py
index 3dbed437..9116f579 100644
--- a/modules/deepspeed_parameters.py
+++ b/modules/deepspeed_parameters.py
@@ -1,5 +1,4 @@
 def generate_ds_config(ds_bf16, train_batch_size, nvme_offload_dir):
-
     '''
     DeepSpeed configration
     https://huggingface.co/docs/transformers/main_classes/deepspeed
diff --git a/modules/evaluate.py b/modules/evaluate.py
index 3134280c..adafa713 100644
--- a/modules/evaluate.py
+++ b/modules/evaluate.py
@@ -20,6 +20,8 @@ def load_past_evaluations():
         return df
     else:
         return pd.DataFrame(columns=['Model', 'LoRAs', 'Dataset', 'Perplexity', 'stride', 'max_length', 'Date', 'Comment'])
+
+
 past_evaluations = load_past_evaluations()
diff --git a/modules/extensions.py b/modules/extensions.py
index 8e88e0cc..47629012 100644
--- a/modules/extensions.py
+++ b/modules/extensions.py
@@ -7,7 +7,6 @@ import gradio as gr
 import extensions
 import modules.shared as shared
-
 
 state = {}
 available_extensions = []
 setup_called = set()
@@ -91,7 +90,7 @@ def _apply_state_modifier_extensions(state):
             state = getattr(extension, "state_modifier")(state)
 
     return state
-
+
 
 # Extension functions that override the default tokenizer output - currently only the first one will work
 def _apply_tokenizer_extensions(function_name, state, prompt, input_ids, input_embeds):
@@ -108,7 +107,7 @@ def _apply_custom_tokenized_length(prompt):
     for extension, _ in iterator():
         if hasattr(extension, 'custom_tokenized_length'):
             return getattr(extension, 'custom_tokenized_length')(prompt)
-
+
     return None
diff --git a/modules/logging_colors.py b/modules/logging_colors.py
index 5485b090..5c9714f7 100644
--- a/modules/logging_colors.py
+++ b/modules/logging_colors.py
@@ -1,6 +1,8 @@
 # Copied from https://stackoverflow.com/a/1336640
 import logging
+import platform
+
 
 
 def add_coloring_to_emit_windows(fn):
     # add methods we need to the class
@@ -11,6 +13,7 @@ def add_coloring_to_emit_windows(fn):
 
     def _set_color(self, code):
         import ctypes
+
         # Constants from the Windows API
         self.STD_OUTPUT_HANDLE = -11
         hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE)
@@ -94,7 +97,6 @@ def add_coloring_to_emit_ansi(fn):
     return new
 
 
-import platform
 if platform.system() == 'Windows':
     # Windows does not support ANSI escapes and we are using API calls to set the console color
     logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit)
diff --git a/modules/models.py b/modules/models.py
index 1f2219ae..d5f6594c 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -161,10 +161,10 @@ def load_model(model_name):
     # Custom
     else:
         params = {
-            "low_cpu_mem_usage": True,
-            "trust_remote_code": trust_remote_code
+            "low_cpu_mem_usage": True,
+            "trust_remote_code": trust_remote_code
         }
-
+
         if not any((shared.args.cpu, torch.cuda.is_available(), torch.has_mps)):
             logging.warning("torch.cuda.is_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.")
             shared.args.cpu = True
@@ -288,7 +288,7 @@ def load_soft_prompt(name):
                 logging.info(f"{field}: {', '.join(j[field])}")
             else:
                 logging.info(f"{field}: {j[field]}")
-
+
         logging.info()
     tensor = np.load('tensor.npy')
     Path('tensor.npy').unlink()
diff --git a/server.py b/server.py
index e2bbeaef..10df484b 100644
--- a/server.py
+++ b/server.py
@@ -377,7 +377,7 @@ def create_model_menus():
     shared.gradio['lora_menu_apply'].click(load_lora_wrapper, shared.gradio['lora_menu'], shared.gradio['model_status'], show_progress=False)
     shared.gradio['download_model_button'].click(download_model_wrapper, shared.gradio['custom_model_menu'], shared.gradio['model_status'], show_progress=False)
 
-    shared.gradio['autoload_model'].change(lambda x : gr.update(visible=not x), shared.gradio['autoload_model'], load)
+    shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), shared.gradio['autoload_model'], load)
 
 
 def create_settings_menus(default_preset):