From e116d31180be881b0cb81d7fd7a8ed0a6bbd19fa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 May 2023 22:42:34 -0300 Subject: [PATCH] Prevent unwanted log messages from modules --- extensions/llava/script.py | 6 +-- extensions/multimodal/multimodal_embedder.py | 6 +-- extensions/multimodal/pipeline_loader.py | 8 ++-- .../multimodal/pipelines/llava/llava.py | 15 +++---- extensions/multimodal/script.py | 4 +- extensions/superbooga/chromadb.py | 9 ++--- extensions/superbooga/script.py | 6 +-- modules/AutoGPTQ_loader.py | 8 ++-- modules/GPTQ_loader.py | 22 +++++------ modules/LoRA.py | 6 +-- modules/chat.py | 18 ++++----- modules/extensions.py | 6 +-- modules/llama_attn_hijack.py | 8 ++-- modules/llamacpp_model.py | 6 +-- modules/logging_colors.py | 5 +++ modules/models.py | 26 ++++++------- modules/shared.py | 9 +++-- modules/text_generation.py | 4 +- modules/training.py | 39 ++++++++++--------- server.py | 20 +++++----- 20 files changed, 120 insertions(+), 111 deletions(-) diff --git a/extensions/llava/script.py b/extensions/llava/script.py index 3f6c73a2..781d584b 100644 --- a/extensions/llava/script.py +++ b/extensions/llava/script.py @@ -1,8 +1,8 @@ -import logging - import gradio as gr +from modules.logging_colors import logger + def ui(): gr.Markdown("### This extension is deprecated, use \"multimodal\" extension instead") - logging.error("LLaVA extension is deprecated, use \"multimodal\" extension instead") + logger.error("LLaVA extension is deprecated, use \"multimodal\" extension instead") diff --git a/extensions/multimodal/multimodal_embedder.py b/extensions/multimodal/multimodal_embedder.py index 62e99ca7..626077cb 100644 --- a/extensions/multimodal/multimodal_embedder.py +++ b/extensions/multimodal/multimodal_embedder.py @@ -1,5 +1,4 @@ import base64 -import logging import re from dataclasses import dataclass from io import BytesIO @@ -10,6 +9,7 @@ from PIL import Image from extensions.multimodal.pipeline_loader import load_pipeline from modules import shared +from modules.logging_colors import logger from modules.text_generation import encode, get_max_prompt_length @@ -26,7 +26,7 @@ class MultimodalEmbedder: def __init__(self, params: dict): pipeline, source = load_pipeline(params) self.pipeline = pipeline - logging.info(f'Multimodal: loaded pipeline {self.pipeline.name()} from pipelines/{source} ({self.pipeline.__class__.__name__})') + logger.info(f'Multimodal: loaded pipeline {self.pipeline.name()} from pipelines/{source} ({self.pipeline.__class__.__name__})') def _split_prompt(self, prompt: str, load_images: bool = False) -> List[PromptPart]: """Splits a prompt into a list of `PromptParts` to separate image data from text. @@ -138,7 +138,7 @@ class MultimodalEmbedder: # notify user if we truncated an image if removed_images > 0: - logging.warning(f"Multimodal: removed {removed_images} image(s) from prompt. Try decreasing max_new_tokens if generation is broken") + logger.warning(f"Multimodal: removed {removed_images} image(s) from prompt. 
Try decreasing max_new_tokens if generation is broken") return encoded diff --git a/extensions/multimodal/pipeline_loader.py b/extensions/multimodal/pipeline_loader.py index 3ebdb104..8fcd0a9b 100644 --- a/extensions/multimodal/pipeline_loader.py +++ b/extensions/multimodal/pipeline_loader.py @@ -1,4 +1,3 @@ -import logging import traceback from importlib import import_module from pathlib import Path @@ -6,6 +5,7 @@ from typing import Tuple from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline from modules import shared +from modules.logging_colors import logger def _get_available_pipeline_modules(): @@ -21,8 +21,8 @@ def load_pipeline(params: dict) -> Tuple[AbstractMultimodalPipeline, str]: try: pipeline_modules[name] = import_module(f'extensions.multimodal.pipelines.{name}.pipelines') except: - logging.warning(f'Failed to get multimodal pipelines from {name}') - logging.warning(traceback.format_exc()) + logger.warning(f'Failed to get multimodal pipelines from {name}') + logger.warning(traceback.format_exc()) if shared.args.multimodal_pipeline is not None: for k in pipeline_modules: @@ -48,5 +48,5 @@ def load_pipeline(params: dict) -> Tuple[AbstractMultimodalPipeline, str]: log = f'Multimodal - ERROR: Failed to load multimodal pipeline "{shared.args.multimodal_pipeline}", available pipelines are: {available}.' else: log = f'Multimodal - ERROR: Failed to determine multimodal pipeline for model {shared.args.model}, please select one manually using --multimodal-pipeline [PIPELINE]. Available pipelines are: {available}.' - logging.critical(f'{log} Please specify a correct pipeline, or disable the extension') + logger.critical(f'{log} Please specify a correct pipeline, or disable the extension') raise RuntimeError(f'{log} Please specify a correct pipeline, or disable the extension') diff --git a/extensions/multimodal/pipelines/llava/llava.py b/extensions/multimodal/pipelines/llava/llava.py index ad800165..16f0e06f 100644 --- a/extensions/multimodal/pipelines/llava/llava.py +++ b/extensions/multimodal/pipelines/llava/llava.py @@ -1,16 +1,17 @@ -import logging import time from abc import abstractmethod from typing import List, Tuple import torch -from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline from huggingface_hub import hf_hub_download -from modules import shared -from modules.text_generation import encode from PIL import Image from transformers import CLIPImageProcessor, CLIPVisionModel +from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline +from modules import shared +from modules.logging_colors import logger +from modules.text_generation import encode + class LLaVA_v0_Pipeline(AbstractMultimodalPipeline): CLIP_REPO = "openai/clip-vit-large-patch14" @@ -26,11 +27,11 @@ class LLaVA_v0_Pipeline(AbstractMultimodalPipeline): def _load_models(self): start_ts = time.time() - logging.info(f"LLaVA - Loading CLIP from {LLaVA_v0_Pipeline.CLIP_REPO} as {self.clip_dtype} on {self.clip_device}...") + logger.info(f"LLaVA - Loading CLIP from {LLaVA_v0_Pipeline.CLIP_REPO} as {self.clip_dtype} on {self.clip_device}...") image_processor = CLIPImageProcessor.from_pretrained(LLaVA_v0_Pipeline.CLIP_REPO, torch_dtype=self.clip_dtype) vision_tower = CLIPVisionModel.from_pretrained(LLaVA_v0_Pipeline.CLIP_REPO, torch_dtype=self.clip_dtype).to(self.clip_device) - logging.info(f"LLaVA - Loading projector from {self.llava_projector_repo()} as {self.projector_dtype} on {self.projector_device}...") + logger.info(f"LLaVA - Loading projector 
from {self.llava_projector_repo()} as {self.projector_dtype} on {self.projector_device}...") projector_path = hf_hub_download(self.llava_projector_repo(), self.llava_projector_filename()) mm_projector = torch.nn.Linear(*self.llava_projector_shape()) projector_data = torch.load(projector_path) @@ -38,7 +39,7 @@ class LLaVA_v0_Pipeline(AbstractMultimodalPipeline): mm_projector.bias = torch.nn.Parameter(projector_data['model.mm_projector.bias'].to(dtype=self.projector_dtype), False) mm_projector = mm_projector.to(self.projector_device) - logging.info(f"LLaVA supporting models loaded, took {time.time() - start_ts:.2f} seconds") + logger.info(f"LLaVA supporting models loaded, took {time.time() - start_ts:.2f} seconds") return image_processor, vision_tower, mm_projector @staticmethod diff --git a/extensions/multimodal/script.py b/extensions/multimodal/script.py index 2ca11bf5..b3f654e4 100644 --- a/extensions/multimodal/script.py +++ b/extensions/multimodal/script.py @@ -1,5 +1,4 @@ import base64 -import logging import re import time from functools import partial @@ -10,6 +9,7 @@ import torch from extensions.multimodal.multimodal_embedder import MultimodalEmbedder from modules import shared +from modules.logging_colors import logger params = { "add_all_images_to_prompt": False, @@ -78,7 +78,7 @@ def tokenizer_modifier(state, prompt, input_ids, input_embeds): return prompt, input_ids, input_embeds prompt, input_ids, input_embeds, total_embedded = multimodal_embedder.forward(prompt, state, params) - logging.info(f'Embedded {total_embedded} image(s) in {time.time()-start_ts:.2f}s') + logger.info(f'Embedded {total_embedded} image(s) in {time.time()-start_ts:.2f}s') return (prompt, input_ids.unsqueeze(0).to(shared.model.device, dtype=torch.int64), input_embeds.unsqueeze(0).to(shared.model.device, dtype=shared.model.dtype)) diff --git a/extensions/superbooga/chromadb.py b/extensions/superbooga/chromadb.py index 088a6d7a..75efe70b 100644 --- a/extensions/superbooga/chromadb.py +++ b/extensions/superbooga/chromadb.py @@ -1,13 +1,12 @@ -import logging - +import chromadb import posthog import torch +from chromadb.config import Settings from sentence_transformers import SentenceTransformer -import chromadb -from chromadb.config import Settings +from modules.logging_colors import logger -logging.info('Intercepting all calls to posthog :)') +logger.info('Intercepting all calls to posthog :)') posthog.capture = lambda *args, **kwargs: None diff --git a/extensions/superbooga/script.py b/extensions/superbooga/script.py index c9f1a22d..f36f6b01 100644 --- a/extensions/superbooga/script.py +++ b/extensions/superbooga/script.py @@ -1,4 +1,3 @@ -import logging import re import textwrap @@ -6,6 +5,7 @@ import gradio as gr from bs4 import BeautifulSoup from modules import chat, shared +from modules.logging_colors import logger from .chromadb import add_chunks_to_collector, make_collector from .download_urls import download_urls @@ -123,14 +123,14 @@ def custom_generate_chat_prompt(user_input, state, **kwargs): if shared.history['internal'][id_][0] != '<|BEGIN-VISIBLE-CHAT|>': additional_context += make_single_exchange(id_) - logging.warning(f'Adding the following new context:\n{additional_context}') + logger.warning(f'Adding the following new context:\n{additional_context}') state['context'] = state['context'].strip() + '\n' + additional_context kwargs['history'] = { 'internal': [shared.history['internal'][i] for i in range(hist_size) if i not in best_ids], 'visible': '' } except RuntimeError: - 
logging.error("Couldn't query the database, moving on...") + logger.error("Couldn't query the database, moving on...") return chat.generate_chat_prompt(user_input, state, **kwargs) diff --git a/modules/AutoGPTQ_loader.py b/modules/AutoGPTQ_loader.py index adbee7eb..e80e0db0 100644 --- a/modules/AutoGPTQ_loader.py +++ b/modules/AutoGPTQ_loader.py @@ -1,9 +1,9 @@ -import logging from pathlib import Path from auto_gptq import AutoGPTQForCausalLM import modules.shared as shared +from modules.logging_colors import logger from modules.models import get_max_memory_dict @@ -17,13 +17,13 @@ def load_quantized(model_name): found = list(path_to_model.glob(f"*{ext}")) if len(found) > 0: if len(found) > 1: - logging.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.') + logger.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.') pt_path = found[-1] break if pt_path is None: - logging.error("The model could not be loaded because its checkpoint file in .bin/.pt/.safetensors format could not be located.") + logger.error("The model could not be loaded because its checkpoint file in .bin/.pt/.safetensors format could not be located.") return # Define the params for AutoGPTQForCausalLM.from_quantized @@ -35,6 +35,6 @@ def load_quantized(model_name): 'max_memory': get_max_memory_dict() } - logging.warning(f"The AutoGPTQ params are: {params}") + logger.warning(f"The AutoGPTQ params are: {params}") model = AutoGPTQForCausalLM.from_quantized(path_to_model, **params) return model diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 8fef5a87..ddc5f9a5 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -1,5 +1,4 @@ import inspect -import logging import re import sys from pathlib import Path @@ -10,14 +9,15 @@ import transformers from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared +from modules.logging_colors import logger sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) try: import llama_inference_offload except ImportError: - logging.error('Failed to load GPTQ-for-LLaMa') - logging.error('See https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md') + logger.error('Failed to load GPTQ-for-LLaMa') + logger.error('See https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md') sys.exit(-1) try: @@ -127,7 +127,7 @@ def find_quantized_model_file(model_name): found = list(path_to_model.glob(f"*{ext}")) if len(found) > 0: if len(found) > 1: - logging.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.') + logger.warning(f'More than one {ext} model has been found. The last one will be selected. 
It could be wrong.') pt_path = found[-1] break @@ -138,8 +138,8 @@ def find_quantized_model_file(model_name): # The function that loads the model in modules/models.py def load_quantized(model_name): if shared.args.model_type is None: - logging.error("The model could not be loaded because its type could not be inferred from its name.") - logging.error("Please specify the type manually using the --model_type argument.") + logger.error("The model could not be loaded because its type could not be inferred from its name.") + logger.error("Please specify the type manually using the --model_type argument.") return None # Select the appropriate load_quant function @@ -148,21 +148,21 @@ def load_quantized(model_name): load_quant = llama_inference_offload.load_quant elif model_type in ('llama', 'opt', 'gptj'): if shared.args.pre_layer: - logging.warning("Ignoring --pre_layer because it only works for llama model type.") + logger.warning("Ignoring --pre_layer because it only works for llama model type.") load_quant = _load_quant else: - logging.error("Unknown pre-quantized model type specified. Only 'llama', 'opt' and 'gptj' are supported") + logger.error("Unknown pre-quantized model type specified. Only 'llama', 'opt' and 'gptj' are supported") exit() # Find the quantized model weights file (.pt/.safetensors) path_to_model = Path(f'{shared.args.model_dir}/{model_name}') pt_path = find_quantized_model_file(model_name) if not pt_path: - logging.error("Could not find the quantized model in .pt or .safetensors format, exiting...") + logger.error("Could not find the quantized model in .pt or .safetensors format, exiting...") exit() else: - logging.info(f"Found the following quantized model: {pt_path}") + logger.info(f"Found the following quantized model: {pt_path}") # qwopqwop200's offload if model_type == 'llama' and shared.args.pre_layer: @@ -190,7 +190,7 @@ def load_quantized(model_name): max_memory = accelerate.utils.get_balanced_memory(model) device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory, no_split_module_classes=["LlamaDecoderLayer"]) - logging.info("Using the following device map for the quantized model:", device_map) + logger.info("Using the following device map for the quantized model:", device_map) # https://huggingface.co/docs/accelerate/package_reference/big_modeling#accelerate.dispatch_model model = accelerate.dispatch_model(model, device_map=device_map, offload_buffers=True) diff --git a/modules/LoRA.py b/modules/LoRA.py index 08bf5b88..56f90771 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -1,10 +1,10 @@ -import logging from pathlib import Path import torch from peft import PeftModel import modules.shared as shared +from modules.logging_colors import logger def add_lora_to_model(lora_names): @@ -19,7 +19,7 @@ def add_lora_to_model(lora_names): # Add a LoRA when another LoRA is already present if len(removed_set) == 0 and len(prior_set) > 0: - logging.info(f"Adding the LoRA(s) named {added_set} to the model...") + logger.info(f"Adding the LoRA(s) named {added_set} to the model...") for lora in added_set: shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora) @@ -31,7 +31,7 @@ def add_lora_to_model(lora_names): shared.model = shared.model.base_model.model if len(lora_names) > 0: - logging.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names))) + logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names))) params = {} if not shared.args.cpu: params['dtype'] = 
shared.model.dtype diff --git a/modules/chat.py b/modules/chat.py index 386fae0a..7e980f32 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -3,7 +3,6 @@ import base64 import copy import io import json -import logging import re from datetime import datetime from pathlib import Path @@ -14,6 +13,7 @@ from PIL import Image import modules.shared as shared from modules.extensions import apply_extensions from modules.html_generator import chat_html_wrapper, make_thumbnail +from modules.logging_colors import logger from modules.text_generation import (generate_reply, get_encoded_length, get_max_prompt_length) from modules.utils import replace_all @@ -187,7 +187,7 @@ def chatbot_wrapper(text, history, state, regenerate=False, _continue=False, loa output = copy.deepcopy(history) output = apply_extensions('history', output) if shared.model_name == 'None' or shared.model is None: - logging.error("No model is loaded! Select one in the Model tab.") + logger.error("No model is loaded! Select one in the Model tab.") yield output return @@ -278,7 +278,7 @@ def chatbot_wrapper(text, history, state, regenerate=False, _continue=False, loa def impersonate_wrapper(text, state): if shared.model_name == 'None' or shared.model is None: - logging.error("No model is loaded! Select one in the Model tab.") + logger.error("No model is loaded! Select one in the Model tab.") yield '' return @@ -584,7 +584,7 @@ def upload_character(json_file, img, tavern=False): img = Image.open(io.BytesIO(img)) img.save(Path(f'characters/{outfile_name}.png')) - logging.info(f'New character saved to "characters/{outfile_name}.json".') + logger.info(f'New character saved to "characters/{outfile_name}.json".') return outfile_name @@ -608,18 +608,18 @@ def upload_your_profile_picture(img): else: img = make_thumbnail(img) img.save(Path('cache/pfp_me.png')) - logging.info('Profile picture saved to "cache/pfp_me.png"') + logger.info('Profile picture saved to "cache/pfp_me.png"') def delete_file(path): if path.exists(): - logging.warning(f'Deleting {path}') + logger.warning(f'Deleting {path}') path.unlink(missing_ok=True) def save_character(name, greeting, context, picture, filename, instruct=False): if filename == "": - logging.error("The filename is empty, so the character will not be saved.") + logger.error("The filename is empty, so the character will not be saved.") return folder = 'characters' if not instruct else 'characters/instruction-following' @@ -634,11 +634,11 @@ def save_character(name, greeting, context, picture, filename, instruct=False): with filepath.open('w') as f: yaml.dump(data, f) - logging.info(f'Wrote {filepath}') + logger.info(f'Wrote {filepath}') path_to_img = Path(f'{folder}/{filename}.png') if picture and not instruct: picture.save(path_to_img) - logging.info(f'Wrote {path_to_img}') + logger.info(f'Wrote {path_to_img}') elif path_to_img.exists(): delete_file(path_to_img) diff --git a/modules/extensions.py b/modules/extensions.py index d41ae3df..4950e04e 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -1,4 +1,3 @@ -import logging import traceback from functools import partial @@ -6,6 +5,7 @@ import gradio as gr import extensions import modules.shared as shared +from modules.logging_colors import logger state = {} available_extensions = [] @@ -29,7 +29,7 @@ def load_extensions(): for i, name in enumerate(shared.args.extensions): if name in available_extensions: if name != 'api': - logging.info(f'Loading the extension "{name}"...') + logger.info(f'Loading the extension "{name}"...') try: 
exec(f"import extensions.{name}.script") extension = getattr(extensions, name).script @@ -40,7 +40,7 @@ def load_extensions(): state[name] = [True, i] except: - logging.error(f'Failed to load the extension "{name}".') + logger.error(f'Failed to load the extension "{name}".') traceback.print_exc() diff --git a/modules/llama_attn_hijack.py b/modules/llama_attn_hijack.py index e953f523..925cdaa3 100644 --- a/modules/llama_attn_hijack.py +++ b/modules/llama_attn_hijack.py @@ -1,4 +1,3 @@ -import logging import math import sys from typing import Optional, Tuple @@ -8,21 +7,22 @@ import torch.nn as nn import transformers.models.llama.modeling_llama import modules.shared as shared +from modules.logging_colors import logger if shared.args.xformers: try: import xformers.ops except Exception: - logging.error("xformers not found! Please install it before trying to use it.", file=sys.stderr) + logger.error("xformers not found! Please install it before trying to use it.", file=sys.stderr) def hijack_llama_attention(): if shared.args.xformers: transformers.models.llama.modeling_llama.LlamaAttention.forward = xformers_forward - logging.info("Replaced attention with xformers_attention") + logger.info("Replaced attention with xformers_attention") elif shared.args.sdp_attention: transformers.models.llama.modeling_llama.LlamaAttention.forward = sdp_attention_forward - logging.info("Replaced attention with sdp_attention") + logger.info("Replaced attention with sdp_attention") def xformers_forward( diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index 0ed33543..94830898 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -6,20 +6,20 @@ Documentation: https://abetlen.github.io/llama-cpp-python/ ''' -import logging import re from llama_cpp import Llama, LlamaCache from modules import shared from modules.callbacks import Iteratorize +from modules.logging_colors import logger class LlamaCppModel: def __init__(self): self.initialized = False - def __del__(self): + def __del__(self): self.model.__del__() @classmethod @@ -35,7 +35,7 @@ class LlamaCppModel: else: cache_capacity = int(shared.args.cache_capacity) - logging.info("Cache capacity is " + str(cache_capacity) + " bytes") + logger.info("Cache capacity is " + str(cache_capacity) + " bytes") params = { 'model_path': str(path), diff --git a/modules/logging_colors.py b/modules/logging_colors.py index 5c9714f7..80cb2d1d 100644 --- a/modules/logging_colors.py +++ b/modules/logging_colors.py @@ -3,6 +3,8 @@ import logging import platform +logging.basicConfig(format='%(levelname)s:%(message)s') + def add_coloring_to_emit_windows(fn): # add methods we need to the class @@ -107,3 +109,6 @@ else: # log.addFilter(log_filter()) # //hdlr = logging.StreamHandler() # //hdlr.setFormatter(formatter()) + +logger = logging.getLogger('text-generation-webui') +logger.setLevel(logging.DEBUG) diff --git a/modules/models.py b/modules/models.py index cf233bb1..0050602e 100644 --- a/modules/models.py +++ b/modules/models.py @@ -1,6 +1,5 @@ import gc import json -import logging import os import re import time @@ -17,6 +16,7 @@ from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM, import modules.shared as shared from modules import llama_attn_hijack +from modules.logging_colors import logger transformers.logging.set_verbosity_error() @@ -71,12 +71,12 @@ def find_model_type(model_name): def load_model(model_name): - logging.info(f"Loading {model_name}...") + logger.info(f"Loading {model_name}...") t0 = time.time() shared.model_type 
= find_model_type(model_name) if shared.model_type == 'None': - logging.error('The path to the model does not exist. Exiting.') + logger.error('The path to the model does not exist. Exiting.') return None, None if shared.args.autogptq: @@ -106,7 +106,7 @@ def load_model(model_name): if any((shared.args.xformers, shared.args.sdp_attention)): llama_attn_hijack.hijack_llama_attention() - logging.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.\n") + logger.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.\n") return model, tokenizer @@ -119,7 +119,7 @@ def load_tokenizer(model_name, model): if shared.model_type not in ['llava', 'oasst']: for p in [Path(f"{shared.args.model_dir}/llama-tokenizer/"), Path(f"{shared.args.model_dir}/oobabooga_llama-tokenizer/")]: if p.exists(): - logging.info(f"Loading the universal LLaMA tokenizer from {p}...") + logger.info(f"Loading the universal LLaMA tokenizer from {p}...") tokenizer = LlamaTokenizer.from_pretrained(p, clean_up_tokenization_spaces=True) return tokenizer @@ -162,7 +162,7 @@ def huggingface_loader(model_name): model = LoaderClass.from_pretrained(Path(f"{shared.args.model_dir}/{model_name}"), torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0] model.module.eval() # Inference - logging.info(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}") + logger.info(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}") # Custom else: @@ -172,7 +172,7 @@ def huggingface_loader(model_name): } if not any((shared.args.cpu, torch.cuda.is_available(), torch.has_mps)): - logging.warning("torch.cuda.is_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.") + logger.warning("torch.cuda.is_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.") shared.args.cpu = True if shared.args.cpu: @@ -254,7 +254,7 @@ def llamacpp_loader(model_name): else: model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*ggml*.bin'))[0] - logging.info(f"llama.cpp weights detected: {model_file}\n") + logger.info(f"llama.cpp weights detected: {model_file}\n") model, tokenizer = LlamaCppModel.from_pretrained(model_file) return model, tokenizer @@ -263,7 +263,7 @@ def GPTQ_loader(model_name): # Monkey patch if shared.args.monkey_patch: - logging.warning("Applying the monkey patch for using LoRAs in 4-bit mode. It may cause undefined behavior outside its intended scope.") + logger.warning("Applying the monkey patch for using LoRAs in 4-bit mode. It may cause undefined behavior outside its intended scope.") from modules.monkey_patch_gptq_lora import load_model_llama model, _ = load_model_llama(model_name) @@ -302,7 +302,7 @@ def get_max_memory_dict(): suggestion -= 1000 suggestion = int(round(suggestion / 1000)) - logging.warning(f"Auto-assiging --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors. You can manually set other values.") + logger.warning(f"Auto-assiging --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors. 
You can manually set other values.") max_memory = {0: f'{suggestion}GiB', 'cpu': f'{shared.args.cpu_memory or 99}GiB'} return max_memory if len(max_memory) > 0 else None @@ -333,13 +333,13 @@ def load_soft_prompt(name): zf.extract('tensor.npy') zf.extract('meta.json') j = json.loads(open('meta.json', 'r').read()) - logging.info(f"\nLoading the softprompt \"{name}\".") + logger.info(f"\nLoading the softprompt \"{name}\".") for field in j: if field != 'name': if type(j[field]) is list: - logging.info(f"{field}: {', '.join(j[field])}") + logger.info(f"{field}: {', '.join(j[field])}") else: - logging.info(f"{field}: {j[field]}") + logger.info(f"{field}: {j[field]}") tensor = np.load('tensor.npy') Path('tensor.npy').unlink() diff --git a/modules/shared.py b/modules/shared.py index 7f945366..e1b5bf9e 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -1,10 +1,11 @@ import argparse -import logging from collections import OrderedDict from pathlib import Path import yaml +from modules.logging_colors import logger + model = None tokenizer = None model_name = "None" @@ -180,14 +181,14 @@ args_defaults = parser.parse_args([]) deprecated_dict = {} for k in deprecated_dict: if getattr(args, k) != deprecated_dict[k][1]: - logging.warning(f"--{k} is deprecated and will be removed. Use --{deprecated_dict[k][0]} instead.") + logger.warning(f"--{k} is deprecated and will be removed. Use --{deprecated_dict[k][0]} instead.") setattr(args, deprecated_dict[k][0], getattr(args, k)) # Security warnings if args.trust_remote_code: - logging.warning("trust_remote_code is enabled. This is dangerous.") + logger.warning("trust_remote_code is enabled. This is dangerous.") if args.share: - logging.warning("The gradio \"share link\" feature downloads a proprietary and unaudited blob to create a reverse tunnel. This is potentially dangerous.") + logger.warning("The gradio \"share link\" feature downloads a proprietary and unaudited blob to create a reverse tunnel. This is potentially dangerous.") def add_extension(name): diff --git a/modules/text_generation.py b/modules/text_generation.py index 253bd302..e5fa4467 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -1,5 +1,4 @@ import ast -import logging import random import re import time @@ -14,6 +13,7 @@ from modules.callbacks import (Iteratorize, Stream, _SentinelTokenStoppingCriteria) from modules.extensions import apply_extensions from modules.html_generator import generate_4chan_html, generate_basic_html +from modules.logging_colors import logger from modules.models import clear_torch_cache, local_rank @@ -159,7 +159,7 @@ def generate_reply(question, state, eos_token=None, stopping_strings=None, is_ch generate_func = apply_extensions('custom_generate_reply') if generate_func is None: if shared.model_name == 'None' or shared.model is None: - logging.error("No model is loaded! Select one in the Model tab.") + logger.error("No model is loaded! 
Select one in the Model tab.") yield question return diff --git a/modules/training.py b/modules/training.py index 0228b1c6..ec6a25a2 100644 --- a/modules/training.py +++ b/modules/training.py @@ -1,5 +1,4 @@ import json -import logging import math import sys import threading @@ -15,8 +14,9 @@ from peft import (LoraConfig, get_peft_model, prepare_model_for_int8_training, set_peft_model_state_dict) from modules import shared, ui, utils -from modules.evaluate import calculate_perplexity, generate_markdown_table, save_past_evaluations - +from modules.evaluate import (calculate_perplexity, generate_markdown_table, + save_past_evaluations) +from modules.logging_colors import logger # This mapping is from a very recent commit, not yet released. # If not available, default to a backup map for some common model types. @@ -24,7 +24,8 @@ try: from peft.utils.other import \ TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING as \ model_to_lora_modules - from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES + from transformers.models.auto.modeling_auto import \ + MODEL_FOR_CAUSAL_LM_MAPPING_NAMES MODEL_CLASSES = {v: k for k, v in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES} except: standard_modules = ["q_proj", "v_proj"] @@ -217,13 +218,13 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch if model_type == "PeftModelForCausalLM": if len(shared.args.lora_names) > 0: yield "You are trying to train a LoRA while you already have another LoRA loaded. This will work, but may have unexpected effects. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*" - logging.warning("Training LoRA over top of another LoRA. May have unexpected effects.") + logger.warning("Training LoRA over top of another LoRA. May have unexpected effects.") else: yield "Model ID not matched due to LoRA loading. Consider reloading base model. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*" - logging.warning("Model ID not matched due to LoRA loading. Consider reloading base model.") + logger.warning("Model ID not matched due to LoRA loading. Consider reloading base model.") else: yield "LoRA training has only currently been validated for LLaMA, OPT, GPT-J, and GPT-NeoX models. Unexpected errors may follow. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*" - logging.warning(f"LoRA training has only currently been validated for LLaMA, OPT, GPT-J, and GPT-NeoX models. (Found model type: {model_type})") + logger.warning(f"LoRA training has only currently been validated for LLaMA, OPT, GPT-J, and GPT-NeoX models. (Found model type: {model_type})") time.sleep(5) @@ -233,7 +234,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch elif not shared.args.load_in_8bit and shared.args.wbits <= 0: yield "It is highly recommended you use `--load-in-8bit` for LoRA training. 
*(Will continue anyway in 2 seconds, press `Interrupt` to stop.)*" - logging.warning("It is highly recommended you use `--load-in-8bit` for LoRA training.") + logger.warning("It is highly recommended you use `--load-in-8bit` for LoRA training.") time.sleep(2) # Give it a moment for the message to show in UI before continuing if cutoff_len <= 0 or micro_batch_size <= 0 or batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0: @@ -253,7 +254,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch # == Prep the dataset, format, etc == if raw_text_file not in ['None', '']: - logging.info("Loading raw text file dataset...") + logger.info("Loading raw text file dataset...") with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file: raw_text = file.read().replace('\r', '') @@ -311,7 +312,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch prompt = generate_prompt(data_point) return tokenize(prompt) - logging.info("Loading JSON datasets...") + logger.info("Loading JSON datasets...") data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json')) train_data = data['train'].map(generate_and_tokenize_prompt) @@ -323,10 +324,10 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): - logging.info("Getting model ready...") + logger.info("Getting model ready...") prepare_model_for_int8_training(shared.model) - logging.info("Prepping for training...") + logger.info("Prepping for training...") config = LoraConfig( r=lora_rank, lora_alpha=lora_alpha, @@ -337,10 +338,10 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch ) try: - logging.info("Creating LoRA model...") + logger.info("Creating LoRA model...") lora_model = get_peft_model(shared.model, config) if not always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file(): - logging.info("Loading existing LoRA data...") + logger.info("Loading existing LoRA data...") state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin") set_peft_model_state_dict(lora_model, state_dict_peft) except: @@ -418,7 +419,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch json.dump({x: vars[x] for x in PARAMETERS}, file) # == Main run and monitor loop == - logging.info("Starting training...") + logger.info("Starting training...") yield "Starting..." if WANT_INTERRUPT: yield "Interrupted before start." @@ -428,7 +429,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch trainer.train() # Note: save in the thread in case the gradio thread breaks (eg browser closed) lora_model.save_pretrained(lora_file_path) - logging.info("LoRA training run is completed and saved.") + logger.info("LoRA training run is completed and saved.") tracked.did_save = True thread = threading.Thread(target=threaded_run) @@ -460,14 +461,14 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch # Saving in the train thread might fail if an error occurs, so save here if so. if not tracked.did_save: - logging.info("Training complete, saving...") + logger.info("Training complete, saving...") lora_model.save_pretrained(lora_file_path) if WANT_INTERRUPT: - logging.info("Training interrupted.") + logger.info("Training interrupted.") yield f"Interrupted. 
Incomplete LoRA saved to `{lora_file_path}`" else: - logging.info("Training complete!") + logger.info("Training complete!") yield f"Done! LoRA saved to `{lora_file_path}`" diff --git a/server.py b/server.py index f2c8dfa4..f0bf4fd3 100644 --- a/server.py +++ b/server.py @@ -1,17 +1,18 @@ -import logging import os -import requests import warnings -import modules.logging_colors + +import requests + +from modules.logging_colors import logger os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False' os.environ['BITSANDBYTES_NOWELCOME'] = '1' warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated') -logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) + # This is a hack to prevent Gradio from phoning home when it gets imported def my_get(url, **kwargs): - logging.info('Gradio HTTP request redirected to localhost :)') + logger.info('Gradio HTTP request redirected to localhost :)') kwargs.setdefault('allow_redirects', True) return requests.api.request('get', 'http://127.0.0.1/', **kwargs) @@ -49,7 +50,8 @@ from modules.extensions import apply_extensions from modules.html_generator import chat_html_wrapper from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt, unload_model -from modules.text_generation import generate_reply_wrapper, get_encoded_length, stop_everything_event +from modules.text_generation import (generate_reply_wrapper, + get_encoded_length, stop_everything_event) def load_model_wrapper(selected_model, autoload=False): @@ -388,7 +390,7 @@ def create_model_menus(): shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap) shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock) - with gr.Row(): + with gr.Row(): shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready') # In this event handler, the interface state is read and updated @@ -971,7 +973,7 @@ if __name__ == "__main__": settings_file = Path('settings.json') if settings_file is not None: - logging.info(f"Loading settings from {settings_file}...") + logger.info(f"Loading settings from {settings_file}...") new_settings = json.loads(open(settings_file, 'r').read()) for item in new_settings: shared.settings[item] = new_settings[item] @@ -1015,7 +1017,7 @@ if __name__ == "__main__": # Select the model from a command-line menu elif shared.args.model_menu: if len(available_models) == 0: - logging.error('No models are available! Please download at least one.') + logger.error('No models are available! Please download at least one.') sys.exit(0) else: print('The following models are available:\n')
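
Note on the logging setup introduced above (illustrative sketch, not part of the diff): modules/logging_colors.py now calls logging.basicConfig() with a format string only, so the root logger keeps its default WARNING level, while the dedicated 'text-generation-webui' logger is set to DEBUG. The resulting behavior can be sketched with the standard library alone; the third-party logger name below is made up for the example:

    import logging

    # As added in modules/logging_colors.py by this patch:
    logging.basicConfig(format='%(levelname)s:%(message)s')
    logger = logging.getLogger('text-generation-webui')
    logger.setLevel(logging.DEBUG)

    # Messages sent through the project logger are emitted even at INFO/DEBUG,
    # because the named logger's own level is DEBUG.
    logger.info('shown: the named logger is at DEBUG level')

    # INFO-level noise from other libraries' loggers is filtered out, since
    # those loggers inherit the root logger's default WARNING level.
    # ('some_noisy_library' is a hypothetical name for illustration.)
    logging.getLogger('some_noisy_library').info('suppressed')
    logging.getLogger('some_noisy_library').warning('still shown')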
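
This is why server.py no longer needs its own logging.basicConfig(..., level=logging.INFO) call: raising the root level to INFO was what let third-party modules flood the console. Application and extension modules instead import the shared logger ("from modules.logging_colors import logger") and call logger.info / logger.warning / logger.error, as the hunks above apply uniformly.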