text-generation-webui/modules/AutoGPTQ_loader.py

from pathlib import Path

from auto_gptq import AutoGPTQForCausalLM

import modules.shared as shared
from modules.logging_colors import logger
from modules.models import get_max_memory_dict


def load_quantized(model_name):
    """Load a quantized model with AutoGPTQ from the configured model directory.

    The model folder is ``shared.args.model_dir / model_name``. It is searched
    for a checkpoint file, preferring ``.safetensors`` over ``.pt`` over
    ``.bin``; only the first extension that yields at least one match is used.

    Args:
        model_name: Name of the model sub-directory inside
            ``shared.args.model_dir``.

    Returns:
        Whatever ``AutoGPTQForCausalLM.from_quantized`` returns for the
        selected checkpoint, or ``None`` (after logging an error) when no
        checkpoint file could be located.
    """
    path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
    pt_path = None
    use_safetensors = False

    # Find the model checkpoint
    for ext in ['.safetensors', '.pt', '.bin']:
        # glob() yields entries in filesystem order, which is arbitrary.
        # Sorting makes "the last one" a reproducible choice across runs
        # and machines instead of depending on directory listing order.
        found = sorted(path_to_model.glob(f"*{ext}"))
        if not found:
            continue

        if len(found) > 1:
            logger.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.')

        pt_path = found[-1]
        use_safetensors = ext == '.safetensors'
        break

    if pt_path is None:
        logger.error("The model could not be loaded because its checkpoint file in .bin/.pt/.safetensors format could not be located.")
        return None

    # Define the params for AutoGPTQForCausalLM.from_quantized
    params = {
        # The checkpoint file name without its extension.
        'model_basename': pt_path.stem,
        'device': "cuda:0" if not shared.args.cpu else "cpu",
        'use_triton': shared.args.triton,
        'use_safetensors': use_safetensors,
        'max_memory': get_max_memory_dict()
    }

    logger.warning(f"The AutoGPTQ params are: {params}")
    model = AutoGPTQForCausalLM.from_quantized(path_to_model, **params)
    return model
Add AutoGPTQ support (basic) (#2132) 2023-05-17 16:12:12 +02:00			`from pathlib import Path`

			`from auto_gptq import AutoGPTQForCausalLM`

			`import modules.shared as shared`
Prevent unwanted log messages from modules 2023-05-22 03:42:34 +02:00			`from modules.logging_colors import logger`
Add AutoGPTQ support (basic) (#2132) 2023-05-17 16:12:12 +02:00			`from modules.models import get_max_memory_dict`


			`def load_quantized(model_name):`
			`path_to_model = Path(f'{shared.args.model_dir}/{model_name}')`
			`pt_path = None`
			`use_safetensors = False`

			`# Find the model checkpoint`
Add various checks to model loading functions 2023-05-17 20:52:23 +02:00			`for ext in ['.safetensors', '.pt', '.bin']:`
			`found = list(path_to_model.glob(f"*{ext}"))`
			`if len(found) > 0:`
			`if len(found) > 1:`
Prevent unwanted log messages from modules 2023-05-22 03:42:34 +02:00			`logger.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.')`
Add various checks to model loading functions 2023-05-17 20:52:23 +02:00
			`pt_path = found[-1]`
Small AutoGPTQ fix 2023-05-23 20:20:01 +02:00			`if ext == '.safetensors':`
			`use_safetensors = True`

Add various checks to model loading functions 2023-05-17 20:52:23 +02:00			`break`

			`if pt_path is None:`
Prevent unwanted log messages from modules 2023-05-22 03:42:34 +02:00			`logger.error("The model could not be loaded because its checkpoint file in .bin/.pt/.safetensors format could not be located.")`
Add various checks to model loading functions 2023-05-17 20:52:23 +02:00			`return`
Add AutoGPTQ support (basic) (#2132) 2023-05-17 16:12:12 +02:00
			`# Define the params for AutoGPTQForCausalLM.from_quantized`
			`params = {`
			`'model_basename': pt_path.stem,`
			`'device': "cuda:0" if not shared.args.cpu else "cpu",`
			`'use_triton': shared.args.triton,`
			`'use_safetensors': use_safetensors,`
			`'max_memory': get_max_memory_dict()`
			`}`

Prevent unwanted log messages from modules 2023-05-22 03:42:34 +02:00			`logger.warning(f"The AutoGPTQ params are: {params}")`
Add AutoGPTQ support (basic) (#2132) 2023-05-17 16:12:12 +02:00			`model = AutoGPTQForCausalLM.from_quantized(path_to_model, **params)`
			`return model`