diff --git a/models/config.yaml b/models/config.yaml index 9d484d24..9c8ce827 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -20,8 +20,6 @@ model_type: 'dollyv2' .*replit: model_type: 'replit' -.*AWQ: - n_batch: 1 .*(oasst|openassistant-|stablelm-7b-sft-v7-epoch-3): instruction_template: 'Open Assistant' skip_special_tokens: false diff --git a/modules/loaders.py b/modules/loaders.py index c7e5d800..92a04d49 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -135,7 +135,6 @@ loaders_and_params = OrderedDict({ 'gpu_memory', 'auto_devices', 'max_seq_len', - 'n_batch', 'no_inject_fused_attention', 'trust_remote_code', 'use_fast', diff --git a/modules/models.py b/modules/models.py index 087adada..71203152 100644 --- a/modules/models.py +++ b/modules/models.py @@ -298,7 +298,7 @@ def AutoAWQ_loader(model_name): trust_remote_code=shared.args.trust_remote_code, fuse_layers=not shared.args.no_inject_fused_attention, max_memory=get_max_memory_dict(), - batch_size=shared.args.n_batch, + batch_size=1, safetensors=any(model_dir.glob('*.safetensors')), )