diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index 5e0f347c..8b8b5c4d 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -19,7 +19,9 @@ class CtransformersModel: gpu_layers=shared.args.n_gpu_layers, batch_size=shared.args.n_batch, context_length=shared.args.n_ctx, - stream=True + stream=True, + mmap=not shared.args.no_mmap, + mlock=shared.args.mlock ) self.model = AutoModelForCausalLM.from_pretrained( diff --git a/modules/loaders.py b/modules/loaders.py index 7444555f..472e8ddb 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -101,7 +101,9 @@ loaders_and_params = OrderedDict({ 'n_gpu_layers', 'n_batch', 'threads', - 'model_type' + 'model_type', + 'no_mmap', + 'mlock' ] }) diff --git a/requirements.txt b/requirements.txt index 05a3ec0e..25c953ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,4 +42,4 @@ https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_ https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" # ctransformers -https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.22+cu117-py3-none-any.whl +https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.23+cu117-py3-none-any.whl