diff --git a/modules/RoPE.py b/modules/RoPE.py
deleted file mode 100644
index 31163a33..00000000
--- a/modules/RoPE.py
+++ /dev/null
@@ -1,18 +0,0 @@
-def get_alpha_value(alpha, base):
-    '''
-    Gets alpha_value from alpha_value and rope_freq_base
-    '''
-    if base > 0:
-        return (base / 10000.) ** (63 / 64.)
-    else:
-        return alpha
-
-
-def get_rope_freq_base(alpha, base):
-    '''
-    Gets rope_freq_base from alpha_value and rope_freq_base
-    '''
-    if base > 0:
-        return base
-    else:
-        return 10000 * alpha ** (64 / 63.)
diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
index f30be66a..74af5fbf 100644
--- a/modules/llamacpp_hf.py
+++ b/modules/llamacpp_hf.py
@@ -7,7 +7,7 @@ from torch.nn import CrossEntropyLoss
 from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 
-from modules import RoPE, llama_cpp_python_hijack, shared
+from modules import llama_cpp_python_hijack, shared
 from modules.logging_colors import logger
 
 try:
@@ -212,7 +212,7 @@ class LlamacppHF(PreTrainedModel):
             'mul_mat_q': not shared.args.no_mul_mat_q,
             'numa': shared.args.numa,
             'n_gpu_layers': shared.args.n_gpu_layers,
-            'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+            'rope_freq_base': shared.args.rope_freq_base,
             'tensor_split': tensor_split_list,
             'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
             'logits_all': shared.args.logits_all,
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index b2a25d36..d62fd517 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -4,7 +4,7 @@ from functools import partial
 import numpy as np
 import torch
 
-from modules import RoPE, llama_cpp_python_hijack, shared
+from modules import llama_cpp_python_hijack, shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length
@@ -92,7 +92,7 @@ class LlamaCppModel:
             'mul_mat_q': not shared.args.no_mul_mat_q,
             'numa': shared.args.numa,
             'n_gpu_layers': shared.args.n_gpu_layers,
-            'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+            'rope_freq_base': shared.args.rope_freq_base,
             'tensor_split': tensor_split_list,
             'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
             'offload_kqv': not shared.args.no_offload_kqv,
diff --git a/modules/loaders.py b/modules/loaders.py
index 5099ffb0..7bf1cde4 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -22,7 +22,6 @@ loaders_and_params = OrderedDict({
         'no_use_fast',
         'use_flash_attention_2',
         'alpha_value',
-        'rope_freq_base',
         'compress_pos_emb',
         'disable_exllama',
         'disable_exllamav2',
@@ -38,7 +37,6 @@ loaders_and_params = OrderedDict({
         'no_mmap',
         'mlock',
         'no_mul_mat_q',
-        'alpha_value',
         'rope_freq_base',
         'compress_pos_emb',
         'cpu',
@@ -60,7 +58,6 @@ loaders_and_params = OrderedDict({
         'no_mmap',
         'mlock',
         'no_mul_mat_q',
-        'alpha_value',
         'rope_freq_base',
         'compress_pos_emb',
         'cpu',
diff --git a/modules/models.py b/modules/models.py
index cb32a3da..bd54c146 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -25,7 +25,7 @@ from transformers import (
 )
 
 import modules.shared as shared
-from modules import RoPE, sampler_hijack
+from modules import sampler_hijack
 from modules.logging_colors import logger
 from modules.models_settings import get_model_metadata
 
@@ -248,7 +248,7 @@ def huggingface_loader(model_name):
         if shared.args.compress_pos_emb > 1:
             params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb}
         elif shared.args.alpha_value > 1:
-            params['rope_scaling'] = {'type': 'dynamic', 'factor': RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)}
+            params['rope_scaling'] = {'type': 'dynamic', 'factor': shared.args.alpha_value}
 
         logger.info("TRANSFORMERS_PARAMS=")
         pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
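
Note: the deleted modules/RoPE.py helpers implemented an NTK-style conversion between alpha_value and rope_freq_base; after this patch each loader receives its own setting verbatim (rope_freq_base for the llama.cpp loaders, alpha_value for Transformers' dynamic rope_scaling). A minimal standalone sketch of the removed behavior, with the helper bodies reproduced from the deleted file; the input value 2.5 is illustrative, not taken from the patch:

# Sketch of the conversion this patch removes. Not part of the diff itself.

def get_rope_freq_base(alpha, base):
    # Old behavior: an explicit rope_freq_base (> 0) wins; otherwise the
    # base is derived from alpha_value via the relation 10000 * alpha^(64/63).
    if base > 0:
        return base
    return 10000 * alpha ** (64 / 63.)

def get_alpha_value(alpha, base):
    # Inverse direction: derive alpha_value from an explicit rope_freq_base,
    # else pass alpha_value through unchanged.
    if base > 0:
        return (base / 10000.) ** (63 / 64.)
    return alpha

# With alpha_value=2.5 and rope_freq_base unset (0), the llama.cpp loaders
# previously received a derived base; now they receive rope_freq_base as-is.
print(get_rope_freq_base(2.5, 0))  # ~25366.0 (old derived value, now gone)
print(get_alpha_value(2.5, 0))     # 2.5 (what dynamic rope_scaling now gets directly)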