diff --git a/css/main.css b/css/main.css
index 3ecf0044..cf3babdb 100644
--- a/css/main.css
+++ b/css/main.css
@@ -404,13 +404,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 .message-body h3,
 .message-body h4 {
   color: var(--body-text-color);
+  margin: 20px 0 10px 0;
 }
 
 .dark .message q {
   color: #f5b031;
 }
 
-.message q::before, .message q::after {
+.message-body q::before, .message-body q::after {
   content: "";
 }
diff --git a/download-model.py b/download-model.py
index 08b23d08..0014b689 100644
--- a/download-model.py
+++ b/download-model.py
@@ -212,11 +212,15 @@ class ModelDownloader:
                     total_size = int(r.headers.get('content-length', 0))
                     block_size = 1024 * 1024  # 1MB
 
+                    filename_str = str(filename)  # Convert PosixPath to string if necessary
+
                     tqdm_kwargs = {
                         'total': total_size,
-                        'unit': 'iB',
+                        'unit': 'B',
                         'unit_scale': True,
-                        'bar_format': '{l_bar}{bar}| {n_fmt}/{total_fmt} {rate_fmt}'
+                        'unit_divisor': 1024,
+                        'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
+                        'desc': f"{filename_str}: "
                     }
 
                     if 'COLAB_GPU' in os.environ:
@@ -233,7 +237,7 @@ class ModelDownloader:
                                 t.update(len(data))
                                 if total_size != 0 and self.progress_bar is not None:
                                     count += len(data)
-                                    self.progress_bar(float(count) / float(total_size), f"{filename}")
+                                    self.progress_bar(float(count) / float(total_size), f"{filename_str}")
 
                 break  # Exit loop if successful
             except (RequestException, ConnectionError, Timeout) as e:
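The download-model.py change above replaces the ambiguous `'iB'` label with `unit='B'` plus `unit_divisor=1024`, so tqdm renders binary-scaled sizes, and prefixes the bar with the file name via `desc`. A minimal standalone sketch of the same configuration; the file name and sizes here are placeholders, and a sleep stands in for the real network read:

```python
# Sketch of the new tqdm progress-bar configuration (illustrative values).
import time

from tqdm import tqdm

total_size = 5 * 1024 * 1024  # pretend content-length: 5 MiB
block_size = 1024 * 1024      # 1MB, as in the downloader

tqdm_kwargs = {
    'total': total_size,
    'unit': 'B',
    'unit_scale': True,
    'unit_divisor': 1024,     # scale by 1024, so 1048576 prints as 1.00MB
    'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
    'desc': "example-model.safetensors: ",  # filename_str in the real code
}

with tqdm(**tqdm_kwargs) as t:
    for _ in range(total_size // block_size):
        time.sleep(0.05)      # stand-in for r.iter_content(block_size)
        t.update(block_size)
```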
diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index 44c1df86..646dee2d 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -319,7 +319,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict:
         yield {'prompt': prompt}
         return
 
-    token_count = len(encode(prompt)[0])
     debug_msg({'prompt': prompt, 'generate_params': generate_params})
 
     if stream:
@@ -330,7 +329,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict:
     answer = ''
     seen_content = ''
-    completion_token_count = 0
 
     for a in generator:
         answer = a['internal'][-1][1]
@@ -345,6 +343,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, prompt_only=False) -> dict:
             chunk = chat_streaming_chunk(new_content)
             yield chunk
 
+    token_count = len(encode(prompt)[0])
     completion_token_count = len(encode(answer)[0])
     stop_reason = "stop"
     if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= generate_params['max_new_tokens']:
@@ -429,8 +428,6 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
             prompt = decode(prompt)[0]
 
         prefix = prompt if echo else ''
-        token_count = len(encode(prompt)[0])
-        total_prompt_token_count += token_count
 
         # generate reply #######################################
         debug_msg({'prompt': prompt, 'generate_params': generate_params})
@@ -440,6 +437,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
         for a in generator:
             answer = a
 
+        token_count = len(encode(prompt)[0])
+        total_prompt_token_count += token_count
         completion_token_count = len(encode(answer)[0])
         total_completion_token_count += completion_token_count
         stop_reason = "stop"
diff --git a/js/main.js b/js/main.js
index 3b8b13e4..899bd8f0 100644
--- a/js/main.js
+++ b/js/main.js
@@ -213,6 +213,7 @@ function doSyntaxHighlighting() {
       renderMathInElement(element, {
        delimiters: [
          { left: "$$", right: "$$", display: true },
+          { left: "$", right: "$", display: false },
          { left: "\\(", right: "\\)", display: false },
          { left: "\\[", right: "\\]", display: true },
        ],
@@ -459,7 +460,12 @@ function updateCssProperties() {
     // Adjust scrollTop based on input height change
     if (chatInputHeight !== currentChatInputHeight) {
-      chatContainer.scrollTop += chatInputHeight - currentChatInputHeight;
+      if (!isScrolled && chatInputHeight < currentChatInputHeight) {
+        chatContainer.scrollTop = chatContainer.scrollHeight;
+      } else {
+        chatContainer.scrollTop += chatInputHeight - currentChatInputHeight;
+      }
+
+      currentChatInputHeight = chatInputHeight;
     }
   }
diff --git a/modules/chat.py b/modules/chat.py
index c744defc..00c4ffa9 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -26,8 +26,7 @@ from modules.logging_colors import logger
 from modules.text_generation import (
     generate_reply,
     get_encoded_length,
-    get_max_prompt_length,
-    stop_everything_event
+    get_max_prompt_length
 )
 from modules.utils import delete_file, get_available_characters, save_file
 
@@ -93,8 +92,16 @@ def generate_chat_prompt(user_input, state, **kwargs):
         chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2'])
 
     instruction_template = jinja_env.from_string(state['instruction_template_str'])
-    instruct_renderer = partial(instruction_template.render, add_generation_prompt=False)
     chat_template = jinja_env.from_string(chat_template_str)
+
+    instruct_renderer = partial(
+        instruction_template.render,
+        builtin_tools=None,
+        tools=None,
+        tools_in_user_message=False,
+        add_generation_prompt=False
+    )
+
     chat_renderer = partial(
         chat_template.render,
         add_generation_prompt=False,
@@ -1036,13 +1043,6 @@ def handle_remove_last_click(state):
     return [history, html, last_input]
 
 
-def handle_stop_click(state):
-    stop_everything_event()
-    html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
-
-    return html
-
-
 def handle_unique_id_select(state):
     history = load_history(state['unique_id'], state['character_menu'], state['mode'])
     html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
diff --git a/modules/html_generator.py b/modules/html_generator.py
index c5eba5a8..d0afd6b2 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -72,6 +72,14 @@ def replace_blockquote(m):
 
 @functools.lru_cache(maxsize=None)
 def convert_to_markdown(string):
+    # Make \[ \] LaTeX equations inline
+    pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'
+    replacement = r'\\[ \1 \\]'
+    string = re.sub(pattern, replacement, string, flags=re.MULTILINE)
+
+    # Escape backslashes
+    string = string.replace('\\', '\\\\')
+
     # Quote to <q></q>
     string = replace_quotes(string)
 
@@ -95,12 +103,27 @@ def convert_to_markdown(string):
     result = ''
     is_code = False
+    is_latex = False
     for line in string.split('\n'):
-        if line.lstrip(' ').startswith('```'):
+        stripped_line = line.strip()
+
+        if stripped_line.startswith('```'):
             is_code = not is_code
+        elif stripped_line.startswith('$$'):
+            is_latex = not is_latex
+        elif stripped_line.endswith('$$'):
+            is_latex = False
+        elif stripped_line.startswith('\\\\['):
+            is_latex = True
+        elif stripped_line.startswith('\\\\]'):
+            is_latex = False
+        elif stripped_line.endswith('\\\\]'):
+            is_latex = False
 
         result += line
-        if is_code or line.startswith('|'):  # Don't add an extra \n for tables or code
+
+        # Don't add an extra \n for tables, code, or LaTeX
+        if is_code or is_latex or line.startswith('|'):
             result += '\n'
         else:
             result += '\n\n'
diff --git a/modules/logits.py b/modules/logits.py
index 4233c8a5..73cabb41 100644
--- a/modules/logits.py
+++ b/modules/logits.py
@@ -13,8 +13,8 @@ global_scores = None
 
 def get_next_logits(*args, **kwargs):
-    if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']:
-        shared.model, shared.tokenizer = load_model(shared.previous_model_name)
+    if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
+        shared.model, shared.tokenizer = load_model(shared.model_name)
 
     needs_lock = not args[2]  # use_samplers
     if needs_lock:
diff --git a/modules/models.py b/modules/models.py
index ea046e9b..ecef9060 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -368,14 +368,15 @@ def clear_torch_cache():
         torch.cuda.empty_cache()
 
 
-def unload_model():
+def unload_model(keep_model_name=False):
     shared.model = shared.tokenizer = None
-    shared.previous_model_name = shared.model_name
-    shared.model_name = 'None'
     shared.lora_names = []
     shared.model_dirty_from_training = False
     clear_torch_cache()
 
+    if not keep_model_name:
+        shared.model_name = 'None'
+
 
 def reload_model():
     unload_model()
@@ -393,7 +394,7 @@ def unload_model_if_idle():
                 if time.time() - last_generation_time > shared.args.idle_timeout * 60:
                     if shared.model is not None:
                         logger.info("Unloading the model for inactivity.")
-                        unload_model()
+                        unload_model(keep_model_name=True)
             finally:
                 shared.generation_lock.release()
diff --git a/modules/shared.py b/modules/shared.py
index fe09a165..c27657ff 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -13,7 +13,6 @@ from modules.logging_colors import logger
 model = None
 tokenizer = None
 model_name = 'None'
-previous_model_name = 'None'
 is_seq2seq = False
 model_dirty_from_training = False
 lora_names = []
@@ -44,8 +43,6 @@ settings = {
     'negative_prompt': '',
     'seed': -1,
     'truncation_length': 2048,
-    'truncation_length_min': 0,
-    'truncation_length_max': 200000,
     'max_tokens_second': 0,
     'max_updates_second': 0,
     'prompt_lookup_num_tokens': 0,
diff --git a/modules/text_generation.py b/modules/text_generation.py
index d971a30e..75e5ef36 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -32,8 +32,8 @@ from modules.models import clear_torch_cache, load_model
 
 def generate_reply(*args, **kwargs):
-    if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']:
-        shared.model, shared.tokenizer = load_model(shared.previous_model_name)
+    if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
+        shared.model, shared.tokenizer = load_model(shared.model_name)
 
     shared.generation_lock.acquire()
     try:
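Taken together, the four files above replace the `previous_model_name` bookkeeping with a `keep_model_name` flag: an idle unload now leaves `shared.model_name` untouched, and both entry points reload from that single variable. A stripped-down sketch of the flow, with toy stand-ins for the real `shared` module and `load_model()`:

```python
# Toy model of the refactored idle-unload/reload cycle; everything here is a
# stand-in, the names merely mirror the real modules.
class shared:
    model = None
    tokenizer = None
    model_name = 'some-model'   # now survives an idle unload
    class args:
        idle_timeout = 10

def load_model(name):
    return f"<model:{name}>", f"<tokenizer:{name}>"

def unload_model(keep_model_name=False):
    shared.model = shared.tokenizer = None
    if not keep_model_name:
        shared.model_name = 'None'

def generate_reply():
    # Same guard that generate_reply()/get_next_logits() now use
    if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
        shared.model, shared.tokenizer = load_model(shared.model_name)
    return f"reply from {shared.model}"

unload_model(keep_model_name=True)  # what unload_model_if_idle() does
print(generate_reply())             # transparently reloads 'some-model'
```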
diff --git a/modules/training.py b/modules/training.py
index a810fb6e..b003fc8c 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -165,7 +165,7 @@ def create_ui():
                     stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
 
                 with gr.Column():
-                    max_length = gr.Slider(label='max_length', minimum=0, maximum=shared.settings['truncation_length_max'], value=0, step=256, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
+                    max_length = gr.Number(label='max_length', precision=0, step=256, value=0, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
 
             with gr.Row():
                 start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu)
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 7ef8df4d..57143cd8 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -7,6 +7,7 @@ from PIL import Image
 
 from modules import chat, shared, ui, utils
 from modules.html_generator import chat_html_wrapper
+from modules.text_generation import stop_everything_event
 from modules.utils import gradio
 
 inputs = ('Chat input', 'interface_state')
@@ -221,8 +222,8 @@ def create_event_handlers():
         chat.handle_remove_last_click, gradio('interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
 
     shared.gradio['Stop'].click(
-        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        chat.handle_stop_click, gradio('interface_state'), gradio('display'), show_progress=False)
+        stop_everything_event, None, None, queue=False).then(
+        chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
 
     if not shared.args.multi_user:
         shared.gradio['unique_id'].select(
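The Stop button now calls `stop_everything_event` directly with `queue=False`, so the click is not queued behind the very generation it is trying to interrupt, and only then redraws the chat (making the `handle_stop_click` wrapper in modules/chat.py redundant). A minimal Gradio sketch of that event chain, with placeholder callbacks rather than the real UI components:

```python
# Minimal reproduction of the new Stop wiring; the callbacks are placeholders.
import gradio as gr

def stop_everything_event():
    print("stop flag set")  # the real function lives in modules/text_generation.py

def redraw_html():
    return "<div>chat redrawn without the interrupted chunk</div>"

with gr.Blocks() as demo:
    display = gr.HTML()
    stop = gr.Button("Stop")
    # queue=False lets the click bypass the busy generation queue
    stop.click(stop_everything_event, None, None, queue=False).then(
        redraw_html, None, display, show_progress=False)

# demo.launch()  # uncomment to try it
```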
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 2938c120..1883fdca 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -93,7 +93,7 @@ def create_ui():
             shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
             shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
-            shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
+            shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
             shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
             shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
             shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
@@ -101,11 +101,11 @@ def create_ui():
             shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None")
             shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
             shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
-            shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len)
+            shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. Try lowering this if you run out of memory while loading the model.')
             with gr.Blocks():
                 shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
                 shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
-                shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=0, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
+                shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
 
             shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.')
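Here and in modules/training.py and modules/ui_parameters.py, bounded `gr.Slider` fields become unbounded `gr.Number` fields, which is what lets `truncation_length_min`/`truncation_length_max` disappear from the settings. A small before/after sketch in isolation (labels and values are examples, not the real defaults):

```python
# Slider -> Number migration in isolation; values are illustrative.
import gradio as gr

with gr.Blocks() as demo:
    # Before: the ceiling had to come from shared.settings['truncation_length_max']
    old = gr.Slider(label='max_seq_len (old)', minimum=0, maximum=200000, step=256, value=8192)

    # After: an integer field with no artificial ceiling; precision=0 keeps it an int
    new = gr.Number(label='max_seq_len (new)', precision=0, step=256, value=8192,
                    info='Context length. Try lowering this if you run out of memory while loading the model.')

# demo.launch()  # uncomment to compare the two widgets
```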
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index cd4288b3..234e1af2 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -89,7 +89,7 @@ def create_ui(default_preset):
                     shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.')
 
                 with gr.Column():
-                    shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
+                    shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
                     shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
                     shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
                     shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
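The requirements files that follow bump every llama-cpp-python wheel from 0.2.83 to 0.2.85; each line is gated by a PEP 508 environment marker so pip installs exactly one wheel per platform and Python version. A sketch of how such a marker evaluates, using the `packaging` library (assumed installed; pip vendors its own copy):

```python
# Evaluating one of the wheel markers outside of pip.
from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" '
                'and python_version == "3.11"')

# Against the current interpreter and OS:
print(marker.evaluate())

# Against a hypothetical environment:
print(marker.evaluate({
    'platform_system': 'Windows',
    'platform_machine': 'AMD64',
    'python_version': '3.10',
}))  # False
```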
== "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 9c34c95d..9cef52be 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine 
== "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.85+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.85+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 5df8f2ba..e6df644c 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 123b6d9b..35131b95 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= 
"21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 08509b05..ee9876ee 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and 
platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index d4913ac8..87b1a95c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index b468adaf..91c30354 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; 
platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 1bbd4091..1adcec6f 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and 
python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels 
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/settings-template.yaml b/settings-template.yaml
index 8d6e8dd8..59c76c35 100644
--- a/settings-template.yaml
+++ b/settings-template.yaml
@@ -12,8 +12,6 @@ max_new_tokens_max: 4096
 negative_prompt: ''
 seed: -1
 truncation_length: 2048
-truncation_length_min: 0
-truncation_length_max: 200000
 max_tokens_second: 0
 max_updates_second: 0
 prompt_lookup_num_tokens: 0
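settings-template.yaml mirrors the defaults in modules/shared.py, so the two key removals must land in both places. A quick consistency check one could run from the repository root (path assumed; requires PyYAML):

```python
# Sanity check that the dropped keys are gone from the template.
import yaml

with open('settings-template.yaml') as f:
    template = yaml.safe_load(f)

for key in ('truncation_length_min', 'truncation_length_max'):
    assert key not in template, f"{key} should have been removed"

print(template['truncation_length'])  # 2048
```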