Merge pull request #6199 from oobabooga/dev

Merge dev branch
This commit is contained in:
oobabooga 2024-07-05 00:17:14 -03:00 committed by GitHub
commit 363efe54f4
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: B5690EEEBB952194
28 changed files with 463 additions and 229 deletions

View file

@ -49,7 +49,7 @@
.gradio-container .chat .assistant-message {
padding: 20px;
background: var(--color-grey-200);
background: #f4f4f4;
margin-top: 9px !important;
margin-bottom: 12px !important;
border-radius: 7px;

View file

@ -95,7 +95,7 @@ gradio-app > :first-child {
}
.header_bar {
background-color: #f7f7f7;
background-color: #f4f4f4;
box-shadow: 0 0 3px rgba(22 22 22 / 35%);
margin-bottom: 0;
overflow-x: scroll;
@ -336,6 +336,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
padding-left: 0;
padding-right: 0;
}
.chat {
padding-left: 0;
padding-right: 0;
}
}
.chat {
@ -391,7 +396,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
.chat .message:last-child {
margin-bottom: 0 !important;
padding-bottom: 0 !important;
padding-bottom: 15px !important;
}
.message-body li {
@ -510,7 +515,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
#show-controls {
position: absolute;
height: 100%;
background-color: var(--background-fill-primary);
background-color: transparent;
border: 0 !important;
border-radius: 0;
}

View file

@ -33,7 +33,7 @@ params = {
'hr_upscaler': 'ESRGAN_4x',
'hr_scale': '1.0',
'seed': -1,
'sampler_name': 'DPM++ 2M Karras',
'sampler_name': 'DPM++ 2M',
'steps': 32,
'cfg_scale': 7,
'textgen_prefix': 'Please provide a detailed and vivid description of [subject]',

View file

@ -0,0 +1,86 @@
// Whisper STT extension: module-level recording state shared by the handlers below.
console.log("Whisper STT script loaded");
// MediaRecorder for the current recording; assigned each time a recording starts.
let mediaRecorder;
// Chunks pushed by the recorder's "dataavailable" events; reset when a recording starts.
let audioChunks = [];
// True while a recording is in progress (toggled by window.startStopRecording).
let isRecording = false;
/**
 * Toggle microphone recording for the Whisper STT extension.
 *
 * First call: asks for microphone access, starts a MediaRecorder and labels
 * both record buttons "Stop". Second call: stops the recorder and labels the
 * buttons "Rec." again. When the recorder stops, the captured audio is encoded
 * as a base64 data URL and written into the "#audio-base64 textarea" element,
 * followed by "input" and "change" events so listeners see the new value.
 *
 * Relies on the module-level `mediaRecorder`, `audioChunks`, `isRecording`,
 * `recordButton` and `recButton` bindings.
 */
window.startStopRecording = function() {
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        console.error("getUserMedia not supported on your browser!");
        return;
    }

    if (isRecording) {
        recordButton.innerHTML = recButton.innerHTML = "Rec.";
        isRecording = false;
        mediaRecorder.stop();
        return;
    }

    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            mediaRecorder = new MediaRecorder(stream);
            audioChunks = []; // Reset audio chunks

            // Attach the listeners before starting so no event can be missed.
            mediaRecorder.addEventListener("dataavailable", event => {
                audioChunks.push(event.data);
            });

            mediaRecorder.addEventListener("stop", () => {
                // Release the microphone; otherwise the capture stays active
                // (and the browser keeps showing the recording indicator).
                stream.getTracks().forEach(track => track.stop());

                if (audioChunks.length === 0) {
                    console.error("No audio data recorded for Whisper");
                    return;
                }

                const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
                const reader = new FileReader();
                reader.onloadend = function() {
                    // "loadend" also fires after a failed read; do not forward a null result.
                    if (reader.error) {
                        console.error("Could not read recorded audio for Whisper", reader.error);
                        return;
                    }

                    const base64data = reader.result;
                    const audioBase64Input = document.querySelector("#audio-base64 textarea");
                    if (audioBase64Input) {
                        audioBase64Input.value = base64data;
                        audioBase64Input.dispatchEvent(new Event("input", { bubbles: true }));
                        audioBase64Input.dispatchEvent(new Event("change", { bubbles: true }));
                    } else {
                        console.error("Could not find audio-base64 textarea");
                    }
                };
                reader.readAsDataURL(audioBlob);
            });

            mediaRecorder.start();
            recordButton.innerHTML = recButton.innerHTML = "Stop";
            isRecording = true;
        })
        .catch(error => {
            // Permission denied, no input device, etc. The state stays "not recording".
            console.error("Could not start audio recording for Whisper", error);
        });
};
// Look up the "#record-button" element inside the Gradio app root and make a
// click on it toggle recording.
const recordButton = gradioApp().querySelector("#record-button");
recordButton.addEventListener("click", window.startStopRecording);
/**
 * Return the root node to query for Gradio elements: the shadow root of the
 * first <gradio-app> element when it has one, otherwise the document itself.
 */
function gradioApp() {
    const hosts = document.getElementsByTagName("gradio-app");
    if (hosts.length > 0) {
        const shadow = hosts[0].shadowRoot;
        if (shadow) {
            return shadow;
        }
    }
    return document;
}
// extra rec button next to generate button
// A deep clone of the record button is inserted right after the element with
// id "Generate"; clicking the clone is forwarded to the original record button.
var recButton = recordButton.cloneNode(true);
var generate_button = document.getElementById("Generate");
generate_button.insertAdjacentElement("afterend", recButton);
recButton.style.setProperty("margin-left", "-10px");
recButton.innerHTML = "Rec.";
recButton.addEventListener("click", function() {
recordButton.click();
});

View file

@ -1,5 +1,13 @@
import base64
import gc
import io
from pathlib import Path
import gradio as gr
import speech_recognition as sr
import numpy as np
import torch
import whisper
from pydub import AudioSegment
from modules import shared
@ -8,13 +16,16 @@ input_hijack = {
'value': ["", ""]
}
# parameters which can be customized in settings.json of webui
# parameters which can be customized in settings.yaml of webui
params = {
'whipser_language': 'english',
'whipser_model': 'small.en',
'auto_submit': True
}
startup_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
WHISPERMODEL = whisper.load_model(params['whipser_model'], device=startup_device)
def chat_input_modifier(text, visible_text, state):
global input_hijack
@ -25,47 +36,84 @@ def chat_input_modifier(text, visible_text, state):
return text, visible_text
def do_stt(audio, whipser_model, whipser_language):
transcription = ""
r = sr.Recognizer()
def do_stt(audio, whipser_language):
# use pydub to convert sample_rate and sample_width for whisper input
dubaudio = AudioSegment.from_file(io.BytesIO(audio))
dubaudio = dubaudio.set_channels(1)
dubaudio = dubaudio.set_frame_rate(16000)
dubaudio = dubaudio.set_sample_width(2)
# Convert to AudioData
audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
# same method to get the array as openai whisper repo used from wav file
audio_np = np.frombuffer(dubaudio.raw_data, np.int16).flatten().astype(np.float32) / 32768.0
try:
transcription = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model)
except sr.UnknownValueError:
print("Whisper could not understand audio")
except sr.RequestError as e:
print("Could not request results from Whisper", e)
if len(whipser_language) == 0:
result = WHISPERMODEL.transcribe(audio=audio_np)
else:
result = WHISPERMODEL.transcribe(audio=audio_np, language=whipser_language)
return result["text"]
def auto_transcribe(audio, auto_submit, whipser_language):
    """
    Transcribe a base64 data URL produced by the browser-side recorder.

    :param audio: String of the form "<header>,<base64 payload>"; None or ""
        means nothing was recorded.
    :param auto_submit: When truthy, the transcription is also stored in
        ``input_hijack`` so that it replaces the next chat input.
    :param whipser_language: Language passed through to ``do_stt``.
    :return: The transcription text, or "" when there is no audio. A single
        string is always returned because the event handler is wired to a
        single output component.
    """
    if audio is None or audio == "":
        print("Whisper received no audio data")
        return ""

    # Drop the data-URL header; only the part after the first comma is base64.
    audio_bytes = base64.b64decode(audio.split(',')[1])

    transcription = do_stt(audio_bytes, whipser_language)
    if auto_submit:
        input_hijack.update({"state": True, "value": [transcription, transcription]})

    return transcription
def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
if audio is None:
return "", ""
transcription = do_stt(audio, whipser_model, whipser_language)
if auto_submit:
input_hijack.update({"state": True, "value": [transcription, transcription]})
def reload_whispermodel(whisper_model_name: str, whisper_language: str, device: str):
if len(whisper_model_name) > 0:
global WHISPERMODEL
WHISPERMODEL = None
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
return transcription, None
if device != "none":
if device == "cuda":
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
WHISPERMODEL = whisper.load_model(whisper_model_name, device=device)
params.update({"whipser_model": whisper_model_name})
if ".en" in whisper_model_name:
whisper_language = "english"
audio_update = gr.Audio.update(interactive=True)
else:
audio_update = gr.Audio.update(interactive=False)
return [whisper_model_name, whisper_language, str(device), audio_update]
def ui():
with gr.Accordion("Whisper STT", open=True):
with gr.Row():
audio = gr.Audio(source="microphone")
audio = gr.Textbox(elem_id="audio-base64", visible=False)
record_button = gr.Button("Rec.", elem_id="record-button", elem_classes="custom-button")
with gr.Row():
with gr.Accordion("Settings", open=False):
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
whipser_model = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
whipser_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"])
whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
audio.stop_recording(
auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then(
None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}")
audio.change(
auto_transcribe, [audio, auto_submit, whisper_language], [shared.gradio['textbox']]).then(
None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")
whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
device_dropd.input(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
whisper_model_dropd.change(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
whisper_language.change(lambda x: params.update({"whipser_language": x}), whisper_language, None)
auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
def custom_js():
    """
    Provide the extension's custom javascript, read from the "script.js" file
    located next to this module. It is applied whenever the web UI is loaded.

    :return: The contents of script.js as a string.
    """
    script_path = Path(__file__).parent.resolve() / "script.js"
    return script_path.read_text()

View file

@ -7,30 +7,30 @@ main_parent.parentNode.style = "gap: 0";
main_parent.parentNode.parentNode.style = "padding: 0";
document.querySelector(".header_bar").addEventListener("click", function(event) {
if (event.target.tagName === "BUTTON") {
const buttonText = event.target.textContent.trim();
if (event.target.tagName !== "BUTTON") return;
let chat_visible = (buttonText == "Chat");
let default_visible = (buttonText == "Default");
let notebook_visible = (buttonText == "Notebook");
const buttonText = event.target.textContent.trim();
const extensionsVisible = ["Chat", "Default", "Notebook"].includes(buttonText);
const chatVisible = buttonText === "Chat";
const showControlsChecked = document.querySelector("#show-controls input").checked;
const extensions = document.querySelector("#extensions");
// Check if one of the generation tabs is visible
if (chat_visible || notebook_visible || default_visible) {
extensions && (extensions.style.display = "flex");
if (chat_visible) {
this.style.marginBottom = "0px";
extensions && (extensions.style.maxWidth = "880px");
extensions && (extensions.style.padding = "0px");
} else {
this.style.marginBottom = "19px";
extensions && (extensions.style.maxWidth = "none");
extensions && (extensions.style.padding = "15px");
}
} else {
this.style.marginBottom = "19px";
extensions && (extensions.style.display = "none");
if (extensionsVisible) {
if (extensions) {
extensions.style.display = "flex";
extensions.style.maxWidth = chatVisible ? "880px" : "none";
extensions.style.padding = chatVisible ? "0px" : "15px";
}
this.style.marginBottom = chatVisible ? "0px" : "19px";
if (chatVisible && !showControlsChecked) {
document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions").forEach(element => {
element.style.display = "none";
});
}
} else {
this.style.marginBottom = "19px";
if (extensions) extensions.style.display = "none";
}
});
@ -539,3 +539,64 @@ document.querySelectorAll(".focus-on-chat-input").forEach(element => {
// Fix a border around the "past chats" menu
//------------------------------------------------
document.getElementById("past-chats").parentNode.style.borderRadius = "0px";
//------------------------------------------------
// Allow the character dropdown to coexist at the
// Chat tab and the Parameters > Character tab
//------------------------------------------------
// Tab header bar; clicks on its buttons decide where the character dropdown lives.
const headerBar = document.querySelector(".header_bar");
// Parent the dropdown container was attached to before it was first moved.
let originalParent;
let originalIndex; // To keep track of the original position
// The dropdown container element that gets moved between tabs.
let movedElement;
/**
 * Move the character dropdown (the grandparent of #character-menu) into the
 * first child of #chat-controls, two positions before its end, and hide the
 * #save-character button. The dropdown's original parent and index are
 * remembered the first time this runs so that it can be put back later.
 */
function moveToChatTab() {
    const dropdownWrapper = document.getElementById("character-menu").parentElement.parentElement;

    // Remember where the dropdown came from, but only on the very first move
    if (!originalParent) {
        originalParent = dropdownWrapper.parentElement;
        originalIndex = Array.prototype.indexOf.call(originalParent.children, dropdownWrapper);
        movedElement = dropdownWrapper;
    }

    // In "instruct" mode the Character dropdown must stay hidden in the Chat tab
    const instructRadio = document.querySelector("#chat-mode input[value=\"instruct\"]");
    const instructSelected = instructRadio && instructRadio.checked;
    if (instructSelected) {
        dropdownWrapper.style.display = "none";
    }

    const chatColumn = document.querySelector("#chat-controls").firstElementChild;
    const insertionPoint = chatColumn.children[chatColumn.children.length - 2];
    chatColumn.insertBefore(dropdownWrapper, insertionPoint);

    document.getElementById("save-character").style.display = "none";
}
/**
 * Put the previously moved character dropdown back at its remembered index
 * inside its original parent, and clear the inline "display" overrides on it
 * and on the #save-character button. Does nothing if no move was recorded.
 */
function restoreOriginalPosition() {
    if (!originalParent || !movedElement) {
        return;
    }

    // Past the end of the current child list: append instead of inserting
    const pastEnd = originalIndex >= originalParent.children.length;
    if (pastEnd) {
        originalParent.appendChild(movedElement);
    } else {
        const nextSibling = originalParent.children[originalIndex];
        originalParent.insertBefore(movedElement, nextSibling);
    }

    document.getElementById("save-character").style.display = "";
    movedElement.style.display = "";
}
// Keep the character dropdown in the Chat tab while that tab is selected and
// send it back to its original place when any other tab button is clicked.
headerBar.addEventListener("click", (e) => {
    if (e.target.tagName !== "BUTTON") {
        return;
    }

    const chatSelected = e.target.textContent.trim() === "Chat";
    if (chatSelected) {
        moveToChatTab();
    } else {
        restoreOriginalPosition();
    }
});

// Initial placement when the script is first run
moveToChatTab();

View file

@ -3,6 +3,7 @@ import copy
import functools
import html
import json
import pprint
import re
from datetime import datetime
from functools import partial
@ -259,10 +260,27 @@ def get_stopping_strings(state):
suffix_bot + prefix_user,
]
# Try to find the EOT token
for item in stopping_strings.copy():
item = item.strip()
if item.startswith("<") and ">" in item:
stopping_strings.append(item.split(">")[0] + ">")
elif item.startswith("[") and "]" in item:
stopping_strings.append(item.split("]")[0] + "]")
if 'stopping_strings' in state and isinstance(state['stopping_strings'], list):
stopping_strings += state.pop('stopping_strings')
return list(set(stopping_strings))
# Remove redundant items that start with another item
result = [item for item in stopping_strings if not any(item.startswith(other) and item != other for other in stopping_strings)]
result = list(set(result))
if shared.args.verbose:
logger.info("STOPPING_STRINGS=")
pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(result)
print()
return result
def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):

View file

@ -1,3 +1,4 @@
import importlib
from typing import Sequence
from tqdm import tqdm
@ -5,20 +6,55 @@ from tqdm import tqdm
from modules import shared
from modules.cache_utils import process_llamacpp_cache
try:
import llama_cpp
except:
llama_cpp = None
try:
import llama_cpp_cuda
except:
llama_cpp_cuda = None
imported_module = None
try:
import llama_cpp_cuda_tensorcores
except:
llama_cpp_cuda_tensorcores = None
def llama_cpp_lib():
    """
    Import and return the llama-cpp-python variant selected by the command-line flags.

    Candidates are tried in this order, stopping at the first that imports:
    'llama_cpp' when --cpu is set, 'llama_cpp_cuda_tensorcores' when
    --tensorcores is set, 'llama_cpp_cuda', and finally 'llama_cpp' when --cpu
    is not set.

    Only one variant may be imported per process. The name of the variant that
    was imported is kept in the module-level ``imported_module``.

    :return: The imported module, or None if no candidate could be imported.
    :raises Exception: If a candidate is about to be tried while a different
        variant has already been imported.
    """
    global imported_module

    candidates = []
    if shared.args.cpu:
        candidates.append('llama_cpp')
    if shared.args.tensorcores:
        candidates.append('llama_cpp_cuda_tensorcores')
    candidates.append('llama_cpp_cuda')
    if not shared.args.cpu:
        candidates.append('llama_cpp')

    for name in candidates:
        if imported_module and imported_module != name:
            raise Exception(f"Cannot import '{name}' because '{imported_module}' is already imported. See issue #1575 in llama-cpp-python. Please restart the server before attempting to use a different version of llama-cpp-python.")

        first_import = imported_module != name
        try:
            lib = importlib.import_module(name)
        except Exception:
            # This variant is not installed or failed to load; try the next one.
            continue

        imported_module = name

        # Patch only once per library. Patching again would point
        # Llama.original_generate at the already-patched generate, which then
        # calls itself without end.
        if first_import:
            monkey_patch_llama_cpp_python(lib)

        return lib

    return None
def eval_with_progress(self, tokens: Sequence[int]):
@ -63,7 +99,7 @@ def eval_with_progress(self, tokens: Sequence[int]):
self.n_tokens += n_tokens
def monkey_patch_generate(lib):
def monkey_patch_llama_cpp_python(lib):
def my_generate(self, *args, **kwargs):
@ -77,11 +113,6 @@ def monkey_patch_generate(lib):
for output in self.original_generate(*args, **kwargs):
yield output
lib.Llama.eval = eval_with_progress
lib.Llama.original_generate = lib.Llama.generate
lib.Llama.generate = my_generate
for lib in [llama_cpp, llama_cpp_cuda, llama_cpp_cuda_tensorcores]:
if lib is not None:
lib.Llama.eval = eval_with_progress
monkey_patch_generate(lib)

View file

@ -7,35 +7,10 @@ from torch.nn import CrossEntropyLoss
from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
from transformers.modeling_outputs import CausalLMOutputWithPast
from modules import llama_cpp_python_hijack, shared
from modules import shared
from modules.llama_cpp_python_hijack import llama_cpp_lib
from modules.logging_colors import logger
try:
import llama_cpp
except:
llama_cpp = None
try:
import llama_cpp_cuda
except:
llama_cpp_cuda = None
try:
import llama_cpp_cuda_tensorcores
except:
llama_cpp_cuda_tensorcores = None
def llama_cpp_lib():
if shared.args.cpu and llama_cpp is not None:
return llama_cpp
elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
return llama_cpp_cuda_tensorcores
elif llama_cpp_cuda is not None:
return llama_cpp_cuda
else:
return llama_cpp
class LlamacppHF(PreTrainedModel):
def __init__(self, model, path):
@ -221,6 +196,13 @@ class LlamacppHF(PreTrainedModel):
'flash_attn': shared.args.flash_attn
}
if shared.args.cache_4bit:
params["type_k"] = 2
params["type_v"] = 2
elif shared.args.cache_8bit:
params["type_k"] = 8
params["type_v"] = 8
Llama = llama_cpp_lib().Llama
model = Llama(**params)

View file

@ -4,37 +4,12 @@ from functools import partial
import numpy as np
import torch
from modules import llama_cpp_python_hijack, shared
from modules import shared
from modules.callbacks import Iteratorize
from modules.llama_cpp_python_hijack import llama_cpp_lib
from modules.logging_colors import logger
from modules.text_generation import get_max_prompt_length
try:
import llama_cpp
except:
llama_cpp = None
try:
import llama_cpp_cuda
except:
llama_cpp_cuda = None
try:
import llama_cpp_cuda_tensorcores
except:
llama_cpp_cuda_tensorcores = None
def llama_cpp_lib():
if shared.args.cpu and llama_cpp is not None:
return llama_cpp
elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
return llama_cpp_cuda_tensorcores
elif llama_cpp_cuda is not None:
return llama_cpp_cuda
else:
return llama_cpp
def ban_eos_logits_processor(eos_token, input_ids, logits):
logits[eos_token] = -float('inf')
@ -100,6 +75,13 @@ class LlamaCppModel:
'flash_attn': shared.args.flash_attn
}
if shared.args.cache_4bit:
params["type_k"] = 2
params["type_v"] = 2
elif shared.args.cache_8bit:
params["type_k"] = 8
params["type_v"] = 8
result.model = Llama(**params)
if cache_capacity > 0:
result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))

View file

@ -21,6 +21,7 @@ loaders_and_params = OrderedDict({
'trust_remote_code',
'no_use_fast',
'use_flash_attention_2',
'use_eager_attention',
'alpha_value',
'compress_pos_emb',
'disable_exllama',
@ -30,6 +31,8 @@ loaders_and_params = OrderedDict({
'llama.cpp': [
'n_ctx',
'n_gpu_layers',
'cache_8bit',
'cache_4bit',
'tensor_split',
'n_batch',
'threads',
@ -51,6 +54,8 @@ loaders_and_params = OrderedDict({
'llamacpp_HF': [
'n_ctx',
'n_gpu_layers',
'cache_8bit',
'cache_4bit',
'tensor_split',
'n_batch',
'threads',

View file

@ -146,6 +146,9 @@ def huggingface_loader(model_name):
if shared.args.force_safetensors:
params['force_safetensors'] = True
if shared.args.use_eager_attention:
params['attn_implementation'] = 'eager'
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
if 'chatglm' in model_name.lower():

View file

@ -9,6 +9,8 @@ from modules import chat, loaders, metadata_gguf, shared, ui
def get_fallback_settings():
return {
'bf16': False,
'use_eager_attention': False,
'wbits': 'None',
'groupsize': 'None',
'desc_act': False,
@ -97,10 +99,18 @@ def get_model_metadata(model):
elif 'attn_config' in metadata and 'rope_theta' in metadata['attn_config']:
model_settings['rope_freq_base'] = metadata['attn_config']['rope_theta']
if 'rope_scaling' in metadata and type(metadata['rope_scaling']) is dict and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
if 'rope_scaling' in metadata and isinstance(metadata['rope_scaling'], dict) and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
if metadata['rope_scaling']['type'] == 'linear':
model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']
# For Gemma-2
if 'torch_dtype' in metadata and metadata['torch_dtype'] == 'bfloat16':
model_settings['bf16'] = True
# For Gemma-2
if 'architectures' in metadata and isinstance(metadata['architectures'], list) and 'Gemma2ForCausalLM' in metadata['architectures']:
model_settings['use_eager_attention'] = True
# Read GPTQ metadata for old GPTQ loaders
if 'quantization_config' in metadata and metadata['quantization_config'].get('quant_method', '') != 'exl2':
if 'bits' in metadata['quantization_config']:
@ -133,7 +143,7 @@ def get_model_metadata(model):
for k in ['eos_token', 'bos_token']:
if k in metadata:
value = metadata[k]
if type(value) is dict:
if isinstance(value, dict):
value = value['content']
template = template.replace(k, "'{}'".format(value))
@ -168,7 +178,7 @@ def infer_loader(model_name, model_settings):
path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
if not path_to_model.exists():
loader = None
elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
loader = 'ExLlamav2_HF'
elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
loader = 'AutoAWQ'

View file

@ -359,14 +359,14 @@ class RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor):
return scores
def get_logits_warper_patch(self, generation_config):
def get_logits_warper_patch(self, generation_config, **kwargs):
# Parameter sanitization
if isinstance(generation_config.temperature, int):
generation_config.temperature = float(generation_config.temperature) # Must be float
# Get the original warpers
warpers = self._get_logits_warper_old(generation_config)
warpers = self._get_logits_warper_old(generation_config, **kwargs)
# Replace temperature with our modified class.
# Currently, it behaves identically to the original.

View file

@ -106,6 +106,7 @@ group.add_argument('--trust-remote-code', action='store_true', help='Set trust_r
group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')
group.add_argument('--use_eager_attention', action='store_true', help='Set attn_implementation= eager while loading the model.')
# bitsandbytes 4-bit
group = parser.add_argument_group('bitsandbytes 4-bit')

View file

@ -43,6 +43,11 @@ theme = gr.themes.Default(
body_text_color_subdued='#484848',
background_fill_secondary='#eaeaea',
background_fill_primary='var(--neutral-50)',
body_background_fill="white",
block_background_fill="#f4f4f4",
body_text_color="#333",
button_secondary_background_fill="#f4f4f4",
button_secondary_border_color="var(--border-color-primary)"
)
if Path("notification.mp3").exists():
@ -64,6 +69,7 @@ def list_model_elements():
'trust_remote_code',
'no_use_fast',
'use_flash_attention_2',
'use_eager_attention',
'load_in_4bit',
'compute_dtype',
'quant_type',

View file

@ -87,16 +87,11 @@ def create_ui():
with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.', elem_id='chat-mode')
with gr.Row():
shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', elem_classes='slim-dropdown')
shared.gradio['refresh_character'] = ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
with gr.Row():
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=16, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
def create_chat_settings_ui():
@ -105,10 +100,15 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column(scale=8):
with gr.Tab("Character"):
with gr.Row():
shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
shared.gradio['save_character'] = gr.Button('Save character', elem_classes=['small-button'], interactive=not mu)
with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
@ -300,8 +300,10 @@ def create_event_handlers():
lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
shared.gradio['mode'].change(None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
shared.gradio['mode'].change(
lambda x: [gr.update(visible=(x != 'instruct'))] * 4 + [gr.update(visible=(x == 'chat-instruct'))], gradio('mode'), gradio('character_menu', 'refresh_character', 'delete_character', 'chat_style', 'chat-instruct_command'), show_progress=False).then(
lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
chat.redraw_html, gradio(reload_arr), gradio('display')).then(

View file

@ -16,7 +16,6 @@ outputs = ('output_textbox', 'html-default')
def create_ui():
mu = shared.args.multi_user
with gr.Tab('Default', elem_id='default-tab'):
shared.gradio['last_input-default'] = gr.State('')
with gr.Row():
with gr.Column():
with gr.Row():
@ -63,14 +62,12 @@ def create_ui():
def create_event_handlers():
shared.gradio['Generate-default'].click(
lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox-default'].submit(
lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(

View file

@ -115,6 +115,7 @@ def create_ui():
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.')
shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')

View file

@ -1,5 +1,5 @@
accelerate==0.30.*
aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
accelerate==0.31.*
aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1
bitsandbytes==0.43.*
colorama
@ -7,7 +7,7 @@ datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -24,7 +24,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -35,22 +35,22 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@ -1,10 +1,10 @@
accelerate==0.30.*
accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -32,14 +32,14 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.79+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.81+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@ -1,10 +1,10 @@
accelerate==0.30.*
accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -32,10 +32,10 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@ -1,10 +1,10 @@
accelerate==0.30.*
accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -32,10 +32,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl

View file

@ -1,10 +1,10 @@
accelerate==0.30.*
accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -32,12 +32,10 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.79-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.81-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6-py3-none-any.whl

View file

@ -1,10 +1,10 @@
accelerate==0.30.*
accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View file

@ -1,10 +1,10 @@
accelerate==0.30.*
accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View file

@ -1,5 +1,5 @@
accelerate==0.30.*
aqlm[gpu,cpu]==1.1.5; platform_system == "Linux"
accelerate==0.31.*
aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1
bitsandbytes==0.43.*
colorama
@ -7,7 +7,7 @@ datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -24,7 +24,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb
@ -35,22 +35,22 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.79+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.81+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.79+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.81+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.1.6/exllamav2-0.1.6+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@ -1,10 +1,10 @@
accelerate==0.30.*
accelerate==0.31.*
colorama
datasets
einops
gradio==4.26.*
hqq==0.1.7.post3
jinja2==3.1.2
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.41.*
transformers==4.42.*
tqdm
wandb