Add top_k to RWKV

oobabooga 2023-03-07 17:24:28 -03:00
parent 827ae51f72
commit 8660227e1b
3 changed files with 5 additions and 4 deletions

modules/RWKV.py

@@ -33,10 +33,11 @@ class RWKVModel:
         result.pipeline = pipeline
         return result
 
-    def generate(self, context, token_count=20, temperature=1, top_p=1, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None):
+    def generate(self, context, token_count=20, temperature=1, top_p=1, top_k=50, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None):
         args = PIPELINE_ARGS(
             temperature = temperature,
             top_p = top_p,
+            top_k = top_k,
             alpha_frequency = alpha_frequency, # Frequency Penalty (as in GPT-3)
             alpha_presence = alpha_presence, # Presence Penalty (as in GPT-3)
             token_ban = token_ban, # ban the generation of some tokens

modules/text_generation.py

@@ -92,7 +92,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
     # separately and terminate the function call earlier
     if shared.is_RWKV:
         if shared.args.no_stream:
-            reply = shared.model.generate(question, token_count=max_new_tokens, temperature=temperature, top_p=top_p)
+            reply = shared.model.generate(question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k)
             t1 = time.time()
             print(f"Output generated in {(t1-t0):.2f} seconds.")
             yield formatted_outputs(reply, shared.model_name)
@@ -100,7 +100,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
             yield formatted_outputs(question, shared.model_name)
             for i in tqdm(range(max_new_tokens//8+1)):
                 clear_torch_cache()
-                reply = shared.model.generate(question, token_count=8, temperature=temperature, top_p=top_p)
+                reply = shared.model.generate(question, token_count=8, temperature=temperature, top_p=top_p, top_k=top_k)
                 yield formatted_outputs(reply, shared.model_name)
                 question = reply
             return
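
Both RWKV call sites in generate_reply now forward top_k. Note how the second branch fakes token streaming: it generates 8 tokens at a time and feeds the concatenated output back in as the next prompt (as the `question = reply` line implies, the model's generate returns the context plus the new text). A standalone sketch of that pattern, with illustrative names:

def chunked_stream(generate, prompt, max_new_tokens, chunk=8, **sampling):
    # Pseudo-streaming: emit the running text every `chunk` tokens.
    # `generate` is assumed to return context + newly generated text,
    # matching the RWKV loop above.
    text = prompt
    for _ in range(max_new_tokens // chunk + 1):
        text = generate(text, token_count=chunk, **sampling)
        yield text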

requirements.txt

@@ -3,7 +3,7 @@ bitsandbytes==0.37.0
 flexgen==0.1.7
 gradio==3.18.0
 numpy
-rwkv==0.0.8
+rwkv==0.1.0
 safetensors==0.2.8
 sentencepiece
 git+https://github.com/oobabooga/transformers@llama_push
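
The rwkv pin moves from 0.0.8 to 0.1.0, presumably the first release whose PIPELINE_ARGS accepts top_k. For reference, direct use of the pip package looks roughly like this (paths and parameter values are placeholders; treat the exact signatures as an assumption for this version):

from rwkv.model import RWKV
from rwkv.utils import PIPELINE, PIPELINE_ARGS

model = RWKV(model='path/to/rwkv-model.pth', strategy='cpu fp32')
pipeline = PIPELINE(model, '20B_tokenizer.json')
args = PIPELINE_ARGS(temperature=1.0, top_p=0.85, top_k=50)
print(pipeline.generate("Hello, my name is", token_count=20, args=args))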