From 4c74c7a1167defef1f9ea217507990974c8fde3d Mon Sep 17 00:00:00 2001
From: GralchemOz <68577430+GralchemOz@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:00:59 +0800
Subject: [PATCH] Fix UnicodeDecodeError for BPE-based Models (especially GLM-4) (#6357)

---
 modules/text_generation.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/modules/text_generation.py b/modules/text_generation.py
index 75e5ef36..e7a2b43f 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -274,7 +274,12 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
     if (hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from) and not reply.startswith(' '):
         first_token = shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from]))
         if isinstance(first_token, (bytes,)):
-            first_token = first_token.decode('utf8')
+            #try to decode the bytes to a string
+            try:
+                first_token = first_token.decode('utf8')
+            #if it fails, which means it's not a string in this turn, just ignore it
+            except UnicodeDecodeError:
+                first_token = ''
 
         if first_token.startswith('▁'):
             reply = ' ' + reply