diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index f01282f2..1c0159e8 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -349,8 +349,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
     generate_params['stream'] = stream
     requested_model = generate_params.pop('model')
     logprob_proc = generate_params.pop('logprob_proc', None)
-    # generate_params['suffix'] = body.get('suffix', generate_params['suffix'])
-    generate_params['echo'] = body.get('echo', generate_params['echo'])
+    suffix = body['suffix'] if body['suffix'] else ''
+    echo = body['echo']
 
     if not stream:
         prompt_arg = body[prompt_str]
@@ -373,6 +373,7 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
                     except KeyError:
                         prompt = decode(prompt)[0]
 
+            prefix = prompt if echo else ''
             token_count = len(encode(prompt)[0])
             total_prompt_token_count += token_count
 
@@ -393,7 +394,7 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
             respi = {
                 "index": idx,
                 "finish_reason": stop_reason,
-                "text": answer,
+                "text": prefix + answer + suffix,
                 "logprobs": {'top_logprobs': [logprob_proc.token_alternatives]} if logprob_proc else None,
             }
 
@@ -425,6 +426,7 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
             else:
                 raise InvalidRequestError(message="API Batched generation not yet supported.", param=prompt_str)
 
+        prefix = prompt if echo else ''
         token_count = len(encode(prompt)[0])
 
         def text_streaming_chunk(content):
@@ -444,7 +446,7 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
 
             return chunk
 
-        yield text_streaming_chunk('')
+        yield text_streaming_chunk(prefix)
 
         # generate reply #######################################
         debug_msg({'prompt': prompt, 'generate_params': generate_params})
@@ -472,7 +474,7 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
         if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
             stop_reason = "length"
 
-        chunk = text_streaming_chunk('')
+        chunk = text_streaming_chunk(suffix)
         chunk[resp_list][0]["finish_reason"] = stop_reason
         chunk["usage"] = {
             "prompt_tokens": token_count,
diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py
index c9a3b30a..4d49803e 100644
--- a/extensions/openai/typing.py
+++ b/extensions/openai/typing.py
@@ -57,7 +57,7 @@ class CompletionRequestParams(BaseModel):
     suffix: str | None = None
     temperature: float | None = 1
     top_p: float | None = 1
-    user: str | None = None
+    user: str | None = Field(default=None, description="Unused parameter.")
 
 
 class CompletionRequest(GenerationOptions, CompletionRequestParams):