diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 2e2d3792..dee528ee 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -755,6 +755,7 @@ void LLamaModel::embedInternal(
         tokens.resize(text.length()+4);
         int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
         if (n_tokens) {
+            (void)eos_token;
             assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
             tokens.resize(n_tokens - useEOS); // erase EOS/SEP
         } else {
diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py
index 90f44354..5ef81bf3 100644
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -497,16 +497,16 @@ class GPT4All:
         if self._history is not None:
             # check if there is only one message, i.e. system prompt:
             reset = len(self._history) == 1
-            generate_kwargs["reset_context"] = reset
             self._history.append({"role": "user", "content": prompt})
 
             fct_func = self._format_chat_prompt_template.__func__  # type: ignore[attr-defined]
             if fct_func is GPT4All._format_chat_prompt_template:
                 if reset:
                     # ingest system prompt
-                    self.model.prompt_model(self._history[0]["content"], "%1",
+                    # use "%1%2" and not "%1" to avoid implicit whitespace
+                    self.model.prompt_model(self._history[0]["content"], "%1%2",
                                             empty_response_callback,
-                                            n_batch=n_batch, n_predict=0, special=True)
+                                            n_batch=n_batch, n_predict=0, reset_context=True, special=True)
                 prompt_template = self._current_prompt_template.format("%1", "%2")
             else:
                 warnings.warn(
@@ -519,6 +519,7 @@ class GPT4All:
                     self._history[0]["content"] if reset else "",
                 )
                 prompt_template = "%1"
+            generate_kwargs["reset_context"] = reset
         else:
             prompt_template = "%1"
             generate_kwargs["reset_context"] = True
diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py
index 86e5e6f3..c309250d 100644
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@@ -68,7 +68,7 @@ def get_long_description():
 
 setup(
     name=package_name,
-    version="2.5.1",
+    version="2.5.2",
     description="Python bindings for GPT4All",
     long_description=get_long_description(),
     long_description_content_type="text/markdown",
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index 6d812953..37f494c1 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -733,23 +733,13 @@ void ChatLLM::generateName()
     if (!isModelLoaded())
         return;
 
-    QString instructPrompt("### Instruction:\n"
-                           "Describe response above in three words.\n"
-                           "### Response:\n");
+    std::string instructPrompt("### Instruction:\n%1\n### Response:\n"); // standard Alpaca
     auto promptFunc = std::bind(&ChatLLM::handleNamePrompt, this, std::placeholders::_1);
-    auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1,
-                                  std::placeholders::_2);
+    auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1, std::placeholders::_2);
    auto recalcFunc = std::bind(&ChatLLM::handleNameRecalculate, this, std::placeholders::_1);
     LLModel::PromptContext ctx = m_ctx;
-#if defined(DEBUG)
-    printf("%s", qPrintable(instructPrompt));
-    fflush(stdout);
-#endif
-    m_llModelInfo.model->prompt(instructPrompt.toStdString(), "%1", promptFunc, responseFunc, recalcFunc, ctx);
-#if defined(DEBUG)
-    printf("\n");
-    fflush(stdout);
-#endif
+    m_llModelInfo.model->prompt("Describe response above in three words.", instructPrompt, promptFunc, responseFunc,
+                                recalcFunc, ctx);
     std::string trimmed = trim_whitespace(m_nameResponse);
     if (trimmed != m_nameResponse) {
         m_nameResponse = trimmed;
@@ -1056,7 +1046,8 @@ void ChatLLM::processSystemPrompt()
     fflush(stdout);
 #endif
     auto old_n_predict = std::exchange(m_ctx.n_predict, 0); // decode system prompt without a response
-    m_llModelInfo.model->prompt(systemPrompt, "%1", promptFunc, nullptr, recalcFunc, m_ctx, true);
+    // use "%1%2" and not "%1" to avoid implicit whitespace
+    m_llModelInfo.model->prompt(systemPrompt, "%1%2", promptFunc, nullptr, recalcFunc, m_ctx, true);
     m_ctx.n_predict = old_n_predict;
 #if defined(DEBUG)
     printf("\n");
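
For illustration only (not part of the patch): a minimal sketch of how the changed gpt4all.py path is exercised through the public Python API. The model filename is a placeholder, and the comments restate what the hunks above do; treat this as a hedged usage example rather than an authoritative test.

    # Sketch: exercising the system-prompt ingestion path changed above.
    # Assumes some locally available GGUF model file; the name below is a placeholder.
    from gpt4all import GPT4All

    model = GPT4All("example-model.Q4_0.gguf")  # placeholder model name
    with model.chat_session(system_prompt="You are a terse assistant."):
        # On the first generate() call the history holds only the system prompt,
        # so reset is True: the binding decodes the system prompt once with the
        # "%1%2" template (avoiding implicit whitespace) and n_predict=0, then
        # sets reset_context for the user turn as in the second gpt4all.py hunk.
        print(model.generate("Name the three primary colors.", max_tokens=64))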