diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 2e2d3792..dee528ee 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -755,6 +755,7 @@ void LLamaModel::embedInternal(
         tokens.resize(text.length()+4);
         int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
         if (n_tokens) {
+            (void)eos_token;
             assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
             tokens.resize(n_tokens - useEOS); // erase EOS/SEP
         } else {
diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py
index 90f44354..5ef81bf3 100644
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -497,16 +497,16 @@ class GPT4All:
         if self._history is not None:
             # check if there is only one message, i.e. system prompt:
             reset = len(self._history) == 1
-            generate_kwargs["reset_context"] = reset
             self._history.append({"role": "user", "content": prompt})
 
             fct_func = self._format_chat_prompt_template.__func__  # type: ignore[attr-defined]
             if fct_func is GPT4All._format_chat_prompt_template:
                 if reset:
                     # ingest system prompt
-                    self.model.prompt_model(self._history[0]["content"], "%1",
+                    # use "%1%2" and not "%1" to avoid implicit whitespace
+                    self.model.prompt_model(self._history[0]["content"], "%1%2",
                                             empty_response_callback,
-                                            n_batch=n_batch, n_predict=0, special=True)
+                                            n_batch=n_batch, n_predict=0, reset_context=True, special=True)
                 prompt_template = self._current_prompt_template.format("%1", "%2")
             else:
                 warnings.warn(
@@ -519,6 +519,7 @@ class GPT4All:
                     self._history[0]["content"] if reset else "",
                 )
                 prompt_template = "%1"
+            generate_kwargs["reset_context"] = reset
         else:
             prompt_template = "%1"
             generate_kwargs["reset_context"] = True
diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py
index 86e5e6f3..c309250d 100644
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@@ -68,7 +68,7 @@ def get_long_description():
 
 setup(
     name=package_name,
-    version="2.5.1",
+    version="2.5.2",
     description="Python bindings for GPT4All",
     long_description=get_long_description(),
     long_description_content_type="text/markdown",
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index 6d812953..37f494c1 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -733,23 +733,13 @@ void ChatLLM::generateName()
     if (!isModelLoaded())
         return;
 
-    QString instructPrompt("### Instruction:\n"
-                           "Describe response above in three words.\n"
-                           "### Response:\n");
+    std::string instructPrompt("### Instruction:\n%1\n### Response:\n"); // standard Alpaca
     auto promptFunc = std::bind(&ChatLLM::handleNamePrompt, this, std::placeholders::_1);
-    auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1,
-                                  std::placeholders::_2);
+    auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1, std::placeholders::_2);
    auto recalcFunc = std::bind(&ChatLLM::handleNameRecalculate, this, std::placeholders::_1);
     LLModel::PromptContext ctx = m_ctx;
-#if defined(DEBUG)
-    printf("%s", qPrintable(instructPrompt));
-    fflush(stdout);
-#endif
-    m_llModelInfo.model->prompt(instructPrompt.toStdString(), "%1", promptFunc, responseFunc, recalcFunc, ctx);
-#if defined(DEBUG)
-    printf("\n");
-    fflush(stdout);
-#endif
+    m_llModelInfo.model->prompt("Describe response above in three words.", instructPrompt, promptFunc, responseFunc,
+                                recalcFunc, ctx);
     std::string trimmed = trim_whitespace(m_nameResponse);
     if (trimmed != m_nameResponse) {
         m_nameResponse = trimmed;
@@ -1056,7 +1046,8 @@ void ChatLLM::processSystemPrompt()
     fflush(stdout);
 #endif
     auto old_n_predict = std::exchange(m_ctx.n_predict, 0); // decode system prompt without a response
-    m_llModelInfo.model->prompt(systemPrompt, "%1", promptFunc, nullptr, recalcFunc, m_ctx, true);
+    // use "%1%2" and not "%1" to avoid implicit whitespace
+    m_llModelInfo.model->prompt(systemPrompt, "%1%2", promptFunc, nullptr, recalcFunc, m_ctx, true);
     m_ctx.n_predict = old_n_predict;
 #if defined(DEBUG)
     printf("\n");
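
For illustration only (not part of the patch): a minimal sketch of how the changed gpt4all.py path is exercised through the public Python API. The model filename is a placeholder, and the comments restate what the hunks above do; treat this as a hedged usage example rather than an authoritative test.

    # Sketch: exercising the system-prompt ingestion path changed above.
    # Assumes some locally available GGUF model file; the name below is a placeholder.
    from gpt4all import GPT4All

    model = GPT4All("example-model.Q4_0.gguf")  # placeholder model name
    with model.chat_session(system_prompt="You are a terse assistant."):
        # On the first generate() call the history holds only the system prompt,
        # so reset is True: the binding decodes the system prompt once with the
        # "%1%2" template (avoiding implicit whitespace) and n_predict=0, then
        # sets reset_context for the user turn as in the second gpt4all.py hunk.
        print(model.generate("Name the three primary colors.", max_tokens=64))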