From 8787780835d1699d8ce2da0680ffb00575f9a99d Mon Sep 17 00:00:00 2001
From: Aaron Giner <aaron.giner@student.tugraz.at>
Date: Tue, 23 Apr 2024 08:12:24 +0200
Subject: [PATCH] some cleanup and datetime parser fixes

---
 python/llm-server/model.py    | 10 +++----
 python/llm-server/parse_dt.py | 15 ++++------
 python/llm-server/queries.py  | 56 ++++++++++++++++++++---------------
 python/llm-server/util.py     |  7 ++---
 4 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/python/llm-server/model.py b/python/llm-server/model.py
index 51e7dde..efbcf93 100644
--- a/python/llm-server/model.py
+++ b/python/llm-server/model.py
@@ -21,14 +21,14 @@ class Model:
 
         if not self.use_chat_gpt:
             if use_local_model:
-                self.llm = util.load_model(model=model_name, ctx_size=self.ctx_size, temperature=temperature)
+                self.llm = util.load_model(model=model_name, ctx_size=self.ctx_size, temperature=self.temperature)
             else:
-                model_path = hf_hub_download(self.model_name, self.model_file, token=api_key)
-                self.llm = util.load_model(model=model_path, ctx_size=self.ctx_size, temperature=temperature)
+                model_path = hf_hub_download(self.model_name, self.model_file, token=self.api_key)
+                self.llm = util.load_model(model=model_path, ctx_size=self.ctx_size, temperature=self.temperature)
 
-            self.tokenizer = util.load_tokenizer(model=self.model_tok, token=util.LLAMA_API_KEY)
+            self.tokenizer = util.load_tokenizer(model=self.model_tok, token=self.api_key)
 
-    def query(self, messages, debug=False):
+    def query(self, messages, debug=True):
         if self.use_chat_gpt:
             return self.query_openai(messages=messages)
 
diff --git a/python/llm-server/parse_dt.py b/python/llm-server/parse_dt.py
index 1f7f201..e2d618f 100644
--- a/python/llm-server/parse_dt.py
+++ b/python/llm-server/parse_dt.py
@@ -7,27 +7,24 @@ import parsedatetime as pdt
 def replace_all(dt, statements):
     cal = pdt.Calendar()
     dt = dateutil.parser.parse(dt)
-    dt_now = datetime.now()
 
     replaced = []
     for statement in statements:
-        print(statement)
         parsed_nlp = cal.nlp(statement, dt)
-        parsed_nlp_now = cal.nlp(statement, dt_now)
 
         if parsed_nlp is None or len(parsed_nlp) == 0:
             replaced.append(statement)
             continue
 
-        parsed_nlp = parsed_nlp[0]
-        # if the 2 times are different, that very likely means that the time was not mentioned in the statement
-        if parsed_nlp[0].time() == parsed_nlp_now[0][0].time():
-            include_time = True
-        else:
+        # check datetimeflags to see if a date or datetime was parsed
+        print(parsed_nlp[0][1])
+        if parsed_nlp[0][1] == 1:
             include_time = False
+        else:
+            include_time = True
 
         dt_format = "%A, %B %d, %Y" + (", %I:%M %p" if include_time else "")
-        statement = statement.replace(parsed_nlp[-1], "on " + str(parsed_nlp[0].strftime(dt_format)))
+        statement = statement.replace(parsed_nlp[0][-1], "on " + str(parsed_nlp[0][0].strftime(dt_format)))
 
         replaced.append(statement)
 
     return replaced
diff --git a/python/llm-server/queries.py b/python/llm-server/queries.py
index 0273b58..cec279b 100644
--- a/python/llm-server/queries.py
+++ b/python/llm-server/queries.py
@@ -18,32 +18,40 @@ from datetime import datetime, timedelta
 
 def parse_request(request):
     request = json.loads(request)
-    requestType = request["type"]
+    requestType = request["Type"]
+
+    model_info = request["Model"]
+    model_repo = model_info["ModelRepo"]
+    model_file = model_info["ModelFile"]
+    model_tok_repo = model_info["ModelTokRepo"]
+    api_key = model_info["APIKey"]
+    use_chat_gpt = model_info["UseChatGPT"]
+
+    llm = model.Model(model_repo, model_file, model_tok_repo, 0.75, 4096, api_key, use_chat_gpt)
 
     if requestType == "chat":
-        return query_chat(request, model.Model(util.LLAMA_NAME, util.LLAMA_FILE, util.LLAMA_TOK, 0.75, 4096,
-                                               "sk-proj-aUDdsiCXHDwoHewZFL9AT3BlbkFJIkKZEYaMi5AGEBDbW2zv", use_chat_gpt=True))
+        return query_chat(request, llm)
     elif requestType == "chat_summary":
-        return query_chat_summary(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
+        return query_chat_summary(request, llm)
     elif requestType == "chat_extract_plan":
-        return query_chat_extract_plan(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
+        return query_chat_extract_plan(request, llm)
     elif requestType == "reflection":
-        return query_reflection(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
+        return query_reflection(request, llm)
     elif requestType == "poignancy":
-        return query_poignancy(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
+        return query_poignancy(request, llm)
    elif requestType == "context":
-        return generate_context(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
+        return generate_context(request, llm)
    elif requestType == "plan_day":
-        return query_plan_day(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
+        return query_plan_day(request, llm)
 
     return "ERROR"
 
 
 def query_chat(request, llm: model.Model):
-    parameters = request["data"]
+    parameters = request["Data"]
 
     chat = parameters["chat"].split("~")
 
-    memories = request["memories"]
+    memories = request["Memories"]
     memory_util.filter_memories(memories, parameters["user"] + ";" + chat[-1], parameters["datetime"])
 
     parameters["memories"] = memory_util.memories_to_string(memories[:10], False)
@@ -67,8 +75,8 @@ def query_chat(request, llm: model.Model):
 
 
 def query_reflection(request, llm: model.Model):
-    parameters = request["data"]
-    memories = request["memories"]
+    parameters = request["Data"]
+    memories = request["Memories"]
 
     parameters["memories"] = memory_util.memories_to_string(memories, include_nodeId=True)
 
     messages = [
@@ -84,7 +92,7 @@
 
 
 def query_poignancy(request, llm: model.Model):
-    parameters = request["data"]
+    parameters = request["Data"]
 
     messages = [
         {"role": "user", "content": util.load_template("poignancy").format(**parameters)},
@@ -141,7 +149,7 @@ def deduplicate_sim(statements):
 
 
 def query_chat_summary_extract(template, request, llm: model.Model, ctx_window=5):
-    parameters = request["data"]
+    parameters = request["Data"]
 
     chat = parameters["conversation"]
     lines = chat.split("\n")
@@ -180,7 +188,7 @@ def query_chat_summary_extract(template, request, llm: model.Model, ctx_window=5
 
 
 def query_chat_summary(request, llm: model.Model):
-    parameters = request["data"]
+    parameters = request["Data"]
 
     messages_summary = [
         {"role": "user", "content": util.load_template("chat_summary_single").format(**parameters)},
@@ -207,7 +215,7 @@ def query_chat_summary(request, llm: model.Model):
 
 
 def query_chat_extract_plan(request, llm: model.Model):
-    parameters = request["data"]
+    parameters = request["Data"]
 
     # alternative 1
     plans = query_chat_summary_extract("chat_extract_plan", request, llm, 10)
@@ -278,8 +286,8 @@ def get_calendar():
 
 
 def generate_context(request, llm: model.Model):
-    parameters = request["data"]
-    memories = request["memories"]
+    parameters = request["Data"]
+    memories = request["Memories"]
 
     memories.sort(key=lambda x: x["HrsSinceCreation"], reverse=True)
     # if the agent has no memory associated with the user, then they have never had conversation
@@ -303,8 +311,8 @@ def generate_context(request, llm: model.Model):
 
 
 def query_relationship(request, llm: model.Model):
-    parameters = request["data"]
-    memories = request["memories"]
+    parameters = request["Data"]
+    memories = request["Memories"]
 
     memories_str = memory_util.memories_to_string(memories, include_date_created=True)
 
@@ -319,7 +327,7 @@ def query_relationship(request, llm: model.Model):
 
 
 def query_agent_action(request, llm: model.Model):
-    parameters = request["data"]
+    parameters = request["Data"]
 
     messages = [
         {"role": "user", "content": util.load_template("agent_action").format(**parameters)},
@@ -331,8 +339,8 @@ def query_agent_action(request, llm: model.Model):
 
 
 def query_plan_day(request, llm: model.Model):
-    parameters = request["data"]
-    memories = request["memories"]
+    parameters = request["Data"]
+    memories = request["Memories"]
 
     memories_str = memory_util.memories_to_string(memories)
 
diff --git a/python/llm-server/util.py b/python/llm-server/util.py
index 2991ca3..ed48925 100644
--- a/python/llm-server/util.py
+++ b/python/llm-server/util.py
@@ -21,17 +21,14 @@ MISTRAL_FILE = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
 
 LLAMA_TOK = "meta-llama/Llama-2-7b-chat-hf"
 LLAMA_API_KEY = "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"
-LLAMA_NAME = "TheBloke/Llama-2-7B-GGUF"
+LLAMA_NAME = "TheBloke/Llama-2-7B-Chat-GGUF"
 LLAMA_FILE = "llama-2-7b.Q4_K_M.gguf"
 
 KW_EN_CORE = "X:/LLM Models/nlm/en_core_web_sm-3.7.1-py3-none-any.whl"
 
 
 def load_tokenizer(model, token=""):
-    if "llama" in model:
-        return AutoTokenizer.from_pretrained(model, token=token)
-    else:
-        return AutoTokenizer.from_pretrained(model)
+    return AutoTokenizer.from_pretrained(model, token=token)
 
 
 def load_model(model, ctx_size, temperature):
--
GitLab
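
Note on the parse_dt.py change: the new check keys off the flag value that parsedatetime's Calendar.nlp() returns alongside each match (1 = date only, 2 = time only, 3 = date and time), so a statement with no explicit time keeps a date-only format. A minimal standalone sketch of that check, assuming parsedatetime is installed; the example statements and the base reference time are made up for illustration and are not part of the patch:

    # Sketch of the flag check used in the patched replace_all().
    # cal.nlp() returns tuples of (datetime, flags, start, end, matched_text).
    from datetime import datetime
    import parsedatetime as pdt

    cal = pdt.Calendar()
    base = datetime(2024, 4, 23, 8, 0)  # reference time for relative phrases

    for statement in ["I visited the market yesterday",
                      "Let's meet tomorrow at 3 pm"]:
        parsed = cal.nlp(statement, base)
        if not parsed:
            print(statement)
            continue
        dt, flags, _, _, matched = parsed[0]
        include_time = flags != 1  # flag 1 means only a date was parsed
        dt_format = "%A, %B %d, %Y" + (", %I:%M %p" if include_time else "")
        print(statement.replace(matched, "on " + dt.strftime(dt_format)))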