From 6df8b2b03ded3f826794782b7108cf540007ec28 Mon Sep 17 00:00:00 2001
From: Aaron Giner <aaron.giner@student.tugraz.at>
Date: Fri, 7 Jun 2024 13:08:33 +0200
Subject: [PATCH] u

---
 python/llm-server/error.py                    |   3 +
 python/llm-server/memory_util.py              |  10 +-
 python/llm-server/model.py                    |  17 +-
 python/llm-server/parse_dt.py                 |   1 -
 python/llm-server/queries.py                  | 418 ++++++++++++++----
 python/llm-server/server.py                   |  25 +-
 ...lated.txt => chat_summary_agent_facts.txt} |   2 +-
 .../templates/chat_summary_agent_old.txt      |  21 -
 .../templates/chat_summary_single.txt         |   5 +-
 .../templates/chat_summary_user_facts.txt     |  16 +
 python/llm-server/templates/chat_system.txt   |   8 +-
 python/llm-server/templates/plan_day.txt      |   9 +-
 python/llm-server/templates/poignancy.txt     |  37 +-
 python/llm-server/templates/reflection.txt    |  24 +
 python/llm-server/templates/reflection_a.txt  |  29 +-
 python/llm-server/templates/reflection_q.txt  |  14 +-
 python/llm-server/templates/replan_day.txt    |   9 +
 python/llm-server/test.py                     |  29 --
 python/llm-server/util.py                     |   2 -
 19 files changed, 441 insertions(+), 238 deletions(-)
 rename python/llm-server/templates/{chat_summary_unrelated.txt => chat_summary_agent_facts.txt} (73%)
 delete mode 100644 python/llm-server/templates/chat_summary_agent_old.txt
 create mode 100644 python/llm-server/templates/chat_summary_user_facts.txt
 create mode 100644 python/llm-server/templates/reflection.txt
 create mode 100644 python/llm-server/templates/replan_day.txt
 delete mode 100644 python/llm-server/test.py

diff --git a/python/llm-server/error.py b/python/llm-server/error.py
index a1a82e1..f3ff37d 100644
--- a/python/llm-server/error.py
+++ b/python/llm-server/error.py
@@ -2,12 +2,14 @@ class CouldNotLoadModelException(Exception):
     """Thrown when the requested model could not be loaded."""

     def __init__(self):
         self.error_code = 1
+        self.message = "The requested model could not be loaded."


 class LLMInferenceException(Exception):
     """Thrown when an error occurred during inference."""

     def __init__(self):
         self.error_code = 2
+        self.message = "An error occurred during inference."


 class OpenAIException(Exception):
@@ -15,6 +17,7 @@ class OpenAIException(Exception):

     def __init__(self):
         self.error_code = 3
+        self.message = "An error occurred using the OpenAI API."


 UNKNOWN = 666
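For context, the exception classes above now carry a human-readable message alongside their numeric error_code, and parse_request (later in this patch) reports failures as {"ErrorCode": n}. A minimal client-side sketch of that protocol; the code-to-message table simply restates the codes and strings visible in this patch, with 666 as the UNKNOWN catch-all:

import json

# Mirrors the codes defined in error.py; 666 is the UNKNOWN sentinel.
ERROR_MESSAGES = {
    1: "The requested model could not be loaded.",
    2: "An error occurred during inference.",
    3: "An error occurred using the OpenAI API.",
    666: "An unknown error occurred.",
}

def check_response(raw: str) -> dict:
    """Raise if the server reported an error, otherwise return the payload."""
    payload = json.loads(raw)
    code = payload.get("ErrorCode", 666)
    if code != 0:
        raise RuntimeError(ERROR_MESSAGES.get(code, "Unrecognized error code: " + str(code)))
    return payload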
diff --git a/python/llm-server/memory_util.py b/python/llm-server/memory_util.py
index afe5436..33a77b3 100644
--- a/python/llm-server/memory_util.py
+++ b/python/llm-server/memory_util.py
@@ -5,11 +5,10 @@ import parse_dt
 decay_factor = 0.995


-def filter_memories(memories, query, datetime):
+def filter_memories(memories, query, datetime, w_recency=0.5, w_importance=1, w_relevance=3):
     model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

     query = parse_dt.replace_all(datetime, [query])[0]
-    print("REPLACED DATETIME: " + query)

     embedding_query = embeddings.get_embeddings(query, model)

@@ -19,14 +18,11 @@ def filter_memories(memories, query, datetime):
         embedding_mem = embeddings.get_embeddings(mem["Content"], model)
         relevance = embeddings.cos_sim(embedding_query, embedding_mem)

-        mem["Score"] = 0.5*recency + 1*importance + 3*relevance
-        print(mem["Content"], "----", relevance, recency, importance)
+        mem["Score"] = w_recency*recency + w_importance*importance + w_relevance*relevance

     memories.sort(key=lambda x: x["Score"], reverse=True)

-    print("-----------------")
-    for mem in memories:
-        print(mem["Content"] + " - Score: " + str(mem["Score"]))
+    return memories


 def memories_to_string(memories, include_date_created=False, include_nodeId=False):
diff --git a/python/llm-server/model.py b/python/llm-server/model.py
index 1b26345..0f7290f 100644
--- a/python/llm-server/model.py
+++ b/python/llm-server/model.py
@@ -8,8 +8,8 @@ import error


 class Model:
-    def __init__(self, model_name, model_file, model_tok, temperature, ctx_size=4096, api_key="", use_chat_gpt=False,
-                 use_local_model=False):
+    def __init__(self, model_name, model_file, model_tok, temperature, allow_system_prompt,
+                 ctx_size=4096, api_key="", use_chat_gpt=False):
         self.model_name = model_name
         self.model_file = model_file
         self.model_tok = model_tok
@@ -17,27 +17,28 @@ class Model:
         self.ctx_size = ctx_size
         self.api_key = api_key
         self.use_chat_gpt = use_chat_gpt
+        self.allow_system_prompt = allow_system_prompt

         self.llm = None

         if not self.use_chat_gpt:
             try:
-                if use_local_model:
-                    self.llm = util.load_model(model=model_name, ctx_size=self.ctx_size, temperature=self.temperature)
-                else:
-                    model_path = hf_hub_download(self.model_name, self.model_file, token=self.api_key)
-                    self.llm = util.load_model(model=model_path, ctx_size=self.ctx_size, temperature=self.temperature)
+                model_path = hf_hub_download(self.model_name, self.model_file, token=self.api_key)
+                self.llm = util.load_model(model=model_path, ctx_size=self.ctx_size, temperature=self.temperature)

                 self.tokenizer = util.load_tokenizer(model=self.model_tok, token=self.api_key)
             except:
                 raise error.CouldNotLoadModelException

     def query(self, messages, debug=False):
+        if self.allow_system_prompt:
+            messages[0]["role"] = "system"
+
         if self.use_chat_gpt:
             return self.query_openai(messages=messages)

         try:
-            prompt = self.tokenizer.apply_chat_template(messages, tokenize=False)
+            prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

             if debug:
                 print("-------- PROMPT ------- \n" + prompt + "\n ------------------")
diff --git a/python/llm-server/parse_dt.py b/python/llm-server/parse_dt.py
index e2d618f..c72e8c8 100644
--- a/python/llm-server/parse_dt.py
+++ b/python/llm-server/parse_dt.py
@@ -17,7 +17,6 @@ def replace_all(dt, statements):
             continue

         # check datetimeflags to see if a date or datetime was parsed
-        print(parsed_nlp[0][1])
         if parsed_nlp[0][1] == 1:
             include_time = False
         else:
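filter_memories now takes its recency/importance/relevance weights as parameters, with defaults matching the previously hard-coded 0.5/1/3 mix. A standalone sketch of the same scoring idea; the exponential recency decay and the Poignancy/HrsSinceCreation field names are assumptions about how the surrounding module computes its inputs, and sentence-transformers' own cos_sim stands in for the project's embeddings helper:

from sentence_transformers import SentenceTransformer, util as st_util

DECAY_FACTOR = 0.995  # from memory_util.py

def score_memories(memories, query, w_recency=0.5, w_importance=1, w_relevance=3):
    """Rank memories by a weighted mix of recency, importance and embedding similarity."""
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    q_emb = model.encode(query)
    for mem in memories:
        recency = DECAY_FACTOR ** mem["HrsSinceCreation"]   # assumed decay form
        importance = mem["Poignancy"] / 10                  # assumed 1-10 scale
        relevance = float(st_util.cos_sim(q_emb, model.encode(mem["Content"])))
        mem["Score"] = w_recency * recency + w_importance * importance + w_relevance * relevance
    memories.sort(key=lambda m: m["Score"], reverse=True)
    return memories

ranked = score_memories(
    [{"Content": "Lisa likes hiking", "HrsSinceCreation": 4, "Poignancy": 3},
     {"Content": "brushed teeth", "HrsSinceCreation": 1, "Poignancy": 1}],
    query="What does Lisa enjoy doing?")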
diff --git a/python/llm-server/queries.py b/python/llm-server/queries.py
index 4ed143d..d1f2946 100644
--- a/python/llm-server/queries.py
+++ b/python/llm-server/queries.py
@@ -19,83 +19,125 @@ def parse_request(request):
         request = json.loads(request)

         requestType = request["Type"]
+        model_temperature = request["ModelTemperature"]

         model_info = request["Model"]
         model_repo = model_info["ModelRepo"]
         model_file = model_info["ModelFile"]
         model_tok_repo = model_info["ModelTokRepo"]
         api_key = model_info["APIKey"]
         use_chat_gpt = model_info["UseChatGPT"]
+        allow_system_prompt = model_info["AllowSystemPrompt"]

-        llm = model.Model(model_repo, model_file, model_tok_repo, 0.75, 4096, api_key, use_chat_gpt)
+        llm = model.Model(model_repo, model_file, model_tok_repo, model_temperature, allow_system_prompt,
+                          4096, api_key, use_chat_gpt)

         response = {}

+        print(f"Handling '{requestType}' request.")
+
         match requestType:
-            case "chat":
+            case "Chat":
                 response = query_chat(request, llm)
-            case "chat_summary":
+            case "ChatSummary":
                 response = query_chat_summary(request, llm)
-            case "chat_extract_plan":
+            case "ChatExtractPlan":
                 response = query_chat_extract_plan(request, llm)
-            case "reflection":
+            case "Reflection":
                 response = query_reflection(request, llm)
-            case "poignancy":
+            case "Poignancy":
                 response = query_poignancy(request, llm)
-            case "context":
+            case "Context":
                 response = generate_context(request, llm)
-            case "plan_day":
+            case "PlanDay":
                 response = query_plan_day(request, llm)
+            case "ReplanDay":
+                response = query_replan_day(request, llm)
+            case "Custom":
+                response = query_custom(request, llm)

         response["ErrorCode"] = 0
+
+        print(response)
+
         return json.dumps(response)
     except error.CouldNotLoadModelException as e:
+        print(e.message)
         return json.dumps({"ErrorCode": e.error_code})
     except error.LLMInferenceException as e:
+        print(e.message)
         return json.dumps({"ErrorCode": e.error_code})
     except error.OpenAIException as e:
+        print(e.message)
         return json.dumps({"ErrorCode": e.error_code})
     except:
+        print("An unknown error occurred.")
         return json.dumps({"ErrorCode", error.UNKNOWN})


 def query_chat(request, llm: model.Model):
+    NUM_MEMORIES_TO_USE = 10
+
     parameters = request["Data"]
     chat = parameters["Chat"].split("~")

     memories = request["Memories"]
-    memory_util.filter_memories(memories, parameters["User"] + ";" + chat[-1], parameters["DateTime"])
+    memory_util.filter_memories(memories, parameters["User"] + ";" + ";".join(chat[-5:]), parameters["DateTime"])

-    parameters["Memories"] = memory_util.memories_to_string(memories[:10], False)
+    parameters["Memories"] = memory_util.memories_to_string(memories[:NUM_MEMORIES_TO_USE], False)

     messages = [
-        {"role": "system",
+        {"role": "user",
         "content": util.load_template("chat_system").format(**parameters)},
    ]

+    if not llm.allow_system_prompt:
+        messages.append({"role": "assistant", "content": "Ok."})
+
     roles = ["user", "assistant"]
     for i in range(len(chat)):
         messages.append({"role": roles[i % 2], "content": chat[i]})

     res = llm.query(messages)

-    memories_accessed = [str(mem["NodeId"]) for mem in memories[:5]]
+    memories_accessed = [str(mem["NodeId"]) for mem in memories[:NUM_MEMORIES_TO_USE]]

-    return {"Text": res, "data": {"memories_accessed": str.join(",", memories_accessed)}}
+    return {"Text": res, "Data": {"MemoriesAccessed": str.join(",", memories_accessed)}}


 def query_reflection(request, llm: model.Model):
     parameters = request["Data"]
     memories = request["Memories"]

-    parameters["Memories"] = memory_util.memories_to_string(memories, include_nodeId=True)
+    parameters["Memories"] = memory_util.memories_to_string(memories)

-    messages = [
+    messages_q = [
         {"role": "user",
-         "content": util.load_template("reflection_a").format(**parameters)},
+         "content": util.load_template("reflection_q").format(**parameters)},
     ]

-    res = llm.query(messages)
-    insights = [s.replace("- ", "").strip() for s in res.split("\n") if "- " in s]
+    res = llm.query(messages_q)
+
+    questions = res.split("\n")[-3:]
+
+    insights = []
+
+    for q in questions:
+        # take the 5 most relevant memories related to the query
+        memory_util.filter_memories(memories, q, parameters["DateTime"], 0, 0, 1)
+        parameters["Memories"] = memory_util.memories_to_string(memories[:5], include_nodeId=True)
+
+        print("Question: ", q)
+
+        messages_a = [
+            {"role": "user",
+             "content": util.load_template("reflection_a").format(**parameters)},
+        ]
+
+        res = llm.query(messages_a)
+        insights_q = [s.replace("- ", "").strip() for s in res.split("\n") if "- " in s]
+        add_poignancy_to_memories(insights_q, llm)
+
+        insights.extend(insights_q)

     return {"Memories": insights}
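query_chat now sends the persona template as a plain user turn and, when AllowSystemPrompt is false (chat templates such as Mistral-Instruct reject a system role), inserts a stub "Ok." assistant reply so the user/assistant turns keep alternating; model.query flips the first role back to "system" when it is allowed. A compact sketch of that message-building logic under those assumptions:

def build_chat_messages(persona_prompt, chat_turns, allow_system_prompt):
    """Assemble an alternating user/assistant message list for a chat template.

    Templates like Mistral-Instruct require strict user/assistant alternation
    and no system role; the "Ok." stub keeps the parity correct in that case.
    """
    role = "system" if allow_system_prompt else "user"
    messages = [{"role": role, "content": persona_prompt}]
    if not allow_system_prompt:
        messages.append({"role": "assistant", "content": "Ok."})
    roles = ["user", "assistant"]
    for i, turn in enumerate(chat_turns):
        messages.append({"role": roles[i % 2], "content": turn})
    return messages

msgs = build_chat_messages("Act as Aaron. ...",
                           ["Hi Aaron!", "Hey Lisa!", "How are you?"],
                           allow_system_prompt=False)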
@@ -112,6 +154,11 @@ def query_poignancy(request, llm: model.Model):
     return {"Text": res}


+def add_poignancy_to_memories(memories, llm: model.Model):
+    for i in range(len(memories)):
+        memories[i] += "~" + query_poignancy({"Data": {"Memory": memories[i]}}, llm)["Text"]
+
+
 # deduplicates a list of statement using a llm
 def query_deduplicate(statements, llm: model.Model):
     messages = [
@@ -119,7 +166,6 @@ def query_deduplicate(statements, llm: model.Model):
          "content": util.load_template("dedup_statements").format(mems="\n".join(statements))},
     ]

-    print("DEDUPLICATED")
     dedup = llm.query(messages)
     dedup = [s.replace("- ", "").strip() for s in dedup.split("\n") if "- " in s]

@@ -129,7 +175,7 @@ def query_deduplicate(statements, llm: model.Model):
 # deduplicates a list of statements using their similarity score
 # deduplication takes length of statements into account and will keep the
 # longer one (potentially containing more information, but not guaranteed)
-def deduplicate_sim(statements):
+def deduplicate_sim(statements, threshold=0.85):
     st_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

     dedup = []
@@ -139,8 +185,8 @@ def deduplicate_sim(statements):
         for i in range(1, len(statements)):
             s2 = statements[i]
             sim = embeddings.cos_sim(embeddings.get_embeddings(s1, st_model), embeddings.get_embeddings(s2, st_model))
-            print(sim, s1, s2)
-            if sim > 0.9:
+            if sim > threshold:
+                print("SIMILAR: " + s1 + " - " + s2)
                 if len(s2) > len(s1):
                     statements[i] = s1
                     statements[0] = s2
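deduplicate_sim drops near-duplicate statements whose embedding cosine similarity exceeds the threshold, preferring the longer variant. A self-contained sketch of the same idea, structured as a single pass rather than the patch's repeated head-vs-rest comparison; exact survivors depend on the embedding model:

from sentence_transformers import SentenceTransformer, util as st_util

def dedup_keep_longer(statements, threshold=0.85):
    """Drop near-duplicate statements, keeping the longer variant of each pair."""
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    embs = model.encode(statements)
    kept, kept_embs = [], []
    for s, e in zip(statements, embs):
        dup_idx = next((i for i, ke in enumerate(kept_embs)
                        if float(st_util.cos_sim(e, ke)) > threshold), None)
        if dup_idx is None:
            kept.append(s)
            kept_embs.append(e)
        elif len(s) > len(kept[dup_idx]):
            # keep the longer duplicate, which may carry more information
            kept[dup_idx], kept_embs[dup_idx] = s, e
    return kept

print(dedup_keep_longer([
    "Lisa will go hiking on March 25th",
    "Lisa plans to hike Pine Ridge Trail with Aaron on March 25th",
    "Aaron likes coffee",
]))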
@@ -156,12 +202,29 @@
     return dedup


-def query_chat_summary_extract(template, request, llm: model.Model, ctx_window=5):
+# remove statements that are already present in the agent's memory based on similarity
+def deduplicate_sim_mem(statements, memories, threshold=0.85):
+    st_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+
+    dedup = []
+    for s in statements:
+        similar_found = False
+        for m in memories:
+            sim = embeddings.cos_sim(embeddings.get_embeddings(s, st_model), embeddings.get_embeddings(m["Content"], st_model))
+            if sim > threshold:
+                print("SIMILAR STATEMENTS: " + s + " - " + m["Content"])
+                similar_found = True
+                break
+        if not similar_found:
+            dedup.append(s)
+
+    return dedup
+
+
+def query_chat_summary_extract(template, request, llm: model.Model, ctx_window=15):
     parameters = request["Data"]

     chat = parameters["Conversation"]
     lines = chat.split("\n")
-    statements = []
+    extracted = []

     # analyze 'context_window' chat lines at a time - limited context window to improve accuracy
     for li in range(0, max(1, len(lines) - ctx_window)):
@@ -175,50 +238,52 @@ def query_chat_summary_extract(template, request, llm: model.Model, ctx_window=5
         ]

         res = llm.query(messages)
-        res = [s.strip() for s in res.split("\n") if "- " in s]
+        res = [s.replace("- ", "").strip() for s in res.split("\n") if "- " in s]

-        # validate here? maybe works better for longer conversations?
-        # res = conversation_validate_statements(parameters, res, llm, tokenizer)
-
-        statements.extend(res)
+        res = conversation_validate_statements(parameters, res, llm)
+        extracted.extend(res)

     # use a deduplication prompt/sim to remove duplicate statements from collected list
-    if ctx_window < len(lines):  # only need to deduplicate when ctx window is smaller than convo length
-        statements = query_deduplicate(statements, llm)
-        statements = deduplicate_sim(statements)
+    # statements = query_deduplicate(statements, llm)
+    extracted = deduplicate_sim(extracted)

     # validate extracted statements against original conversation
-    parameters["Conversation"] = chat
-    extracted = conversation_validate_statements(parameters, statements, llm)
+    # parameters["Conversation"] = chat
+    # extracted = conversation_validate_statements(parameters, statements, llm)

     return extracted


 def query_chat_summary(request, llm: model.Model):
     parameters = request["Data"]

-    messages_summary = [
-        {"role": "user",
-         "content": util.load_template("chat_summary_single").format(**parameters)},
-    ]
-    summary = llm.query(messages_summary)
-    summary = "Conversation: " + parameters["Agent"] + " and " + parameters["User"] + " talked about " + summary
-
-    new_memories = [summary]
+    new_memories = []

     user_info = query_chat_summary_extract("chat_summary_user_info", request, llm)
     new_memories.extend(user_info)

-    unrelated_info = query_chat_summary_extract("chat_summary_unrelated", request, llm)
-    new_memories.extend(unrelated_info)
+    user_facts = query_chat_summary_extract("chat_summary_user_facts", request, llm)
+    new_memories.extend(user_facts)
+
+    agent_facts = query_chat_summary_extract("chat_summary_agent_facts", request, llm)
+    new_memories.extend(agent_facts)
+
+    print(len(new_memories))
+    new_memories = deduplicate_sim(new_memories, 0.75)
+    print(len(new_memories))
+    new_memories = deduplicate_sim_mem(new_memories, request["Memories"], 0.75)
+    print(len(new_memories))
+    add_poignancy_to_memories(new_memories, llm)

-    print("SUMMARY")
-    for mem in new_memories:
-        print(mem)
-    print()
+    messages_summary = [
+        {"role": "user",
+         "content": util.load_template("chat_summary_single").format(**parameters)},
+    ]
+    summary = llm.query(messages_summary)
+    summary = "Conversation with " + parameters["User"] + " on " + parameters["Date"] + ": " + summary
+    new_memories.append(summary)

-    new_memories = parse_dt.replace_all(parameters["DateTime"], new_memories)

     return {"Memories": new_memories}
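query_chat_summary_extract slides a ctx_window-line window over the transcript so each extraction prompt sees a bounded slice. A minimal sketch of the windowing; the extract callable stands in for the per-window LLM call. Note that if the window is taken as lines[li:li+ctx_window], the range(0, max(1, len(lines) - ctx_window)) bound shown above never places the final transcript line in any window once the transcript exceeds one window; range(0, len(lines) - ctx_window + 1) would.

def windowed_extract(transcript: str, extract, ctx_window: int = 15):
    """Run `extract` over successive ctx_window-line slices of the transcript."""
    lines = transcript.split("\n")
    statements = []
    # range mirrors the patch: a single pass when the transcript fits in one window
    for start in range(0, max(1, len(lines) - ctx_window)):
        window = "\n".join(lines[start:start + ctx_window])
        statements.extend(extract(window))
    return statements

# toy extractor: pull out lines that mention a date
facts = windowed_extract("a\nb\nMarch 25th hike\nd",
                         lambda w: [l for l in w.split("\n") if "March" in l],
                         ctx_window=2)

Because the stride is one line, consecutive windows overlap almost entirely, which is why the collected statements are deduplicated afterwards.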
@@ -227,27 +292,21 @@ def query_chat_extract_plan(request, llm: model.Model):

     # alternative 1
     plans = query_chat_summary_extract("chat_extract_plan", request, llm, 10)
+    plans = [p.replace("- ", "").strip() for p in plans]

     if len(plans) == 0:
-        return {"Memories": [], "data": {"replan_day": "0"}}
+        return {"Memories": [], "Data": {"ReplanDay": "0"}}

     plans = parse_dt.replace_all(parameters["DateTime"], plans)

-    print("HUMAN READABLE TO DATETIME")
-    for p in plans:
-        print(p)
-
     plans_validated, any_plans_for_today = plans_validate(plans, parameters["DateTime"])

-    print("VALIDATE PLANS")
-    for p in plans_validated:
-        print(p)
+    add_poignancy_to_memories(plans_validated, llm)

-    return {"Memories": plans_validated, "data": {"replan_day": "1" if any_plans_for_today else "0"}}
+    return {"Memories": plans_validated, "Data": {"ReplanDay": "1" if any_plans_for_today else "0"}}


 def parse_datetime(statement):
-    print(statement)
     try:
         datetime_str = re.search(
             "[A-Z][a-z]+, [A-Z][a-z]+ [0-9]+, [0-9]{4}(, [0-9]{1,2}:[0-9]{1,2}( )*((pm|am)|(AM|PM)))?",
@@ -255,7 +314,6 @@ def parse_datetime(statement):

         datetime_ref = dateutil.parser.parse(datetime_str)

-        print(datetime_ref)
         return datetime_ref
     except:
         return None
@@ -272,8 +330,6 @@ def plans_validate(statements, c_datetime):
         if datetime_ref is None:
             continue

-        print(datetime_ref)
-        print(c_datetime)
         if datetime_ref > c_datetime:
             valid.append(statement)
             if datetime_ref.date() == c_datetime.date():
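plans_validate keeps only statements whose embedded date lies in the future and flags whether any of them fall on the current day, which drives the ReplanDay bit above. A sketch of the same check, reusing the patch's date pattern; dates are expected in the "Sunday, April 7, 2024, 3:31pm" form:

import re
import dateutil.parser

DT_PATTERN = (r"[A-Z][a-z]+, [A-Z][a-z]+ [0-9]+, [0-9]{4}"
              r"(, [0-9]{1,2}:[0-9]{1,2}( )*((pm|am)|(AM|PM)))?")

def validate_plans(statements, now):
    """Keep future-dated plans; report whether any are due today."""
    valid, any_today = [], False
    for s in statements:
        m = re.search(DT_PATTERN, s)
        if m is None:
            continue
        when = dateutil.parser.parse(m.group(0))
        if when > now:
            valid.append(s)
            any_today = any_today or when.date() == now.date()
    return valid, any_today

valid, replan = validate_plans(
    ["Aaron will hike on Monday, March 25, 2024, 9:00 am"],
    dateutil.parser.parse("Sunday, March 17, 2024, 3:31pm"))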
@@ -298,35 +354,41 @@ def generate_context(request, llm: model.Model):
     memories = request["Memories"]
     memories.sort(key=lambda x: x["HrsSinceCreation"], reverse=True)

+    context = ""
+
     # if the agent has no memory associated with the user, then they have never had conversation
     if len(memories) == 0:
-        return {"Text": parameters["agent"] + " is having a conversation with someone they "
-                                              "never met before."}
+        context += parameters["Agent"] + " is having a conversation with someone they never met before.\n"
+    else:
+        # when did the agent last talk to user?
+        lastChatHrs = int(math.ceil(memories[-1]["HrsSinceCreation"]))
+        last_chat = (parameters["Agent"] + " is currently talking to " + parameters["User"] + ". They last talked on "
+                     + memories[-1]["Created"] + " - " + str(lastChatHrs) + " "
+                     + ("hour" if lastChatHrs == 1 else "hours") + " ago.")
+
+        # what is the relationship between agent and user?
+        relationship = query_relationship(request, llm)
+        context += last_chat + " " + relationship + "\n"

     # agent's current action based on their schedule
     action = query_agent_action(request, llm)

-    # when did the agent last talk to user?
-    lastChatHrs = int(math.ceil(memories[-1]["HrsSinceCreation"]))
-    last_chat = (parameters["Agent"] + " is currently talking to " + parameters["User"] + ". They last talked on " +
-                 memories[-1]["Created"] + " - " + str(lastChatHrs) + " "
-                 + ("hour" if lastChatHrs == 1 else "hours") + " ago.")
+    context += action

-    # what is the relationship between agent and user?
-    relationship = query_relationship(request, llm)
+    print(context)

-    return {"Text": last_chat + " " + relationship}
+    return {"Text": context}


 def query_relationship(request, llm: model.Model):
     parameters = request["Data"]

     memories = request["Memories"]
-    memories_str = memory_util.memories_to_string(memories, include_date_created=True)
+    memories_str = memory_util.memories_to_string(memories, include_date_created=False)

     messages = [
         {"role": "user",
-         "content": util.load_template("relationship").format(memories=memories_str, **parameters)},
+         "content": util.load_template("relationship").format(Memories=memories_str, **parameters)},
     ]

     relationship = llm.query(messages)
@@ -354,13 +416,11 @@ def query_plan_day(request, llm: model.Model):

     messages = [
         {"role": "user",
-         "content": util.load_template("plan_day").format(memories=memories_str, **parameters)},
+         "content": util.load_template("plan_day").format(Memories=memories_str, **parameters)},
     ]

     day_plan = llm.query(messages)

-    print("Finished day query")
-
     """
     for h in range(24):
         time_start = datetime.strptime(str(h) + ":00", "%H:%M")
@@ -387,6 +447,33 @@ def query_plan_day(request, llm: model.Model):
     return {"Text": day_plan}


+def query_replan_day(request, llm: model.Model):
+    parameters = request["Data"]
+    memories = request["Memories"]
+
+    memories_str = memory_util.memories_to_string(memories)
+
+    messages = [
+        {"role": "user",
+         "content": util.load_template("replan_day").format(Memories=memories_str, **parameters)},
+    ]
+
+    day_plan = llm.query(messages)
+
+    return {"Text": day_plan}
+
+
+def query_custom(request, llm):
+    parameters = request["Data"]
+
+    messages = [
+        {"role": "user",
+         "content": parameters["PromptTemplate"].format(**parameters)},
+    ]
+
+    response = llm.query(messages)
+
+    return {"Text": response}
+
+
 # returns a list of validated statements
 def validate_statements(template, statements, parameters, llm: model.Model):
     valid = []
@@ -396,11 +483,9 @@ def validate_statements(template, statements, parameters, llm: model.Model):
              "content": util.load_template(template).format(statement=statement, **parameters)},
         ]

-        print("DOES THE STATEMENT: '" + statement + "' FIT THE PARAMETERS?: ")
         val = llm.query(message_validate)
         if "yes" in val.lower():
             valid.append(statement)
-        print()

     return valid

@@ -415,11 +500,9 @@ def conversation_validate_statements(parameters, statements, llm: model.Model):
                  **parameters)},
         ]

-        print("IS THE STATEMENT: '" + statement + "' CORRECT?: ")
         val = llm.query(message_validate)
         if "yes" in val.lower():
             valid.append(statement)
-        print()

     return valid
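The new Custom request type formats a caller-supplied template with the remaining Data fields, so one-off prompts need no dedicated handler. A sketch of such a request; every field value here is invented for illustration:

import json

custom_request = {
    "Type": "Custom",
    "Model": {"ModelRepo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
              "ModelFile": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
              "ModelTokRepo": "mistralai/Mistral-7B-Instruct-v0.1",
              "APIKey": "", "UseChatGPT": False, "AllowSystemPrompt": False},
    "ModelTemperature": 0.5,
    "Data": {
        # PromptTemplate may reference any other key in Data
        "PromptTemplate": "In one sentence, describe {Agent}'s mood after {Event}.",
        "Agent": "Aaron",
        "Event": "the hike at Pine Ridge Trail",
    },
    "Memories": [],
}
# response = parse_request(json.dumps(custom_request))  # -> {"Text": ..., "ErrorCode": 0}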
@@ -442,18 +525,173 @@ Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the
 Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
 Lisa: Absolutely! Take care until then, Aaron.
 Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
 """

-d = {"type": "chat_summary",
-     "data": {
-         "agent": "Aaron",
-         "DateTime": "Sunday, April 7, 2024, 3:31pm",
-         "user": "Lisa",
+d = {"Type": "ChatSummary",
+     "Model": {
+         "ModelRepo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+         "ModelFile": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+         "ModelTokRepo": "mistralai/Mistral-7B-Instruct-v0.1",
+         "APIKey": "<HF_API_KEY_REDACTED>",
+         "UseChatGPT": False,
+         "AllowSystemPrompt": False},
+     "ModelTemperature": 0.5,
+     "Data": {
+         "Agent": "Aaron",
+         "Date": "Sunday, April 7, 2024",
+         "User": "Lisa",
          "Conversation": t1
      },
      "Memories": []}

-# query_chat_summary(d, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
-# query_chat_summary_extract("chat_extract_plan2", d, util.load_model(util.MISTRAL_PATH, 4096, 0), util.load_tokenizer(util.MISTRAL_TOK))
-
-# query_chat_extract_plan(d, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
+# print(parse_request(json.dumps(d)))
\ No newline at end of file
diff --git a/python/llm-server/server.py b/python/llm-server/server.py
index 68081ea..eee7972 100644
--- a/python/llm-server/server.py
+++ b/python/llm-server/server.py
@@ -16,12 +16,15 @@ threads = []
 running = 1
 s = None

+STREAM_BUFFER_SIZE = 2048
+

 class ClientHandler(threading.Thread):
     def __init__(self, conn, addr, r):
         super().__init__()
         self._stop_event = threading.Event()
         self.conn = conn
+        self.conn.settimeout(0.5)
         self.addr = addr
         self.response = ""
         self.response_condition = threading.Condition()
@@ -37,19 +40,31 @@ class ClientHandler(threading.Thread):
         with self.conn:
             while not self._stop_event.is_set():
                 try:
-                    data = self.conn.recv(100000)
+                    data = b''
+                    print("HEYHO")
+                    while True:
+                        try:
+                            received = self.conn.recv(STREAM_BUFFER_SIZE)
+                            if len(received) == 0:
+                                raise Exception
+                            print(len(received))
+                            data += received
+                        except TimeoutError:
+                            break
+
                     if len(data) == 0:
-                        raise Exception()
+                        continue

                     request = data.decode("utf-8")
+                    print(request)

                     with self.response_condition:
                         self.request_handler.queue_request(request, self)
                         self.response_condition.wait()

+                    assert(len(self.response) < 20000)
                     self.conn.sendall(bytes(self.response, "utf-8"))
-                except TimeoutError:
-                    pass
-                except:
+                except Exception as e:
+                    print(e)
                     print("An unexpected error occurred, connection likely terminated by client unexpectedly")
                     break
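The handler now drains the socket in STREAM_BUFFER_SIZE chunks until the 0.5-second read timeout marks the end of a request, rather than assuming a single recv() returns the whole payload. A matching client-side sketch; host and port are placeholders, and since the server frames messages by timeout the client just sends the JSON and waits for the reply:

import json
import socket

def send_request(request: dict, host="127.0.0.1", port=9999, timeout=120.0):
    """Send one JSON request to the llm-server and read its reply."""
    payload = json.dumps(request).encode("utf-8")
    with socket.create_connection((host, port), timeout=timeout) as conn:
        conn.sendall(payload)
        return json.loads(conn.recv(65536).decode("utf-8"))

Timeout-based framing is timing-sensitive; prefixing each message with its length would make the request boundary explicit if this ever proves flaky.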
diff --git a/python/llm-server/templates/chat_summary_unrelated.txt b/python/llm-server/templates/chat_summary_agent_facts.txt
similarity index 73%
rename from python/llm-server/templates/chat_summary_unrelated.txt
rename to python/llm-server/templates/chat_summary_agent_facts.txt
index d8a1292..f9beb10 100644
--- a/python/llm-server/templates/chat_summary_unrelated.txt
+++ b/python/llm-server/templates/chat_summary_agent_facts.txt
@@ -1,5 +1,5 @@
 {Agent} had a conversation with {User}.
-From the conversation, list interesting things we learn that are unrelated to {Agent} and {User}.
+From the conversation, list the 2 most interesting facts mentioned by {Agent}.
 Each item must start with "-".
 Each item must start with "-".
 Each item must start with "-".
diff --git a/python/llm-server/templates/chat_summary_agent_old.txt b/python/llm-server/templates/chat_summary_agent_old.txt
deleted file mode 100644
index 6f13c04..0000000
--- a/python/llm-server/templates/chat_summary_agent_old.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-{Agent} had a conversation with {User}.
-List the main things {Agent} told {User}.
-If the conversation does not contain anything substantial, respond with 'no valuable information'.
-
-Result Requirements:
-
-Each list item must start with: '- {Agent} told {User} that '
-Related information must be combined into one list item.
-List items must contain distinct information.
-List items must be phrased as briefly as possible.
-List items must be written in past tense.
-
-Example Result:
-- {Agent} told {User} that he has cornflakes for breakfast almost every day.
-- {Agent} told {User} that he recently learned the moon does not emmit light by itself.
-- {Agent} told {User} that he was planning to go for a hike this Saturday
-
-Conversation:
-{Conversation}
-
-Result:
diff --git a/python/llm-server/templates/chat_summary_single.txt b/python/llm-server/templates/chat_summary_single.txt
index 99a364a..2a75eff 100644
--- a/python/llm-server/templates/chat_summary_single.txt
+++ b/python/llm-server/templates/chat_summary_single.txt
@@ -1,8 +1,7 @@
 You are given a conversation between {Agent} and {User}.
-In 1 short sentence, summarize the conversation below between {Agent} and {User}.
+Summarize the conversation in 1-2 sentences.

 Conversation:
 {Conversation}

-Answer:
-{Agent} and {User} talked about
+Summary:
diff --git a/python/llm-server/templates/chat_summary_user_facts.txt b/python/llm-server/templates/chat_summary_user_facts.txt
new file mode 100644
index 0000000..072df53
--- /dev/null
+++ b/python/llm-server/templates/chat_summary_user_facts.txt
@@ -0,0 +1,16 @@
+{Agent} had a conversation with {User}.
+From the conversation, list the 2 most interesting facts we learn from {User}.
+Each item must start with "-".
+Each item must start with "-".
+Each item must start with "-".
+Each item must start with "-".
+
+Example:
+- James' mom had a heart attack
+- The Chiefs won the Super Bowl
+- The Graz 99ers played well yesterday
+
+Conversation:
+{Conversation}
+
+Facts:
\ No newline at end of file
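The fact templates repeat the 'must start with "-"' instruction because the server recovers items purely by that prefix. The parsing idiom used throughout queries.py, as a standalone helper:

def parse_bullets(completion: str):
    """Extract '-' bullet items from an LLM completion, dropping chatter around them."""
    return [line.replace("- ", "").strip()
            for line in completion.split("\n") if "- " in line]

assert parse_bullets("Sure!\n- Lisa likes hiking\n- Aaron found a café\nHope that helps!") == \
       ["Lisa likes hiking", "Aaron found a café"]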
diff --git a/python/llm-server/templates/chat_system.txt b/python/llm-server/templates/chat_system.txt
index 719ffe8..ab4998d 100644
--- a/python/llm-server/templates/chat_system.txt
+++ b/python/llm-server/templates/chat_system.txt
@@ -4,7 +4,7 @@ Act as {Agent}.
 {Agent}'s character traits:
 {Traits}

-{Schedule}
+{Agent}'s schedule for today: {Schedule}

 {Agent}'s memories:
 {Memories}
@@ -14,7 +14,7 @@ Act as {Agent}.

 Current date and time: {DateTime}

-Respond in a natural, human-like manner.
-Your responses must reflect your character and character traits.
+Respond in a human-like manner. The conversation should be as natural as possible.
+The responses must be in accordance with {Agent}'s character and character traits.
 Consider the current time and date.
-Reveal only necessary information. Be concise.
\ No newline at end of file
+Reveal only necessary information. Be concise. Be curious.
\ No newline at end of file
diff --git a/python/llm-server/templates/plan_day.txt b/python/llm-server/templates/plan_day.txt
index be9913e..abb5b06 100644
--- a/python/llm-server/templates/plan_day.txt
+++ b/python/llm-server/templates/plan_day.txt
@@ -1,11 +1,8 @@
-Generate a plan for {Agent}'s day for {Date}.
+Your task is to create a schedule for {Agent}'s day for {Date}.
 When does he have to wake up and when does he go to sleep? What does he do during the day?
 Answer in full sentences and include the time for each action.

-Plan requirements:
-"DailyPlanReq"
+Here are the schedule requirements:
+{DailyPlanReq}

 {Memories}
-
-Complete the sentence:
-On {Date}, {Agent}
diff --git a/python/llm-server/templates/poignancy.txt b/python/llm-server/templates/poignancy.txt
index 03cdc92..c73324c 100644
--- a/python/llm-server/templates/poignancy.txt
+++ b/python/llm-server/templates/poignancy.txt
@@ -1,35 +1,8 @@
 You are a helpful, unbiased assistant.
-On the scale of 1 to 10, where 1 is purely mundane
-(e.g., brushing teeth, making bed) and 10 is
-extremely poignant (e.g., a break up, college
-acceptance), rate the likely poignancy based on a piece of memory. Only answer with a single number. No explanation.
+On the scale of 1 to 10, where 1 is purely mundane (e.g., brushing teeth, making bed) and 10 is extremely poignant (e.g., a break up, college acceptance), rate the likely poignancy based on a piece of memory.
+The answer must only be a number between 1 and 10. The answer must not include an explanation.

-Example 1:
-- sister died yesterday, committed suicide
+Memory:
+{Memory}

-Answer: 10
-
-Example 2:
-- went shopping earlier today, met an old friend from school
-
-Answer: 3
-
-Example 3:
-- brushed my teeth this morning
-
-Answer: 1
-
-Example 4:
-- loved playing soccer with Alex
-
-Answer: 4
-
-Example 5:
-- mother died
-
-Answer: 10
-
-Example 6:
-- {Memory}
-
-Answer:
\ No newline at end of file
+Poignancy:
\ No newline at end of file
diff --git a/python/llm-server/templates/reflection.txt b/python/llm-server/templates/reflection.txt
new file mode 100644
index 0000000..6ac9660
--- /dev/null
+++ b/python/llm-server/templates/reflection.txt
@@ -0,0 +1,24 @@
+You are a helpful, unbiased assistant. Give 1-2 interesting high-level insights that you can infer from the given memories.
+The insights MUST be meaningfully different from the given memories.
+Each insight must start with a dash '-' and end with a list of references.
+
+If no interesting insight can be inferred, return 'no insights'.
+
+Format:
+- [insight] ([references])
+
+Example 1:
+Memories:
+1. {User} works on his bachelor's thesis
+2. {User} is at the library doing research for his thesis 10 hours a day
+3. Alex drinks tea in the morning
+4. {User} always talks about how interesting he thinks his bachelor's thesis topic is
+
+Result:
+- James is passionate about his bachelor's thesis (1, 2, 4)
+
+Example 2:
+Memories:
+{Memories}
+
+Result:
\ No newline at end of file
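The trimmed poignancy prompt asks for a bare number, but small instruct models still occasionally wrap it in words, and add_poignancy_to_memories appends the reply verbatim after a '~'. A defensive parse is one hedged alternative (not what the patch does):

import re

def parse_poignancy(reply: str, default: int = 3) -> int:
    """Pull the first integer out of the model's reply, clamping to the 1-10 scale."""
    m = re.search(r"\d+", reply)
    if m is None:
        return default
    return min(10, max(1, int(m.group(0))))

assert parse_poignancy("Poignancy: 8") == 8
assert parse_poignancy("10") == 10
assert parse_poignancy("not sure") == 3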
diff --git a/python/llm-server/templates/reflection_a.txt b/python/llm-server/templates/reflection_a.txt
index d7e1ece..096a0e6 100644
--- a/python/llm-server/templates/reflection_a.txt
+++ b/python/llm-server/templates/reflection_a.txt
@@ -1,25 +1,12 @@
-You are a helpful, unbiased assistant. Give none or multiple interesting high-level insights that you
-can infer from the given memories.
-The insights MUST be meaningfully different from the input memories.
-Each insight must start with a dash '-' and end with a list of references.
+You are a helpful, unbiased assistant.
+Which 1 to 5 high-level thoughts/insights can you infer from the given memories?
+Each insight must be fully self-contained and understandable without additional context.
+The new insights/thoughts must not be rehashes of the original memories, they must be original.
+Each insight must start with a dash '-'.
+Include the memories' IDs that you derived these insights from as shown in the example below - they must be a comma-separated list.

-If no interesting insight can be inferred, return 'no insights'.
+Example:
+- Alex likes studying for exams (1, 6, 25)

-Format:
-- [insight] ([references])
-
-Example 1:
-Memories:
-1. {User} works on his bachelor's thesis
-2. {User} is at the library doing research for his thesis 10 hours a day
-3. Alex drinks tea in the morning
-4. {User} always talks about how interesting he thinks bachelor's thesis topic is
-
-Result:
-- James is passionate about his bachelor's thesis (1, 2, 4)
-
-Example 2:
 Memories:
 {Memories}
-
-Result:
\ No newline at end of file
diff --git a/python/llm-server/templates/reflection_q.txt b/python/llm-server/templates/reflection_q.txt
index 34b172e..daafcc0 100644
--- a/python/llm-server/templates/reflection_q.txt
+++ b/python/llm-server/templates/reflection_q.txt
@@ -1,12 +1,10 @@
-You are a helpful, unbiased assistant. Give 1 to 5 salient high-level questions that can be asked given the following memories.
-
-Memories:
-{Memories}
+You are a helpful, unbiased assistant. Give 3 salient high-level questions that can be asked given {Agent}'s recent memories.
+The questions should be asked from a neutral, third-person perspective.

 Format:
-- [question]
+- Question 1
+- Question 2
+- Question 3

-Example:
-- Is Alex a student?
+{Memories}
-
diff --git a/python/llm-server/templates/replan_day.txt b/python/llm-server/templates/replan_day.txt
new file mode 100644
index 0000000..01eaf15
--- /dev/null
+++ b/python/llm-server/templates/replan_day.txt
@@ -0,0 +1,9 @@
+{Agent}'s original schedule for {Date} was:
+
+{OriginalPlan}
+
+However, {Agent} has made new plans. Here are {Agent}'s plans for {Date}:
+
+{Memories}
+
+Your task is to create {Agent}'s new schedule for the day. Modify the original schedule.
\ No newline at end of file
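reflection_a asks each insight to end with a comma-separated list of the memory IDs it was derived from, as in '- Alex likes studying for exams (1, 6, 25)'. A sketch of splitting the insight text from its evidence list:

import re

def split_insight(line: str):
    """Split '- insight text (1, 6, 25)' into (text, [ids]); ids empty if absent."""
    m = re.match(r"-\s*(.*?)\s*\(([\d,\s]+)\)\s*$", line)
    if m is None:
        return line.lstrip("- ").strip(), []
    text, refs = m.groups()
    return text, [int(r) for r in refs.split(",") if r.strip()]

assert split_insight("- Alex likes studying for exams (1, 6, 25)") == \
       ("Alex likes studying for exams", [1, 6, 25])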
diff --git a/python/llm-server/test.py b/python/llm-server/test.py
deleted file mode 100644
index 4ecfbbc..0000000
--- a/python/llm-server/test.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-import util
-from huggingface_hub import hf_hub_download
-
-model_name1 = "TheBloke/Mistral-7B-v0.1-GGUF"
-model_name = "mistralai/Mistral-7B-Instruct-v0.2"
-model_file = "mistral-7b-v0.1.Q4_K_M.gguf"
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-model = AutoModelForCausalLM.from_pretrained(model_name, token=util.LLAMA_API_KEY)
-model.to(device)
-tokenizer = AutoTokenizer.from_pretrained(util.MISTRAL_TOK)
-
-messages = [
-    {
-        "role": "user",
-        "content": "You are a friendly chatbot who always responds in the style of a pirate",
-    },
-    {"role": "assistant", "content": "Understood."},
-    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
-]
-tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(device)
-tokenized_chat.to(device)
-
-generated_ids = model.generate(tokenized_chat, max_new_tokens=1000, do_sample=True)
-decoded = tokenizer.batch_decode(generated_ids)
-print(decoded[0])
\ No newline at end of file
diff --git a/python/llm-server/util.py b/python/llm-server/util.py
index ed48925..c921337 100644
--- a/python/llm-server/util.py
+++ b/python/llm-server/util.py
@@ -32,7 +32,6 @@ def load_tokenizer(model, token=""):


 def load_model(model, ctx_size, temperature):
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     llm = LlamaCpp(
         model_path=model,
         max_tokens=500,
@@ -40,7 +39,6 @@ def load_model(model, ctx_size, temperature):
         n_gpu_layers=-1,
         n_batch=512,
         n_ctx=ctx_size,
-        callback_manager=callback_manager,
         verbose=False,
     )
--
GitLab