From 0563dc381e6ebfb13935bfee89b5fdac8b360ad4 Mon Sep 17 00:00:00 2001
From: Aaron Giner <aaron.giner@student.tugraz.at>
Date: Tue, 23 Apr 2024 04:38:49 +0200
Subject: [PATCH] u

---
 python/llm-server/model.py   | 16 ++++++---
 python/llm-server/queries.py | 65 +++++++++++++++---------------------
 python/llm-server/util.py    |  9 +++--
 3 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/python/llm-server/model.py b/python/llm-server/model.py
index bef7af7..51e7dde 100644
--- a/python/llm-server/model.py
+++ b/python/llm-server/model.py
@@ -1,14 +1,17 @@
 from langchain.chains import LLMChain
 from langchain_core.prompts import PromptTemplate
 from openai import OpenAI
+from huggingface_hub import hf_hub_download
 
 import util
 
 
 class Model:
-    def __init__(self, model_name, model_path, temperature, ctx_size=4096, api_key="", use_chat_gpt=False):
+    def __init__(self, model_name, model_file, model_tok, temperature, ctx_size=4096, api_key="", use_chat_gpt=False,
+                 use_local_model=False):
         self.model_name = model_name
-        self.model_path = model_path
+        self.model_file = model_file
+        self.model_tok = model_tok
         self.temperature = temperature
         self.ctx_size = ctx_size
         self.api_key = api_key
@@ -17,8 +20,13 @@ class Model:
         self.llm = None
 
         if not self.use_chat_gpt:
-            self.llm = util.load_model(model=self.model_path, ctx_size=self.ctx_size, temperature=temperature)
-            self.tokenizer = util.load_tokenizer(model=self.model_name, token=util.LLAMA_API_KEY)
+            if use_local_model:
+                self.llm = util.load_model(model=model_name, ctx_size=self.ctx_size, temperature=temperature)
+            else:
+                model_path = hf_hub_download(self.model_name, self.model_file, token=api_key)
+                self.llm = util.load_model(model=model_path, ctx_size=self.ctx_size, temperature=temperature)
+
+            self.tokenizer = util.load_tokenizer(model=self.model_tok, token=util.LLAMA_API_KEY)
 
     def query(self, messages, debug=False):
         if self.use_chat_gpt:
diff --git a/python/llm-server/queries.py b/python/llm-server/queries.py
index 3e356c5..0273b58 100644
--- a/python/llm-server/queries.py
+++ b/python/llm-server/queries.py
@@ -11,7 +11,6 @@ import memory_util
 import util
 import embeddings
 import parse_dt
-import openai_api
 import model
 
 from datetime import datetime, timedelta
@@ -22,20 +21,20 @@ def parse_request(request):
     requestType = request["type"]
 
     if requestType == "chat":
-        return query_chat(request, model.Model(util.LLAMA, util.LLAMA_PATH, 0.75, 4096,
+        return query_chat(request, model.Model(util.LLAMA_NAME, util.LLAMA_FILE, util.LLAMA_TOK, 0.75, 4096,
                                                "sk-proj-aUDdsiCXHDwoHewZFL9AT3BlbkFJIkKZEYaMi5AGEBDbW2zv", use_chat_gpt=True))
     elif requestType == "chat_summary":
-        return query_chat_summary(request, model.Model(util.MISTRAL, util.MISTRAL_PATH, 0, 4096))
+        return query_chat_summary(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
     elif requestType == "chat_extract_plan":
-        return query_chat_extract_plan(request, model.Model(util.MISTRAL, util.MISTRAL_PATH, 0, 4096))
+        return query_chat_extract_plan(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
     elif requestType == "reflection":
-        return query_reflection(request, model.Model(util.MISTRAL, util.MISTRAL_PATH, 0, 4096))
+        return query_reflection(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
     elif requestType == "poignancy":
-        return query_poignancy(request, model.Model(util.MISTRAL, util.MISTRAL_PATH, 0, 4096))
+        return query_poignancy(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
     elif requestType == "context":
-        return generate_context(request, model.Model(util.MISTRAL, util.MISTRAL_PATH, 0, 4096))
+        return generate_context(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
     elif requestType == "plan_day":
-        return query_plan_day(request, model.Model(util.MISTRAL, util.MISTRAL_PATH, 0, 4096))
+        return query_plan_day(request, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
 
     return "ERROR"
 
@@ -431,34 +430,23 @@ def run_query(messages, llm, tokenizer, use_chat_gpt=False, debug=False):
 
 
 t1 = """
-Aaron: Hi there! I'm Aaron, nice to meet you.
-Lisa: Hi Aaron, I'm Lisa. Pleasure to meet you too.
-Aaron: So, Lisa, what brings you here?
-Lisa: Oh, just exploring the city, you know. How about you?
-Aaron: Same here, just taking a stroll. It's a beautiful day, isn't it?
-Lisa: Absolutely, couldn't have asked for better weather.
-Aaron: Have you been living in this city for long?
-Lisa: No, I actually just moved here recently. How about you?
-Aaron: Oh, I've been here for a few years now. It's a great place to live.
-Lisa: That's good to hear. Any favorite spots you'd recommend?
-Aaron: Hmm, well, there's this cozy café not too far from here that I love. Great place to grab a cup of coffee and unwind.
-Lisa: That sounds lovely. I'll have to check it out sometime.
-Aaron: Definitely! So, what do you do for a living, Lisa?
-Lisa: I work as a graphic designer. How about you?
-Aaron: I'm a software engineer. Graphic design sounds fascinating, though. What do you enjoy most about it?
-Lisa: I love the creativity it allows me to express and the satisfaction of seeing a project come to life visually. What about software engineering? What do you enjoy about it?
-Aaron: I find problem-solving really fulfilling, especially when I can create something that makes people's lives easier or more efficient. Plus, there's always something new to learn in the field, which keeps things interesting.
-Lisa: That's true, continuous learning is definitely a perk. Do you have any hobbies outside of work?
-Aaron: Yeah, I enjoy hiking and photography. How about you?
-Lisa: I love painting and reading. It's a great way to unwind after a busy day.
-Aaron: Sounds like we both enjoy creative pursuits. Have you done any painting recently?
-Lisa: Not as much as I'd like to, unfortunately. Work has been keeping me pretty busy. But I'm hoping to carve out more time for it soon.
-Aaron: It's important to make time for the things you love. Maybe we can both prioritize our hobbies more.
-Lisa: Definitely! It's all about finding that balance, right?
-Aaron: Absolutely. Well, it was really nice meeting you, Lisa. Maybe we can grab that coffee sometime and chat some more.
-Lisa: That sounds like a plan, Aaron. I'd like that. Thanks for the pleasant conversation!
-Aaron: No problem at all. Take care, Lisa. See you around!
-Lisa: You too, Aaron. Bye for now!
+Aaron: Hey Lisa, how's your week going?
+Lisa: Hi Aaron, it's been pretty good, thanks for asking. How about you?
+Aaron: Not bad at all. Say, I was thinking, would you like to grab a coffee together next Sunday?
+Lisa: Oh, that sounds wonderful! I'd love to. Where do you have in mind?
+Aaron: There's this cozy café downtown that I've been wanting to try out. How does that sound?
+Lisa: Perfect! Count me in. What time were you thinking?
+Aaron: How about around 10 in the morning? Does that work for you?
+Lisa: Absolutely, that works great for me.
+Aaron: Awesome! Looking forward to it. Oh, and speaking of plans, I was also thinking about going hiking on March 25th. Would you be interested in joining me?
+Lisa: Hiking sounds like a fantastic idea! March 25th works for me too. Where were you thinking of going?
+Aaron: I was considering hiking up at Pine Ridge Trail. It's got some stunning views along the way.
+Lisa: That sounds amazing! I've heard great things about Pine Ridge Trail. Count me in for that too.
+Aaron: Fantastic! It'll be great to have some company. How about we start around 9 am?
+Lisa: Sounds good to me, 9 am it is. I'll make sure to pack some snacks for the hike.
+Aaron: Great! See you for coffee next Sunday at 10 am and then for the hike on March 25th at 9 am!
+Lisa: Absolutely! Take care until then, Aaron.
+Aaron: You too, Lisa. Bye for now!
 """
 
 d = {"type": "chat_summary",
@@ -470,8 +458,7 @@ d = {"type": "chat_summary",
      },
      "memories": []}
 
-# query_chat_summary(d, util.load_model(util.MISTRAL_PATH, 4096, 0), util.load_tokenizer(util.MISTRAL_TOK))
+# query_chat_summary(d, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
 # query_chat_summary_extract("chat_extract_plan2", d, util.load_model(util.MISTRAL_PATH, 4096, 0), util.load_tokenizer(util.MISTRAL_TOK))
-# query_chat_extract_plan(d, util.load_model(util.MISTRAL_PATH, 4096, 0), util.load_tokenizer(util.MISTRAL_TOK))
-# plans_validate(["On Sunday, April 25, 2024, 9:00am, Aaron is planning to go on a hike at Pine Ridge Trail with Lisa"], d["data"]["datetime"])
+# query_chat_extract_plan(d, model.Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK, 0, 4096))
 
diff --git a/python/llm-server/util.py b/python/llm-server/util.py
index 0b68bb0..2991ca3 100644
--- a/python/llm-server/util.py
+++ b/python/llm-server/util.py
@@ -15,9 +15,14 @@ LLAMA_PATH_13b = "X:/LLM Models/llama-2-13b-chat.Q4_K_M.gguf"
 MISTRAL_LARGE_PATH = "X:/LLM Models/mistral-7b-instruct-v0.1.Q5_K_M.gguf"
 LUNA_UNC_PATH = "X:/LLM Models/luna-ai-llama2-uncensored.Q4_K_M.gguf"
 
-MISTRAL = "mistralai/Mistral-7B-Instruct-v0.1"
-LLAMA = "meta-llama/Llama-2-7b-chat-hf"
+MISTRAL_TOK = "mistralai/Mistral-7B-Instruct-v0.1"
+MISTRAL_GGUF = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+MISTRAL_FILE = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
+
+LLAMA_TOK = "meta-llama/Llama-2-7b-chat-hf"
 LLAMA_API_KEY = "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"
+LLAMA_NAME = "TheBloke/Llama-2-7B-GGUF"
+LLAMA_FILE = "llama-2-7b.Q4_K_M.gguf"
 
 KW_EN_CORE = "X:/LLM Models/nlm/en_core_web_sm-3.7.1-py3-none-any.whl"
-- 
GitLab
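
Usage sketch (supplementary note, not applied by the patch): the calls below
show how the reworked Model constructor from model.py is exercised after this
change. The constants mirror the new util.py values; the local GGUF path and
the "sk-..." key are hypothetical placeholders, and hf_hub_download(repo_id,
filename, token=...) resolves to a cached local file path exactly as in the
constructor hunk above.

    import util
    from model import Model

    # Hub-backed GGUF model: hf_hub_download fetches MISTRAL_FILE from the
    # MISTRAL_GGUF repo and util.load_model receives the downloaded path.
    mistral = Model(util.MISTRAL_GGUF, util.MISTRAL_FILE, util.MISTRAL_TOK,
                    temperature=0, ctx_size=4096)

    # Local GGUF model: model_name is passed straight to util.load_model as
    # a file path, so nothing is downloaded and model_file goes unused on
    # this branch. The path is a hypothetical example.
    local = Model("X:/LLM Models/mistral-7b-instruct-v0.1.Q5_K_M.gguf", None,
                  util.MISTRAL_TOK, temperature=0, ctx_size=4096,
                  use_local_model=True)

    # ChatGPT-backed model, as wired up for the "chat" request in queries.py:
    # no llama.cpp model or tokenizer is loaded, and queries go through the
    # OpenAI client instead.
    chat = Model(util.LLAMA_NAME, util.LLAMA_FILE, util.LLAMA_TOK, 0.75,
                 4096, "sk-...", use_chat_gpt=True)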
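
Likewise, a minimal sketch of the server-side dispatch after this change:
parse_request in queries.py routes purely on the "type" key and now builds a
hub-backed Mistral Model per request for every non-chat type. The payload
shape follows the test dict d at the bottom of queries.py; the
handler-specific fields inside "data" are omitted here as placeholders.

    from queries import parse_request

    # Routed to query_chat_summary with a fresh Mistral-backed Model; the
    # empty "data" dict stands in for the handler's real fields.
    result = parse_request({"type": "chat_summary",
                            "data": {},
                            "memories": []})

    # Unmatched request types fall through to the literal "ERROR" return.
    assert parse_request({"type": "unknown"}) == "ERROR"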