From afc6a951774b51c62dc61c4e607a7c25346023f4 Mon Sep 17 00:00:00 2001 From: Thamognya Kodi Date: Thu, 12 Jan 2023 12:50:18 +0700 Subject: [PATCH] update: look at todo --- internet_ml/NLP/no_context/QA.py | 23 ++++++++++++++++------- internet_ml/tools/{NLP => }/ChatGPT.py | 2 ++ internet_ml/tools/NLP/data/internet.py | 5 +---- internet_ml/utils/config.py | 9 --------- 4 files changed, 19 insertions(+), 20 deletions(-) rename internet_ml/tools/{NLP => }/ChatGPT.py (99%) diff --git a/internet_ml/NLP/no_context/QA.py b/internet_ml/NLP/no_context/QA.py index 4c1eb76..8f2823e 100644 --- a/internet_ml/NLP/no_context/QA.py +++ b/internet_ml/NLP/no_context/QA.py @@ -11,7 +11,9 @@ from transformers import pipeline sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data") sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP") +sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools") sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils") + import config import internet from ChatGPT import Chatbot @@ -26,6 +28,8 @@ def answer( GOOGLE_SEARCH_ENGINE_ID: str = "", OPENAI_API_KEY: str = "", CHATGPT_SESSION_TOKEN: str = "", + CHATGPT_CONVERSATION_ID: str = "", + CHATGPT_PARENT_ID: str = "", ) -> tuple[Any, list[str]]: # if environment keys are not given, assume it is in env if GOOGLE_SEARCH_API_KEY == "": @@ -37,6 +41,10 @@ def answer( openai.api_key = OPENAI_API_KEY if CHATGPT_SESSION_TOKEN == "": CHATGPT_SESSION_TOKEN = str(os.environ.get("CHATGPT_SESSION_TOKEN")) + if CHATGPT_CONVERSATION_ID == "": + CHATGPT_CONVERSATION_ID = str(os.environ.get("CHATGPT_CONVERSATION_ID")) + if CHATGPT_PARENT_ID == "": + CHATGPT_PARENT_ID = str(os.environ.get("CHATGPT_PARENT_ID")) """ model naming convention # Open-AI models: @@ -55,15 +63,16 @@ def answer( ).google(filter_irrelevant=False) chatbot = Chatbot( {"session_token": CHATGPT_SESSION_TOKEN}, - conversation_id=None, - parent_id=None, + conversation_id=CHATGPT_CONVERSATION_ID, + parent_id=CHATGPT_PARENT_ID, ) - print(results) + prompt = f"Utilize the following context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:10000]} and answer the question only with the given context: {query}" response = chatbot.ask( - f"Utilize the following context: {results[0][:2000]} ontop of existing knowledge and answer the question: {query}", - conversation_id=None, - parent_id=None, + prompt=prompt, + conversation_id=CHATGPT_CONVERSATION_ID, + parent_id=CHATGPT_PARENT_ID, ) + print(response) return (response["message"], results[1]) else: if model == "openai-text-davinci-003": @@ -93,5 +102,5 @@ def answer( # print(os.environ) -print(answer(query="When was Cristiano Ronaldo Born?", model="openai-chatgpt")) +print(answer(query="What is the latest Pokemon Game in 2022?", model="openai-chatgpt")) # def custom_answer diff --git a/internet_ml/tools/NLP/ChatGPT.py b/internet_ml/tools/ChatGPT.py similarity index 99% rename from internet_ml/tools/NLP/ChatGPT.py rename to internet_ml/tools/ChatGPT.py index d108004..151147e 100644 --- a/internet_ml/tools/NLP/ChatGPT.py +++ b/internet_ml/tools/ChatGPT.py @@ -1,5 +1,6 @@ # type: ignore # For some reason package not working so just deal with it and dont ask questions +# Copied and updated from https://github.com/acheong08/ChatGPT/blob/main/src/revChatGPT/ChatGPT.py import json import logging @@ -538,6 +539,7 @@ class Chatbot: options.add_argument("--no-sandbox") options.add_argument("--disable-setuid-sandbox") options.add_argument("--disable-dev-shm-usage") + # options.add_argument('--headless') if self.config.get("proxy", "") != "": options.add_argument("--proxy-server=" + self.config["proxy"]) return options diff --git a/internet_ml/tools/NLP/data/internet.py b/internet_ml/tools/NLP/data/internet.py index 007b2a6..ae928c7 100644 --- a/internet_ml/tools/NLP/data/internet.py +++ b/internet_ml/tools/NLP/data/internet.py @@ -111,15 +111,12 @@ class Google: contents = loop.run_until_complete(self.__fetch_urls(self.__urls)) loop.close() self.__content = self.__flatten(contents) + self.__content = [str(x) for x in self.__content] def __filter_irrelevant_processing(self: "Google") -> None: - # Create a ThreadPoolExecutor with 4 worker threads with concurrent.futures.ThreadPoolExecutor(max_workers=500) as executor: - # Create a list of futures for the filtering tasks futures = [executor.submit(filter_relevant, self.__content, self.__query)] - # Wait for the tasks to complete concurrent.futures.wait(futures) - # Get the results of the tasks content: list[str] = [] for future in futures: content.append(future.result()) diff --git a/internet_ml/utils/config.py b/internet_ml/utils/config.py index f754421..b153787 100644 --- a/internet_ml/utils/config.py +++ b/internet_ml/utils/config.py @@ -1,14 +1,5 @@ from typing import Any, List, Tuple -import logging - -logging.basicConfig( - filename="config.log", - filemode="w", - level=logging.INFO, - format="%(name)s - %(levelname)s - %(message)s", -) - # General CONF_DEBUG: bool = True # Google