From afc6a951774b51c62dc61c4e607a7c25346023f4 Mon Sep 17 00:00:00 2001
From: Thamognya Kodi <contact@thamognya.com>
Date: Thu, 12 Jan 2023 12:50:18 +0700
Subject: [PATCH] update: pass ChatGPT conversation/parent IDs to answer(), move ChatGPT.py to tools/; look at todo

---
 internet_ml/NLP/no_context/QA.py       | 23 ++++++++++++++++-------
 internet_ml/tools/{NLP => }/ChatGPT.py |  2 ++
 internet_ml/tools/NLP/data/internet.py |  5 +----
 internet_ml/utils/config.py            |  9 ---------
 4 files changed, 19 insertions(+), 20 deletions(-)
 rename internet_ml/tools/{NLP => }/ChatGPT.py (99%)

diff --git a/internet_ml/NLP/no_context/QA.py b/internet_ml/NLP/no_context/QA.py
index 4c1eb76..8f2823e 100644
--- a/internet_ml/NLP/no_context/QA.py
+++ b/internet_ml/NLP/no_context/QA.py
@@ -11,7 +11,9 @@ from transformers import pipeline
 
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data")
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP")
+sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools")
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils")
+
 import config
 import internet
 from ChatGPT import Chatbot
@@ -26,6 +28,8 @@ def answer(
     GOOGLE_SEARCH_ENGINE_ID: str = "",
     OPENAI_API_KEY: str = "",
     CHATGPT_SESSION_TOKEN: str = "",
+    CHATGPT_CONVERSATION_ID: str = "",
+    CHATGPT_PARENT_ID: str = "",
 ) -> tuple[Any, list[str]]:
     # if environment keys are not given, assume it is in env
     if GOOGLE_SEARCH_API_KEY == "":
@@ -37,6 +41,10 @@ def answer(
         openai.api_key = OPENAI_API_KEY
     if CHATGPT_SESSION_TOKEN == "":
         CHATGPT_SESSION_TOKEN = str(os.environ.get("CHATGPT_SESSION_TOKEN"))
+    if CHATGPT_CONVERSATION_ID == "":
+        CHATGPT_CONVERSATION_ID = str(os.environ.get("CHATGPT_CONVERSATION_ID"))
+    if CHATGPT_PARENT_ID == "":
+        CHATGPT_PARENT_ID = str(os.environ.get("CHATGPT_PARENT_ID"))
     """
     model naming convention
     # Open-AI models:
@@ -55,15 +63,16 @@ def answer(
             ).google(filter_irrelevant=False)
             chatbot = Chatbot(
                 {"session_token": CHATGPT_SESSION_TOKEN},
-                conversation_id=None,
-                parent_id=None,
+                conversation_id=CHATGPT_CONVERSATION_ID,
+                parent_id=CHATGPT_PARENT_ID,
             )
-            print(results)
+            prompt = f"Utilize the following context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:10000]} and answer the question only with the given context: {query}"
             response = chatbot.ask(
-                f"Utilize the following context: {results[0][:2000]} ontop of existing knowledge and answer the question: {query}",
-                conversation_id=None,
-                parent_id=None,
+                prompt=prompt,
+                conversation_id=CHATGPT_CONVERSATION_ID,
+                parent_id=CHATGPT_PARENT_ID,
             )
+            print(response)
             return (response["message"], results[1])
         else:
             if model == "openai-text-davinci-003":
@@ -93,5 +102,5 @@ def answer(
 
 
 # print(os.environ)
-print(answer(query="When was Cristiano Ronaldo Born?", model="openai-chatgpt"))
+print(answer(query="What is the latest Pokemon Game in 2022?", model="openai-chatgpt"))
 # def custom_answer
diff --git a/internet_ml/tools/NLP/ChatGPT.py b/internet_ml/tools/ChatGPT.py
similarity index 99%
rename from internet_ml/tools/NLP/ChatGPT.py
rename to internet_ml/tools/ChatGPT.py
index d108004..151147e 100644
--- a/internet_ml/tools/NLP/ChatGPT.py
+++ b/internet_ml/tools/ChatGPT.py
@@ -1,5 +1,6 @@
 # type: ignore
 # For some reason package not working so just deal with it and dont ask questions
+# Copied and updated from https://github.com/acheong08/ChatGPT/blob/main/src/revChatGPT/ChatGPT.py
 
 import json
 import logging
@@ -538,6 +539,7 @@ class Chatbot:
         options.add_argument("--no-sandbox")
         options.add_argument("--disable-setuid-sandbox")
         options.add_argument("--disable-dev-shm-usage")
+        # Headless mode is left disabled; to run the browser without a visible window, enable: options.add_argument('--headless')
         if self.config.get("proxy", "") != "":
             options.add_argument("--proxy-server=" + self.config["proxy"])
         return options
diff --git a/internet_ml/tools/NLP/data/internet.py b/internet_ml/tools/NLP/data/internet.py
index 007b2a6..ae928c7 100644
--- a/internet_ml/tools/NLP/data/internet.py
+++ b/internet_ml/tools/NLP/data/internet.py
@@ -111,15 +111,12 @@ class Google:
         contents = loop.run_until_complete(self.__fetch_urls(self.__urls))
         loop.close()
         self.__content = self.__flatten(contents)
+        self.__content = [str(x) for x in self.__content]
 
     def __filter_irrelevant_processing(self: "Google") -> None:
-        # Create a ThreadPoolExecutor with 4 worker threads
         with concurrent.futures.ThreadPoolExecutor(max_workers=500) as executor:
-            # Create a list of futures for the filtering tasks
             futures = [executor.submit(filter_relevant, self.__content, self.__query)]
-            # Wait for the tasks to complete
             concurrent.futures.wait(futures)
-            # Get the results of the tasks
             content: list[str] = []
             for future in futures:
                 content.append(future.result())
diff --git a/internet_ml/utils/config.py b/internet_ml/utils/config.py
index f754421..b153787 100644
--- a/internet_ml/utils/config.py
+++ b/internet_ml/utils/config.py
@@ -1,14 +1,5 @@
 from typing import Any, List, Tuple
 
-import logging
-
-logging.basicConfig(
-    filename="config.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 # General
 CONF_DEBUG: bool = True
 # Google