update: remove debug logging and the pickle result cache, truncate ChatGPT context (look at todo)

2023-01-12 11:51:03 +07:00 · 2023-01-12 11:51:03 +07:00 · d20ec8a0fb
parent 66eb7eb9e0
commit d20ec8a0fb
4 changed files with 4 additions and 72 deletions
--- a/internet_ml/NLP/no_context/QA.py
+++ b/internet_ml/NLP/no_context/QA.py
@@ -1,7 +1,6 @@
 # type: ignore
 from typing import Any, List, Tuple

-import logging
 import os
 import sys
 from pathlib import Path
@@ -10,13 +9,6 @@ import dotenv
 import openai
 from transformers import pipeline

-logging.basicConfig(
-    filename="QA.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data")
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP")
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils")
@@ -55,25 +47,24 @@ def answer(
    """
    if not (model.startswith("openai-") or model.startswith("hf-")):
        model = "openai-chatgpt"  # Default
-
    if model.startswith("openai-"):
        if model == "openai-chatgpt":
            # ChatGPT
            results: tuple[list[str], list[str]] = internet.Google(
                query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
            ).google(filter_irrelevant=False)
-            print(results)
            chatbot = Chatbot(
                {"session_token": CHATGPT_SESSION_TOKEN},
                conversation_id=None,
                parent_id=None,
            )
+            print(results)
            response = chatbot.ask(
-                f"Utilize the following context: {results[0]} ontop of existing knowledge and answer the question: {query}",
+                f"Utilize the following context: {results[0][:2000]} ontop of existing knowledge and answer the question: {query}",
                conversation_id=None,
                parent_id=None,
            )
-            return (response.message, results[1])
+            return (response["message"], results[1])
        else:
            if model == "openai-text-davinci-003":
                results: tuple[list[str], list[str]] = internet.Google(
@@ -102,5 +93,5 @@ def answer(


 # print(os.environ)
-# print(answer(query="What is the newest Pokemon Game?", model="hf-deepset/deberta-v3-base-squad2"))
+print(answer(query="When was Cristiano Ronaldo Born?", model="openai-chatgpt"))
 # def custom_answer
--- a/internet_ml/tools/NLP/data/internet.py
+++ b/internet_ml/tools/NLP/data/internet.py
@@ -1,6 +1,5 @@
 from typing import Any, Dict, List, Tuple

-import logging
 import os
 import pickle
 import sys
@@ -14,15 +13,6 @@ HTTP_USERAGENT: dict[str, str] = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
 }

-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-logging.basicConfig(
-    filename="internet.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils/NLP")
 sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
 sys.path.append(str(Path(__file__).parent.parent))
@@ -68,7 +58,6 @@ class Google:
                str(self.__query),
            )
        )
-        self.__cache_file: str = "google_internet_cache.pkl"
        self.__content: list[str] = []

    def __get_urls(self: "Google") -> None:
@@ -90,8 +79,6 @@ class Google:
            self.__urls.append(result["link"])
            if len(self.__urls) == self.__num_res:
                break
-        if config.CONF_DEBUG:
-            logging.info(f"Links: {self.__urls}")

    async def __fetch_url(self: "Google", session: Any, url: str) -> list[str]:
        try:
@@ -101,14 +88,8 @@ class Google:
                text = soup.get_text()
                normalized_text = normalizer(text)
                sentences: list[str] = sentencizer(normalized_text)
-                if config.CONF_DEBUG:
-                    logging.info(f"Sentences: {sentences}")
                return sentences
        except aiohttp.ClientConnectorError:
-            if config.CONF_DEBUG:
-                logging.info(
-                    f"ClientConnector Error: Likely a connection issue with wifi"
-                )
            return [""]
        except Exception:
            return [""]
@@ -147,25 +128,11 @@ class Google:
    def google(
        self: "Google", filter_irrelevant: bool = True
    ) -> tuple[list[str], list[str]]:
-        # Check the cache file first
-        try:
-            with open(self.__cache_file, "rb") as f:
-                cache = pickle.load(f)
-        except FileNotFoundError:
-            cache = {}
-        # Check if query are in the cache
-        if self.__query in cache:
-            results_cache: tuple[list[str], list[str]] = cache[self.__query]
-            return results_cache
-        # If none of the keywords are in the cache, get the results and update the cache
        self.__get_urls()
        self.__get_urls_contents()
        if filter_irrelevant:
            self.__filter_irrelevant_processing()
        results: tuple[list[str], list[str]] = (self.__content, self.__urls)
-        cache[self.__query] = results
-        with open(self.__cache_file, "wb") as f:
-            pickle.dump(cache, f)
        return results


--- a/internet_ml/tools/NLP/normalize.py
+++ b/internet_ml/tools/NLP/normalize.py
@@ -1,13 +1,3 @@
-import logging
-
-# logging config
-logging.basicConfig(
-    filename="normalize.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 import concurrent.futures
 import string
 import sys
@@ -67,8 +57,6 @@ def normalizer(text: str) -> str:
        .replace("               ", " ")
    )
    text = remove_non_ascii(text)
-    if config.CONF_DEBUG:
-        logging.info(text)
    return text


@@ -81,6 +69,4 @@ def normalize_sentences(sentences: list[str]) -> list[str]:
        ):
            if future.result():
                normalized_sentences.append(sentence)
-    if config.CONF_DEBUG:
-        logging.info(f"Normalized Sentences: {normalize_sentences}")
    return normalized_sentences
--- a/internet_ml/tools/NLP/sentencize.py
+++ b/internet_ml/tools/NLP/sentencize.py
@@ -1,15 +1,5 @@
 from typing import Any, List

-import logging
-
-# logging config
-logging.basicConfig(
-    filename="sentencize.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 import sys
 from pathlib import Path

@@ -51,8 +41,6 @@ def sentencizer(text: str) -> list[str]:
        for future in concurrent.futures.as_completed(futures):
            english_sentences.append(future.result())

-    if config.CONF_DEBUG:
-        logging.info(f"sentences: {english_sentences}")
    return english_sentences