diff --git a/internet_ml/NLP/no_context/QA.py b/internet_ml/NLP/no_context/QA.py
index 34bdb1f..4c1eb76 100644
--- a/internet_ml/NLP/no_context/QA.py
+++ b/internet_ml/NLP/no_context/QA.py
@@ -1,7 +1,6 @@
 # type: ignore
 from typing import Any, List, Tuple
 
-import logging
 import os
 import sys
 from pathlib import Path
@@ -10,13 +9,6 @@
 import dotenv
 import openai
 from transformers import pipeline
-logging.basicConfig(
-    filename="QA.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data")
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP")
 sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils")
@@ -55,25 +47,24 @@ def answer(
     """
     if not (model.startswith("openai-") or model.startswith("hf-")):
         model = "openai-chatgpt"  # Default
-
     if model.startswith("openai-"):
         if model == "openai-chatgpt":
             # ChatGPT
             results: tuple[list[str], list[str]] = internet.Google(
                 query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
             ).google(filter_irrelevant=False)
-            print(results)
             chatbot = Chatbot(
                 {"session_token": CHATGPT_SESSION_TOKEN},
                 conversation_id=None,
                 parent_id=None,
             )
+            print(results)
             response = chatbot.ask(
-                f"Utilize the following context: {results[0]} ontop of existing knowledge and answer the question: {query}",
+                f"Utilize the following context: {results[0][:2000]} on top of existing knowledge and answer the question: {query}",
                 conversation_id=None,
                 parent_id=None,
             )
-            return (response.message, results[1])
+            return (response["message"], results[1])
         else:
             if model == "openai-text-davinci-003":
                 results: tuple[list[str], list[str]] = internet.Google(
@@ -102,5 +93,5 @@
 
 
 # print(os.environ)
-# print(answer(query="What is the newest Pokemon Game?", model="hf-deepset/deberta-v3-base-squad2"))
+print(answer(query="When was Cristiano Ronaldo born?", model="openai-chatgpt"))
 # def custom_answer
diff --git a/internet_ml/tools/NLP/data/internet.py b/internet_ml/tools/NLP/data/internet.py
index 3771e0a..007b2a6 100644
--- a/internet_ml/tools/NLP/data/internet.py
+++ b/internet_ml/tools/NLP/data/internet.py
@@ -1,6 +1,5 @@
 from typing import Any, Dict, List, Tuple
 
-import logging
 import os
 import pickle
 import sys
@@ -14,15 +13,6 @@
 HTTP_USERAGENT: dict[str, str] = {
     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
 }
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-logging.basicConfig(
-    filename="internet.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils/NLP")
 sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
 sys.path.append(str(Path(__file__).parent.parent))
@@ -68,7 +58,6 @@
                 str(self.__query),
             )
         )
-        self.__cache_file: str = "google_internet_cache.pkl"
         self.__content: list[str] = []
 
     def __get_urls(self: "Google") -> None:
@@ -90,8 +79,6 @@
                     self.__urls.append(result["link"])
                     if len(self.__urls) == self.__num_res:
                         break
-        if config.CONF_DEBUG:
-            logging.info(f"Links: {self.__urls}")
 
     async def __fetch_url(self: "Google", session: Any, url: str) -> list[str]:
         try:
@@ -101,14 +88,8 @@
             text = soup.get_text()
             normalized_text = normalizer(text)
             sentences: list[str] = sentencizer(normalized_text)
-            if config.CONF_DEBUG:
-                logging.info(f"Sentences: {sentences}")
             return sentences
         except aiohttp.ClientConnectorError:
-            if config.CONF_DEBUG:
-                logging.info(
-                    f"ClientConnector Error: Likely a connection issue with wifi"
-                )
             return [""]
         except Exception:
             return [""]
@@ -147,25 +128,11 @@
     def google(
         self: "Google", filter_irrelevant: bool = True
     ) -> tuple[list[str], list[str]]:
-        # Check the cache file first
-        try:
-            with open(self.__cache_file, "rb") as f:
-                cache = pickle.load(f)
-        except FileNotFoundError:
-            cache = {}
-        # Check if query are in the cache
-        if self.__query in cache:
-            results_cache: tuple[list[str], list[str]] = cache[self.__query]
-            return results_cache
-        # If none of the keywords are in the cache, get the results and update the cache
         self.__get_urls()
         self.__get_urls_contents()
         if filter_irrelevant:
             self.__filter_irrelevant_processing()
         results: tuple[list[str], list[str]] = (self.__content, self.__urls)
-        cache[self.__query] = results
-        with open(self.__cache_file, "wb") as f:
-            pickle.dump(cache, f)
         return results
 
 
diff --git a/internet_ml/tools/NLP/normalize.py b/internet_ml/tools/NLP/normalize.py
index 3b6857d..f616dad 100644
--- a/internet_ml/tools/NLP/normalize.py
+++ b/internet_ml/tools/NLP/normalize.py
@@ -1,13 +1,3 @@
-import logging
-
-# logging config
-logging.basicConfig(
-    filename="normalize.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 import concurrent.futures
 import string
 import sys
@@ -67,8 +57,6 @@ def normalizer(text: str) -> str:
         .replace("  ", " ")
     )
     text = remove_non_ascii(text)
-    if config.CONF_DEBUG:
-        logging.info(text)
     return text
 
 
@@ -81,6 +69,4 @@ def normalize_sentences(sentences: list[str]) -> list[str]:
         ):
             if future.result():
                 normalized_sentences.append(sentence)
-    if config.CONF_DEBUG:
-        logging.info(f"Normalized Sentences: {normalize_sentences}")
     return normalized_sentences
diff --git a/internet_ml/tools/NLP/sentencize.py b/internet_ml/tools/NLP/sentencize.py
index 812892a..4f48c64 100644
--- a/internet_ml/tools/NLP/sentencize.py
+++ b/internet_ml/tools/NLP/sentencize.py
@@ -1,15 +1,5 @@
 from typing import Any, List
 
-import logging
-
-# logging config
-logging.basicConfig(
-    filename="sentencize.log",
-    filemode="w",
-    level=logging.INFO,
-    format="%(name)s - %(levelname)s - %(message)s",
-)
-
 import sys
 from pathlib import Path
 
@@ -51,8 +41,6 @@ def sentencizer(text: str) -> list[str]:
        for future in concurrent.futures.as_completed(futures):
            english_sentences.append(future.result())
 
-    if config.CONF_DEBUG:
-        logging.info(f"sentences: {english_sentences}")
    return english_sentences
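
A note on the context truncation added in the QA.py hunk: results[0] is a list of sentences, so results[0][:2000] caps the prompt at 2,000 list elements rather than 2,000 characters, and the f-string still interpolates the whole slice. If the goal is a character budget for the ChatGPT prompt, joining first and then slicing the string is the usual pattern. A minimal sketch of that alternative; the build_prompt helper and the 2000-character budget are illustrative assumptions, not part of this patch:

    # Sketch: cap the prompt context by characters instead of by sentence count.
    # "sentences" stands in for results[0] from Google.google().
    def build_prompt(sentences: list[str], query: str, budget: int = 2000) -> str:
        context = " ".join(sentences)[:budget]  # join first, then slice characters
        return (
            f"Utilize the following context: {context} "
            f"on top of existing knowledge and answer the question: {query}"
        )

    # Usage: the context portion stays within the budget no matter how many
    # sentences the search returned.
    print(build_prompt(["Cristiano Ronaldo was born on 5 February 1985."] * 500,
                       "When was Cristiano Ronaldo born?"))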
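
The remaining hunks delete the per-module logging setup (QA.log, internet.log, normalize.log, sentencize.log) and the pickle-backed query cache. One small follow-up the patch misses: with the cache gone, the pickle import left at the top of internet.py is unused. If logging is still wanted during development, the conventional replacement for the deleted blocks is a single root-logger configuration at the program entry point, so library modules stay free of side effects. A minimal sketch under that assumption, reusing the format string from the removed config:

    # Sketch: configure logging once, in the entry point only.
    import logging

    logging.basicConfig(
        level=logging.INFO,
        format="%(name)s - %(levelname)s - %(message)s",
    )

    # Library modules (internet.py, normalize.py, ...) then only create named
    # loggers and never call basicConfig themselves:
    logger = logging.getLogger(__name__)
    logger.info("configured once at the entry point")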