update: look at todo

main
Thamognya Kodi 2023-01-12 11:51:03 +07:00
parent 66eb7eb9e0
commit d20ec8a0fb
4 changed files with 4 additions and 72 deletions

View File

@ -1,7 +1,6 @@
# type: ignore # type: ignore
from typing import Any, List, Tuple from typing import Any, List, Tuple
import logging
import os import os
import sys import sys
from pathlib import Path from pathlib import Path
@ -10,13 +9,6 @@ import dotenv
import openai import openai
from transformers import pipeline from transformers import pipeline
logging.basicConfig(
filename="QA.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data") sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data")
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP") sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP")
sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils") sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils")
@ -55,25 +47,24 @@ def answer(
""" """
if not (model.startswith("openai-") or model.startswith("hf-")): if not (model.startswith("openai-") or model.startswith("hf-")):
model = "openai-chatgpt" # Default model = "openai-chatgpt" # Default
if model.startswith("openai-"): if model.startswith("openai-"):
if model == "openai-chatgpt": if model == "openai-chatgpt":
# ChatGPT # ChatGPT
results: tuple[list[str], list[str]] = internet.Google( results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=False) ).google(filter_irrelevant=False)
print(results)
chatbot = Chatbot( chatbot = Chatbot(
{"session_token": CHATGPT_SESSION_TOKEN}, {"session_token": CHATGPT_SESSION_TOKEN},
conversation_id=None, conversation_id=None,
parent_id=None, parent_id=None,
) )
print(results)
response = chatbot.ask( response = chatbot.ask(
f"Utilize the following context: {results[0]} ontop of existing knowledge and answer the question: {query}", f"Utilize the following context: {results[0][:2000]} ontop of existing knowledge and answer the question: {query}",
conversation_id=None, conversation_id=None,
parent_id=None, parent_id=None,
) )
return (response.message, results[1]) return (response["message"], results[1])
else: else:
if model == "openai-text-davinci-003": if model == "openai-text-davinci-003":
results: tuple[list[str], list[str]] = internet.Google( results: tuple[list[str], list[str]] = internet.Google(
@ -102,5 +93,5 @@ def answer(
# print(os.environ) # print(os.environ)
# print(answer(query="What is the newest Pokemon Game?", model="hf-deepset/deberta-v3-base-squad2")) print(answer(query="When was Cristiano Ronaldo Born?", model="openai-chatgpt"))
# def custom_answer # def custom_answer

View File

@ -1,6 +1,5 @@
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
import logging
import os import os
import pickle import pickle
import sys import sys
@ -14,15 +13,6 @@ HTTP_USERAGENT: dict[str, str] = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
} }
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.basicConfig(
filename="internet.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils/NLP") sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils/NLP")
sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils") sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
sys.path.append(str(Path(__file__).parent.parent)) sys.path.append(str(Path(__file__).parent.parent))
@ -68,7 +58,6 @@ class Google:
str(self.__query), str(self.__query),
) )
) )
self.__cache_file: str = "google_internet_cache.pkl"
self.__content: list[str] = [] self.__content: list[str] = []
def __get_urls(self: "Google") -> None: def __get_urls(self: "Google") -> None:
@ -90,8 +79,6 @@ class Google:
self.__urls.append(result["link"]) self.__urls.append(result["link"])
if len(self.__urls) == self.__num_res: if len(self.__urls) == self.__num_res:
break break
if config.CONF_DEBUG:
logging.info(f"Links: {self.__urls}")
async def __fetch_url(self: "Google", session: Any, url: str) -> list[str]: async def __fetch_url(self: "Google", session: Any, url: str) -> list[str]:
try: try:
@ -101,14 +88,8 @@ class Google:
text = soup.get_text() text = soup.get_text()
normalized_text = normalizer(text) normalized_text = normalizer(text)
sentences: list[str] = sentencizer(normalized_text) sentences: list[str] = sentencizer(normalized_text)
if config.CONF_DEBUG:
logging.info(f"Sentences: {sentences}")
return sentences return sentences
except aiohttp.ClientConnectorError: except aiohttp.ClientConnectorError:
if config.CONF_DEBUG:
logging.info(
f"ClientConnector Error: Likely a connection issue with wifi"
)
return [""] return [""]
except Exception: except Exception:
return [""] return [""]
@ -147,25 +128,11 @@ class Google:
def google( def google(
self: "Google", filter_irrelevant: bool = True self: "Google", filter_irrelevant: bool = True
) -> tuple[list[str], list[str]]: ) -> tuple[list[str], list[str]]:
# Check the cache file first
try:
with open(self.__cache_file, "rb") as f:
cache = pickle.load(f)
except FileNotFoundError:
cache = {}
# Check if query are in the cache
if self.__query in cache:
results_cache: tuple[list[str], list[str]] = cache[self.__query]
return results_cache
# If none of the keywords are in the cache, get the results and update the cache
self.__get_urls() self.__get_urls()
self.__get_urls_contents() self.__get_urls_contents()
if filter_irrelevant: if filter_irrelevant:
self.__filter_irrelevant_processing() self.__filter_irrelevant_processing()
results: tuple[list[str], list[str]] = (self.__content, self.__urls) results: tuple[list[str], list[str]] = (self.__content, self.__urls)
cache[self.__query] = results
with open(self.__cache_file, "wb") as f:
pickle.dump(cache, f)
return results return results

View File

@ -1,13 +1,3 @@
import logging
# logging config
logging.basicConfig(
filename="normalize.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
import concurrent.futures import concurrent.futures
import string import string
import sys import sys
@ -67,8 +57,6 @@ def normalizer(text: str) -> str:
.replace(" ", " ") .replace(" ", " ")
) )
text = remove_non_ascii(text) text = remove_non_ascii(text)
if config.CONF_DEBUG:
logging.info(text)
return text return text
@ -81,6 +69,4 @@ def normalize_sentences(sentences: list[str]) -> list[str]:
): ):
if future.result(): if future.result():
normalized_sentences.append(sentence) normalized_sentences.append(sentence)
if config.CONF_DEBUG:
logging.info(f"Normalized Sentences: {normalize_sentences}")
return normalized_sentences return normalized_sentences

View File

@ -1,15 +1,5 @@
from typing import Any, List from typing import Any, List
import logging
# logging config
logging.basicConfig(
filename="sentencize.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
import sys import sys
from pathlib import Path from pathlib import Path
@ -51,8 +41,6 @@ def sentencizer(text: str) -> list[str]:
for future in concurrent.futures.as_completed(futures): for future in concurrent.futures.as_completed(futures):
english_sentences.append(future.result()) english_sentences.append(future.result())
if config.CONF_DEBUG:
logging.info(f"sentences: {english_sentences}")
return english_sentences return english_sentences