update: look at todo

main
Thamognya Kodi 2023-01-12 11:51:03 +07:00
parent 66eb7eb9e0
commit d20ec8a0fb
4 changed files with 4 additions and 72 deletions

View File

@@ -1,7 +1,6 @@
# type: ignore
from typing import Any, List, Tuple
import logging
import os
import sys
from pathlib import Path
@@ -10,13 +9,6 @@ import dotenv
import openai
from transformers import pipeline
logging.basicConfig(
filename="QA.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data")
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP")
sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils")
@@ -55,25 +47,24 @@ def answer(
"""
if not (model.startswith("openai-") or model.startswith("hf-")):
model = "openai-chatgpt" # Default
if model.startswith("openai-"):
if model == "openai-chatgpt":
# ChatGPT
results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=False)
print(results)
chatbot = Chatbot(
{"session_token": CHATGPT_SESSION_TOKEN},
conversation_id=None,
parent_id=None,
)
print(results)
response = chatbot.ask(
f"Utilize the following context: {results[0]} ontop of existing knowledge and answer the question: {query}",
f"Utilize the following context: {results[0][:2000]} ontop of existing knowledge and answer the question: {query}",
conversation_id=None,
parent_id=None,
)
return (response.message, results[1])
return (response["message"], results[1])
else:
if model == "openai-text-davinci-003":
results: tuple[list[str], list[str]] = internet.Google(
@@ -102,5 +93,5 @@ def answer(
# print(os.environ)
# print(answer(query="What is the newest Pokemon Game?", model="hf-deepset/deberta-v3-base-squad2"))
print(answer(query="When was Cristiano Ronaldo Born?", model="openai-chatgpt"))
# def custom_answer

View File

@@ -1,6 +1,5 @@
from typing import Any, Dict, List, Tuple
import logging
import os
import pickle
import sys
@@ -14,15 +13,6 @@ HTTP_USERAGENT: dict[str, str] = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.basicConfig(
filename="internet.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils/NLP")
sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
sys.path.append(str(Path(__file__).parent.parent))
@@ -68,7 +58,6 @@ class Google:
str(self.__query),
)
)
self.__cache_file: str = "google_internet_cache.pkl"
self.__content: list[str] = []
def __get_urls(self: "Google") -> None:
@@ -90,8 +79,6 @@ class Google:
self.__urls.append(result["link"])
if len(self.__urls) == self.__num_res:
break
if config.CONF_DEBUG:
logging.info(f"Links: {self.__urls}")
async def __fetch_url(self: "Google", session: Any, url: str) -> list[str]:
try:
@@ -101,14 +88,8 @@ class Google:
text = soup.get_text()
normalized_text = normalizer(text)
sentences: list[str] = sentencizer(normalized_text)
if config.CONF_DEBUG:
logging.info(f"Sentences: {sentences}")
return sentences
except aiohttp.ClientConnectorError:
if config.CONF_DEBUG:
logging.info(
f"ClientConnector Error: Likely a connection issue with wifi"
)
return [""]
except Exception:
return [""]
@@ -147,25 +128,11 @@ class Google:
def google(
self: "Google", filter_irrelevant: bool = True
) -> tuple[list[str], list[str]]:
# Check the cache file first
try:
with open(self.__cache_file, "rb") as f:
cache = pickle.load(f)
except FileNotFoundError:
cache = {}
# Check if query are in the cache
if self.__query in cache:
results_cache: tuple[list[str], list[str]] = cache[self.__query]
return results_cache
# If none of the keywords are in the cache, get the results and update the cache
self.__get_urls()
self.__get_urls_contents()
if filter_irrelevant:
self.__filter_irrelevant_processing()
results: tuple[list[str], list[str]] = (self.__content, self.__urls)
cache[self.__query] = results
with open(self.__cache_file, "wb") as f:
pickle.dump(cache, f)
return results

View File

@@ -1,13 +1,3 @@
import logging
# logging config
logging.basicConfig(
filename="normalize.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
import concurrent.futures
import string
import sys
@@ -67,8 +57,6 @@ def normalizer(text: str) -> str:
.replace(" ", " ")
)
text = remove_non_ascii(text)
if config.CONF_DEBUG:
logging.info(text)
return text
@@ -81,6 +69,4 @@ def normalize_sentences(sentences: list[str]) -> list[str]:
):
if future.result():
normalized_sentences.append(sentence)
if config.CONF_DEBUG:
logging.info(f"Normalized Sentences: {normalize_sentences}")
return normalized_sentences

View File

@@ -1,15 +1,5 @@
from typing import Any, List
import logging
# logging config
logging.basicConfig(
filename="sentencize.log",
filemode="w",
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
import sys
from pathlib import Path
@@ -51,8 +41,6 @@ def sentencizer(text: str) -> list[str]:
for future in concurrent.futures.as_completed(futures):
english_sentences.append(future.result())
if config.CONF_DEBUG:
logging.info(f"sentences: {english_sentences}")
return english_sentences