update: look at todo
parent
66eb7eb9e0
commit
d20ec8a0fb
|
@ -1,7 +1,6 @@
|
|||
# type: ignore
|
||||
from typing import Any, List, Tuple
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
@ -10,13 +9,6 @@ import dotenv
|
|||
import openai
|
||||
from transformers import pipeline
|
||||
|
||||
logging.basicConfig(
|
||||
filename="QA.log",
|
||||
filemode="w",
|
||||
level=logging.INFO,
|
||||
format="%(name)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP/data")
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent) + "/tools/NLP")
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent) + "/utils")
|
||||
|
@ -55,25 +47,24 @@ def answer(
|
|||
"""
|
||||
if not (model.startswith("openai-") or model.startswith("hf-")):
|
||||
model = "openai-chatgpt" # Default
|
||||
|
||||
if model.startswith("openai-"):
|
||||
if model == "openai-chatgpt":
|
||||
# ChatGPT
|
||||
results: tuple[list[str], list[str]] = internet.Google(
|
||||
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
|
||||
).google(filter_irrelevant=False)
|
||||
print(results)
|
||||
chatbot = Chatbot(
|
||||
{"session_token": CHATGPT_SESSION_TOKEN},
|
||||
conversation_id=None,
|
||||
parent_id=None,
|
||||
)
|
||||
print(results)
|
||||
response = chatbot.ask(
|
||||
f"Utilize the following context: {results[0]} ontop of existing knowledge and answer the question: {query}",
|
||||
f"Utilize the following context: {results[0][:2000]} ontop of existing knowledge and answer the question: {query}",
|
||||
conversation_id=None,
|
||||
parent_id=None,
|
||||
)
|
||||
return (response.message, results[1])
|
||||
return (response["message"], results[1])
|
||||
else:
|
||||
if model == "openai-text-davinci-003":
|
||||
results: tuple[list[str], list[str]] = internet.Google(
|
||||
|
@ -102,5 +93,5 @@ def answer(
|
|||
|
||||
|
||||
# print(os.environ)
|
||||
# print(answer(query="What is the newest Pokemon Game?", model="hf-deepset/deberta-v3-base-squad2"))
|
||||
print(answer(query="When was Cristiano Ronaldo Born?", model="openai-chatgpt"))
|
||||
# def custom_answer
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import sys
|
||||
|
@ -14,15 +13,6 @@ HTTP_USERAGENT: dict[str, str] = {
|
|||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
|
||||
}
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
logging.basicConfig(
|
||||
filename="internet.log",
|
||||
filemode="w",
|
||||
level=logging.INFO,
|
||||
format="%(name)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils/NLP")
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
@ -68,7 +58,6 @@ class Google:
|
|||
str(self.__query),
|
||||
)
|
||||
)
|
||||
self.__cache_file: str = "google_internet_cache.pkl"
|
||||
self.__content: list[str] = []
|
||||
|
||||
def __get_urls(self: "Google") -> None:
|
||||
|
@ -90,8 +79,6 @@ class Google:
|
|||
self.__urls.append(result["link"])
|
||||
if len(self.__urls) == self.__num_res:
|
||||
break
|
||||
if config.CONF_DEBUG:
|
||||
logging.info(f"Links: {self.__urls}")
|
||||
|
||||
async def __fetch_url(self: "Google", session: Any, url: str) -> list[str]:
|
||||
try:
|
||||
|
@ -101,14 +88,8 @@ class Google:
|
|||
text = soup.get_text()
|
||||
normalized_text = normalizer(text)
|
||||
sentences: list[str] = sentencizer(normalized_text)
|
||||
if config.CONF_DEBUG:
|
||||
logging.info(f"Sentences: {sentences}")
|
||||
return sentences
|
||||
except aiohttp.ClientConnectorError:
|
||||
if config.CONF_DEBUG:
|
||||
logging.info(
|
||||
f"ClientConnector Error: Likely a connection issue with wifi"
|
||||
)
|
||||
return [""]
|
||||
except Exception:
|
||||
return [""]
|
||||
|
@ -147,25 +128,11 @@ class Google:
|
|||
def google(
|
||||
self: "Google", filter_irrelevant: bool = True
|
||||
) -> tuple[list[str], list[str]]:
|
||||
# Check the cache file first
|
||||
try:
|
||||
with open(self.__cache_file, "rb") as f:
|
||||
cache = pickle.load(f)
|
||||
except FileNotFoundError:
|
||||
cache = {}
|
||||
# Check if query are in the cache
|
||||
if self.__query in cache:
|
||||
results_cache: tuple[list[str], list[str]] = cache[self.__query]
|
||||
return results_cache
|
||||
# If none of the keywords are in the cache, get the results and update the cache
|
||||
self.__get_urls()
|
||||
self.__get_urls_contents()
|
||||
if filter_irrelevant:
|
||||
self.__filter_irrelevant_processing()
|
||||
results: tuple[list[str], list[str]] = (self.__content, self.__urls)
|
||||
cache[self.__query] = results
|
||||
with open(self.__cache_file, "wb") as f:
|
||||
pickle.dump(cache, f)
|
||||
return results
|
||||
|
||||
|
||||
|
|
|
@ -1,13 +1,3 @@
|
|||
import logging
|
||||
|
||||
# logging config
|
||||
logging.basicConfig(
|
||||
filename="normalize.log",
|
||||
filemode="w",
|
||||
level=logging.INFO,
|
||||
format="%(name)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
|
||||
import concurrent.futures
|
||||
import string
|
||||
import sys
|
||||
|
@ -67,8 +57,6 @@ def normalizer(text: str) -> str:
|
|||
.replace(" ", " ")
|
||||
)
|
||||
text = remove_non_ascii(text)
|
||||
if config.CONF_DEBUG:
|
||||
logging.info(text)
|
||||
return text
|
||||
|
||||
|
||||
|
@ -81,6 +69,4 @@ def normalize_sentences(sentences: list[str]) -> list[str]:
|
|||
):
|
||||
if future.result():
|
||||
normalized_sentences.append(sentence)
|
||||
if config.CONF_DEBUG:
|
||||
logging.info(f"Normalized Sentences: {normalize_sentences}")
|
||||
return normalized_sentences
|
||||
|
|
|
@ -1,15 +1,5 @@
|
|||
from typing import Any, List
|
||||
|
||||
import logging
|
||||
|
||||
# logging config
|
||||
logging.basicConfig(
|
||||
filename="sentencize.log",
|
||||
filemode="w",
|
||||
level=logging.INFO,
|
||||
format="%(name)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -51,8 +41,6 @@ def sentencizer(text: str) -> list[str]:
|
|||
for future in concurrent.futures.as_completed(futures):
|
||||
english_sentences.append(future.result())
|
||||
|
||||
if config.CONF_DEBUG:
|
||||
logging.info(f"sentences: {english_sentences}")
|
||||
return english_sentences
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue