update: look at todo

main
Thamognya Kodi 2023-01-14 20:12:43 +07:00
parent 581ec23b2a
commit 5f0faa77a4
2 changed files with 18 additions and 50 deletions

View File

@@ -60,24 +60,26 @@ def answer(
# ChatGPT
results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=False)
).google()
# print(' '.join(filter(lambda x: isinstance(x, str), results[0]))[:4000])
prompt = f"Using the context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:3000]} and answer the question with the context above and previous knowledge: \"{query}\". Also write long answers or essays if asked."
print(prompt)
chatbot = Chatbot(
{"session_token": CHATGPT_SESSION_TOKEN},
conversation_id=CHATGPT_CONVERSATION_ID,
parent_id=CHATGPT_PARENT_ID,
conversation_id=None,
parent_id=None,
)
prompt = f"Utilize the following context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:4000]} and answer the question only with the given context: {query}"
response = chatbot.ask(
prompt=prompt,
conversation_id=CHATGPT_CONVERSATION_ID,
parent_id=CHATGPT_PARENT_ID,
conversation_id=None,
parent_id=None,
)
return (response["message"], results[1])
else:
if model == "openai-text-davinci-003":
results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=False)
).google()
context = " ".join(results[0])
context[: (4097 - len(query) - 10)]
response = openai.Completion.create(
@@ -94,17 +96,15 @@ def answer(
model = model.replace("hf-", "", 1)
results: tuple[list[str], list[str]] = internet.Google(
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
).google(filter_irrelevant=False)
).google()
qa_model = pipeline("question-answering", model=model)
response = qa_model(question=query, context=" ".join(results[0]))
return (response["answer"], results[1])
# print(os.environ)
print(
answer(
query="What is Club is Crisitano Ronaldo in 2023?",
model="openai-text-davinci-003",
query="Best original song in 80th Golden Globe award 2023?",
model="openai-chatgpt",
)
)
# def custom_answer

View File

@@ -1,7 +1,6 @@
from typing import Any, Dict, List, Tuple
from typing import Any, List, Tuple
import os
import pickle
import sys
from importlib import reload
from pathlib import Path
@@ -18,22 +17,18 @@ sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
sys.path.append(str(Path(__file__).parent.parent))
import asyncio
import concurrent.futures
import itertools
import re
import aiohttp
import config
from adremover import AdRemover
from bs4 import BeautifulSoup
from keywords import get_keywords
from normalize import normalizer
from relevancy import filter_relevant
# from relevancy import filter_irrelevant
from sentencize import sentencizer
from urlextract import URLExtract
dotenv.load_dotenv()
class Google:
def __init__(
@@ -44,11 +39,7 @@ class Google:
) -> None:
self.__GOOGLE_SEARCH_API_KEY: str = GOOGLE_SEARCH_API_KEY
self.__GOOGLE_SEARCH_ENGINE_ID: str = GOOGLE_SEARCH_ENGINE_ID
self.__num_res: int = (
5
if config.NLP_CONF_MODE == "speed"
else (20 if config.NLP_CONF_MODE else 10)
)
self.__num_res: int = 10
self.__query = query
self.__URL_EXTRACTOR: URLExtract = URLExtract()
self.__urls: list[str] = self.__URL_EXTRACTOR.find_urls(query)
@@ -59,15 +50,8 @@ class Google:
str(self.__query),
)
)
self.__content: list[str] = []
ADBLOCK_RULES = [
"https://easylist-downloads.adblockplus.org/ruadlist+easylist.txt",
"https://filters.adtidy.org/extension/chromium/filters/1.txt",
]
self.__ad_remover = AdRemover(ADBLOCK_RULES)
def __get_urls(self: "Google") -> None:
# Send the request to the Google Search API
if self.__GOOGLE_SEARCH_API_KEY == "":
exit("ERROR: Google API Key not found")
if self.__GOOGLE_SEARCH_ENGINE_ID == "":
@@ -90,7 +74,6 @@ class Google:
try:
async with session.get(url, headers=HTTP_USERAGENT) as response:
html = await response.text()
html = self.__ad_remover.remove_ads(html)
soup = BeautifulSoup(html, "html.parser")
text = soup.get_text()
normalized_text = normalizer(text)
@@ -118,26 +101,11 @@ class Google:
contents = loop.run_until_complete(self.__fetch_urls(self.__urls))
loop.close()
self.__content = self.__flatten(contents)
self.__content = [str(x) for x in self.__content]
def __filter_irrelevant_processing(self: "Google") -> None:
with concurrent.futures.ThreadPoolExecutor(max_workers=500) as executor:
futures = [executor.submit(filter_relevant, self.__content, self.__query)]
concurrent.futures.wait(futures)
content: list[str] = []
for future in futures:
content.append(future.result())
self.__content = content
def google(
self: "Google", filter_irrelevant: bool = True
) -> tuple[list[str], list[str]]:
def google(self: "Google") -> tuple[list[str], list[str]]:
self.__get_urls()
self.__get_urls_contents()
if filter_irrelevant:
self.__filter_irrelevant_processing()
results: tuple[list[str], list[str]] = (self.__content, self.__urls)
return results
return (self.__content, self.__urls)
"""