update: look at todo

main
Thamognya Kodi 2023-01-14 20:12:43 +07:00
parent 581ec23b2a
commit 5f0faa77a4
2 changed files with 18 additions and 50 deletions

View File

@@ -60,24 +60,26 @@ def answer(
         # ChatGPT
         results: tuple[list[str], list[str]] = internet.Google(
             query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
-        ).google(filter_irrelevant=False)
+        ).google()
+        # print(' '.join(filter(lambda x: isinstance(x, str), results[0]))[:4000])
+        prompt = f"Using the context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:3000]} and answer the question with the context above and previous knowledge: \"{query}\". Also write long answers or essays if asked."
+        print(prompt)
         chatbot = Chatbot(
             {"session_token": CHATGPT_SESSION_TOKEN},
-            conversation_id=CHATGPT_CONVERSATION_ID,
-            parent_id=CHATGPT_PARENT_ID,
+            conversation_id=None,
+            parent_id=None,
         )
-        prompt = f"Utilize the following context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:4000]} and answer the question only with the given context: {query}"
         response = chatbot.ask(
             prompt=prompt,
-            conversation_id=CHATGPT_CONVERSATION_ID,
-            parent_id=CHATGPT_PARENT_ID,
+            conversation_id=None,
+            parent_id=None,
         )
         return (response["message"], results[1])
     else:
         if model == "openai-text-davinci-003":
             results: tuple[list[str], list[str]] = internet.Google(
                 query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
-            ).google(filter_irrelevant=False)
+            ).google()
             context = " ".join(results[0])
             context[: (4097 - len(query) - 10)]
             response = openai.Completion.create(
@@ -94,17 +96,15 @@ def answer(
             model = model.replace("hf-", "", 1)
             results: tuple[list[str], list[str]] = internet.Google(
                 query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
-            ).google(filter_irrelevant=False)
+            ).google()
             qa_model = pipeline("question-answering", model=model)
             response = qa_model(question=query, context=" ".join(results[0]))
             return (response["answer"], results[1])
-# print(os.environ)
 print(
     answer(
-        query="What is Club is Crisitano Ronaldo in 2023?",
-        model="openai-text-davinci-003",
+        query="Best original song in 80th Golden Globe award 2023?",
+        model="openai-chatgpt",
     )
 )
-# def custom_answer
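
After this hunk, the ChatGPT branch of answer() calls .google() without the removed filter_irrelevant flag, builds its prompt from the raw search results, and starts a fresh conversation (conversation_id=None, parent_id=None). A minimal sketch of that flow, assuming the same internet.Google helper, Chatbot class, and credential variables used above; the query string is only an example:

# Sketch only: mirrors the new ChatGPT path in answer(); names and credentials
# are the ones already imported in this file.
results = internet.Google(
    "Best original song in 80th Golden Globe award 2023?",
    GOOGLE_SEARCH_API_KEY,
    GOOGLE_SEARCH_ENGINE_ID,
).google()
context = " ".join(filter(lambda x: isinstance(x, str), results[0]))[:3000]
prompt = (
    f"Using the context: {context} and answer the question with the context "
    'above and previous knowledge: "Best original song in 80th Golden Globe '
    'award 2023?". Also write long answers or essays if asked.'
)
chatbot = Chatbot(
    {"session_token": CHATGPT_SESSION_TOKEN},
    conversation_id=None,  # fresh conversation instead of a stored one
    parent_id=None,
)
response = chatbot.ask(prompt=prompt, conversation_id=None, parent_id=None)
answer_text, sources = response["message"], results[1]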

View File

@@ -1,7 +1,6 @@
-from typing import Any, Dict, List, Tuple
+from typing import Any, List, Tuple
 import os
-import pickle
 import sys
 from importlib import reload
 from pathlib import Path
@@ -18,22 +17,18 @@ sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
 sys.path.append(str(Path(__file__).parent.parent))
 import asyncio
-import concurrent.futures
 import itertools
 import re
 import aiohttp
 import config
-from adremover import AdRemover
 from bs4 import BeautifulSoup
-from keywords import get_keywords
 from normalize import normalizer
-from relevancy import filter_relevant
-# from relevancy import filter_irrelevant
 from sentencize import sentencizer
 from urlextract import URLExtract
-dotenv.load_dotenv()
 class Google:
     def __init__(
@@ -44,11 +39,7 @@ class Google:
     ) -> None:
         self.__GOOGLE_SEARCH_API_KEY: str = GOOGLE_SEARCH_API_KEY
         self.__GOOGLE_SEARCH_ENGINE_ID: str = GOOGLE_SEARCH_ENGINE_ID
-        self.__num_res: int = (
-            5
-            if config.NLP_CONF_MODE == "speed"
-            else (20 if config.NLP_CONF_MODE else 10)
-        )
+        self.__num_res: int = 10
         self.__query = query
         self.__URL_EXTRACTOR: URLExtract = URLExtract()
         self.__urls: list[str] = self.__URL_EXTRACTOR.find_urls(query)
@@ -59,15 +50,8 @@ class Google:
                 str(self.__query),
             )
         )
-        self.__content: list[str] = []
-        ADBLOCK_RULES = [
-            "https://easylist-downloads.adblockplus.org/ruadlist+easylist.txt",
-            "https://filters.adtidy.org/extension/chromium/filters/1.txt",
-        ]
-        self.__ad_remover = AdRemover(ADBLOCK_RULES)
     def __get_urls(self: "Google") -> None:
-        # Send the request to the Google Search API
         if self.__GOOGLE_SEARCH_API_KEY == "":
             exit("ERROR: Google API Key not found")
         if self.__GOOGLE_SEARCH_ENGINE_ID == "":
@ -90,7 +74,6 @@ class Google:
try: try:
async with session.get(url, headers=HTTP_USERAGENT) as response: async with session.get(url, headers=HTTP_USERAGENT) as response:
html = await response.text() html = await response.text()
html = self.__ad_remover.remove_ads(html)
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
text = soup.get_text() text = soup.get_text()
normalized_text = normalizer(text) normalized_text = normalizer(text)
@@ -118,26 +101,11 @@ class Google:
         contents = loop.run_until_complete(self.__fetch_urls(self.__urls))
         loop.close()
         self.__content = self.__flatten(contents)
-        self.__content = [str(x) for x in self.__content]
-    def __filter_irrelevant_processing(self: "Google") -> None:
-        with concurrent.futures.ThreadPoolExecutor(max_workers=500) as executor:
-            futures = [executor.submit(filter_relevant, self.__content, self.__query)]
-            concurrent.futures.wait(futures)
-            content: list[str] = []
-            for future in futures:
-                content.append(future.result())
-            self.__content = content
-    def google(
-        self: "Google", filter_irrelevant: bool = True
-    ) -> tuple[list[str], list[str]]:
+    def google(self: "Google") -> tuple[list[str], list[str]]:
         self.__get_urls()
         self.__get_urls_contents()
-        if filter_irrelevant:
-            self.__filter_irrelevant_processing()
-        results: tuple[list[str], list[str]] = (self.__content, self.__urls)
-        return results
+        return (self.__content, self.__urls)
 """