update: look at todo
parent
581ec23b2a
commit
5f0faa77a4
|
@ -60,24 +60,26 @@ def answer(
|
||||||
# ChatGPT
|
# ChatGPT
|
||||||
results: tuple[list[str], list[str]] = internet.Google(
|
results: tuple[list[str], list[str]] = internet.Google(
|
||||||
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
|
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
|
||||||
).google(filter_irrelevant=False)
|
).google()
|
||||||
|
# print(' '.join(filter(lambda x: isinstance(x, str), results[0]))[:4000])
|
||||||
|
prompt = f"Using the context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:3000]} and answer the question with the context above and previous knowledge: \"{query}\". Also write long answers or essays if asked."
|
||||||
|
print(prompt)
|
||||||
chatbot = Chatbot(
|
chatbot = Chatbot(
|
||||||
{"session_token": CHATGPT_SESSION_TOKEN},
|
{"session_token": CHATGPT_SESSION_TOKEN},
|
||||||
conversation_id=CHATGPT_CONVERSATION_ID,
|
conversation_id=None,
|
||||||
parent_id=CHATGPT_PARENT_ID,
|
parent_id=None,
|
||||||
)
|
)
|
||||||
prompt = f"Utilize the following context: {' '.join(filter(lambda x: isinstance(x, str), results[0]))[:4000]} and answer the question only with the given context: {query}"
|
|
||||||
response = chatbot.ask(
|
response = chatbot.ask(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
conversation_id=CHATGPT_CONVERSATION_ID,
|
conversation_id=None,
|
||||||
parent_id=CHATGPT_PARENT_ID,
|
parent_id=None,
|
||||||
)
|
)
|
||||||
return (response["message"], results[1])
|
return (response["message"], results[1])
|
||||||
else:
|
else:
|
||||||
if model == "openai-text-davinci-003":
|
if model == "openai-text-davinci-003":
|
||||||
results: tuple[list[str], list[str]] = internet.Google(
|
results: tuple[list[str], list[str]] = internet.Google(
|
||||||
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
|
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
|
||||||
).google(filter_irrelevant=False)
|
).google()
|
||||||
context = " ".join(results[0])
|
context = " ".join(results[0])
|
||||||
context[: (4097 - len(query) - 10)]
|
context[: (4097 - len(query) - 10)]
|
||||||
response = openai.Completion.create(
|
response = openai.Completion.create(
|
||||||
|
@ -94,17 +96,15 @@ def answer(
|
||||||
model = model.replace("hf-", "", 1)
|
model = model.replace("hf-", "", 1)
|
||||||
results: tuple[list[str], list[str]] = internet.Google(
|
results: tuple[list[str], list[str]] = internet.Google(
|
||||||
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
|
query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
|
||||||
).google(filter_irrelevant=False)
|
).google()
|
||||||
qa_model = pipeline("question-answering", model=model)
|
qa_model = pipeline("question-answering", model=model)
|
||||||
response = qa_model(question=query, context=" ".join(results[0]))
|
response = qa_model(question=query, context=" ".join(results[0]))
|
||||||
return (response["answer"], results[1])
|
return (response["answer"], results[1])
|
||||||
|
|
||||||
|
|
||||||
# print(os.environ)
|
|
||||||
print(
|
print(
|
||||||
answer(
|
answer(
|
||||||
query="What is Club is Crisitano Ronaldo in 2023?",
|
query="Best original song in 80th Golden Globe award 2023?",
|
||||||
model="openai-text-davinci-003",
|
model="openai-chatgpt",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
# def custom_answer
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
from typing import Any, Dict, List, Tuple
|
from typing import Any, List, Tuple
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import pickle
|
|
||||||
import sys
|
import sys
|
||||||
from importlib import reload
|
from importlib import reload
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -18,22 +17,18 @@ sys.path.append(str(Path(__file__).parent.parent.parent.parent) + "/utils")
|
||||||
sys.path.append(str(Path(__file__).parent.parent))
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import concurrent.futures
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import config
|
import config
|
||||||
from adremover import AdRemover
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from keywords import get_keywords
|
|
||||||
from normalize import normalizer
|
from normalize import normalizer
|
||||||
from relevancy import filter_relevant
|
|
||||||
|
# from relevancy import filter_irrelevant
|
||||||
from sentencize import sentencizer
|
from sentencize import sentencizer
|
||||||
from urlextract import URLExtract
|
from urlextract import URLExtract
|
||||||
|
|
||||||
dotenv.load_dotenv()
|
|
||||||
|
|
||||||
|
|
||||||
class Google:
|
class Google:
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -44,11 +39,7 @@ class Google:
|
||||||
) -> None:
|
) -> None:
|
||||||
self.__GOOGLE_SEARCH_API_KEY: str = GOOGLE_SEARCH_API_KEY
|
self.__GOOGLE_SEARCH_API_KEY: str = GOOGLE_SEARCH_API_KEY
|
||||||
self.__GOOGLE_SEARCH_ENGINE_ID: str = GOOGLE_SEARCH_ENGINE_ID
|
self.__GOOGLE_SEARCH_ENGINE_ID: str = GOOGLE_SEARCH_ENGINE_ID
|
||||||
self.__num_res: int = (
|
self.__num_res: int = 10
|
||||||
5
|
|
||||||
if config.NLP_CONF_MODE == "speed"
|
|
||||||
else (20 if config.NLP_CONF_MODE else 10)
|
|
||||||
)
|
|
||||||
self.__query = query
|
self.__query = query
|
||||||
self.__URL_EXTRACTOR: URLExtract = URLExtract()
|
self.__URL_EXTRACTOR: URLExtract = URLExtract()
|
||||||
self.__urls: list[str] = self.__URL_EXTRACTOR.find_urls(query)
|
self.__urls: list[str] = self.__URL_EXTRACTOR.find_urls(query)
|
||||||
|
@ -59,15 +50,8 @@ class Google:
|
||||||
str(self.__query),
|
str(self.__query),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.__content: list[str] = []
|
|
||||||
ADBLOCK_RULES = [
|
|
||||||
"https://easylist-downloads.adblockplus.org/ruadlist+easylist.txt",
|
|
||||||
"https://filters.adtidy.org/extension/chromium/filters/1.txt",
|
|
||||||
]
|
|
||||||
self.__ad_remover = AdRemover(ADBLOCK_RULES)
|
|
||||||
|
|
||||||
def __get_urls(self: "Google") -> None:
|
def __get_urls(self: "Google") -> None:
|
||||||
# Send the request to the Google Search API
|
|
||||||
if self.__GOOGLE_SEARCH_API_KEY == "":
|
if self.__GOOGLE_SEARCH_API_KEY == "":
|
||||||
exit("ERROR: Google API Key not found")
|
exit("ERROR: Google API Key not found")
|
||||||
if self.__GOOGLE_SEARCH_ENGINE_ID == "":
|
if self.__GOOGLE_SEARCH_ENGINE_ID == "":
|
||||||
|
@ -90,7 +74,6 @@ class Google:
|
||||||
try:
|
try:
|
||||||
async with session.get(url, headers=HTTP_USERAGENT) as response:
|
async with session.get(url, headers=HTTP_USERAGENT) as response:
|
||||||
html = await response.text()
|
html = await response.text()
|
||||||
html = self.__ad_remover.remove_ads(html)
|
|
||||||
soup = BeautifulSoup(html, "html.parser")
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
text = soup.get_text()
|
text = soup.get_text()
|
||||||
normalized_text = normalizer(text)
|
normalized_text = normalizer(text)
|
||||||
|
@ -118,26 +101,11 @@ class Google:
|
||||||
contents = loop.run_until_complete(self.__fetch_urls(self.__urls))
|
contents = loop.run_until_complete(self.__fetch_urls(self.__urls))
|
||||||
loop.close()
|
loop.close()
|
||||||
self.__content = self.__flatten(contents)
|
self.__content = self.__flatten(contents)
|
||||||
self.__content = [str(x) for x in self.__content]
|
|
||||||
|
|
||||||
def __filter_irrelevant_processing(self: "Google") -> None:
|
def google(self: "Google") -> tuple[list[str], list[str]]:
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=500) as executor:
|
|
||||||
futures = [executor.submit(filter_relevant, self.__content, self.__query)]
|
|
||||||
concurrent.futures.wait(futures)
|
|
||||||
content: list[str] = []
|
|
||||||
for future in futures:
|
|
||||||
content.append(future.result())
|
|
||||||
self.__content = content
|
|
||||||
|
|
||||||
def google(
|
|
||||||
self: "Google", filter_irrelevant: bool = True
|
|
||||||
) -> tuple[list[str], list[str]]:
|
|
||||||
self.__get_urls()
|
self.__get_urls()
|
||||||
self.__get_urls_contents()
|
self.__get_urls_contents()
|
||||||
if filter_irrelevant:
|
return (self.__content, self.__urls)
|
||||||
self.__filter_irrelevant_processing()
|
|
||||||
results: tuple[list[str], list[str]] = (self.__content, self.__urls)
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue