From a7b10f646a205d8ad100cd87d83c842e6d5ff8fc Mon Sep 17 00:00:00 2001 From: Thamognya Kodi Date: Sat, 21 Jan 2023 12:20:10 +0700 Subject: [PATCH] good qa model --- internet_ml/NLP/no_context/QA.py | 10 ++-------- internet_ml/NLP/no_context/test.py | 8 ++++++++ internet_ml/tools/NLP/data/internet.py | 9 +++------ 3 files changed, 13 insertions(+), 14 deletions(-) create mode 100644 internet_ml/NLP/no_context/test.py diff --git a/internet_ml/NLP/no_context/QA.py b/internet_ml/NLP/no_context/QA.py index 2a1aef4..2821574 100644 --- a/internet_ml/NLP/no_context/QA.py +++ b/internet_ml/NLP/no_context/QA.py @@ -44,9 +44,11 @@ def answer( if not (model.startswith("openai-") or model.startswith("hf-")): model = "openai-chatgpt" # Default + print("Scraping the Internet") results: tuple[list[str], list[str]] = internet.Google( query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID ).google() + print("Done scraping the Internet") context: str = str(" ".join([str(string) for string in results])) print(f"context: {context}") @@ -85,11 +87,3 @@ def answer( qa_model = pipeline("question-answering", model=model) response = qa_model(question=query, context=context) return (response["answer"], results[1]) - - -print( - answer( - query="What is the newest pokemon game?", - model="openai-chatgpt", - ) -) diff --git a/internet_ml/NLP/no_context/test.py b/internet_ml/NLP/no_context/test.py new file mode 100644 index 0000000..61bf0c4 --- /dev/null +++ b/internet_ml/NLP/no_context/test.py @@ -0,0 +1,8 @@ +from QA import answer + +print( + answer( + query="When was the last cricket worldcup held?", + model="hf-deepset/roberta-large-squad2", + ) +) diff --git a/internet_ml/tools/NLP/data/internet.py b/internet_ml/tools/NLP/data/internet.py index 75b1ffd..cc95547 100644 --- a/internet_ml/tools/NLP/data/internet.py +++ b/internet_ml/tools/NLP/data/internet.py @@ -1,3 +1,4 @@ +# type: ignore from typing import Any, List, Tuple import asyncio @@ -47,11 +48,7 @@ class Google: self.__GOOGLE_SEARCH_ENGINE_ID = str( os.environ.get("GOOGLE_SEARCH_ENGINE_ID") ) - self.__num_res: int = ( - 5 - if config.NLP_CONF_MODE == "speed" - else (20 if config.NLP_CONF_MODE else 10) - ) + self.__num_res: int = 10 self.__query = query self.__URL_EXTRACTOR: URLExtract = URLExtract() self.__urls: list[str] = self.__URL_EXTRACTOR.find_urls(query) @@ -136,7 +133,7 @@ class Google: self.__get_urls_contents() if filter_irrelevant: self.__filter_irrelevant_processing() - results: tuple[list[str], list[str]] = (self.__content, self.__urls) # type: ignore + results: tuple[list[str], list[str]] = (self.__content, self.__urls) return results