working

2023-01-19 14:47:35 +07:00 · 2023-01-19 14:47:35 +07:00 · 5939e7804e
parent 091e0b27e6
commit 5939e7804e
2 changed files with 6 additions and 7 deletions
--- a/internet_ml/NLP/no_context/QA.py
+++ b/internet_ml/NLP/no_context/QA.py
@@ -47,7 +47,7 @@ def answer(
    results: tuple[list[str], list[str]] = internet.Google(
        query, GOOGLE_SEARCH_API_KEY, GOOGLE_SEARCH_ENGINE_ID
    ).google()
-    context: str = str(" ".join([str(string) for string in results[0]]))
+    context: str = str(" ".join([str(string) for string in results]))
    print(f"context: {context}")
    if model.startswith("openai-"):
@@ -90,6 +90,6 @@ def answer(
 print(
    answer(
        query="What is the newest pokemon game?",
-        model="hf-deepset/xlm-roberta-large-squad2",
+        model="openai-chatgpt",
    )
 )
--- a/internet_ml/tools/NLP/data/internet.py
+++ b/internet_ml/tools/NLP/data/internet.py
@@ -100,9 +100,7 @@ class Google:
                soup = BeautifulSoup(html, "html.parser")
                text = soup.get_text()
                normalized_text = normalizer(text)
-                sentences: list[str] = sentencizer(normalized_text)
+                return normalized_text
-                sentence: str = str(" ".join(sentences))
-                return sentence
        except Exception:
            error: str = ""
            return error
@@ -118,8 +116,9 @@ class Google:
    def __get_urls_contents(self: "Google") -> None:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
-        self.__content = loop.run_until_complete(self.__fetch_urls(self.__urls))
+        contents = loop.run_until_complete(self.__fetch_urls(self.__urls))
        loop.close()
+        self.__content = contents
    def __filter_irrelevant_processing(self: "Google") -> None:
        with concurrent.futures.ThreadPoolExecutor(max_workers=500) as executor:
@@ -137,7 +136,7 @@ class Google:
        self.__get_urls_contents()
        if filter_irrelevant:
            self.__filter_irrelevant_processing()
-        results: tuple[list[str], list[str]] = (self.__content[0], self.__urls)  # type: ignore
+        results: tuple[list[str], list[str]] = (self.__content, self.__urls)  # type: ignore
        return results