internet_ml/internet_ml/tools/NLP/relevancy.py

28 lines
1.1 KiB
Python
Raw Normal View History

2023-01-10 12:50:43 +00:00
from typing import List
import concurrent.futures
2023-01-10 12:50:43 +00:00
import re
2022-12-26 06:07:39 +00:00
2023-01-10 12:50:43 +00:00
def filter_relevant(sentences: list[str], question: str) -> list[str]:
    """Return the sentences that contain at least one word of the question.

    The question is split on whitespace. Punctuation at the edges of each
    token is dropped (so ``"France?"`` is looked up as ``France``) and the
    remainder is escaped, so regex metacharacters in the question can neither
    raise ``re.error`` nor change the meaning of the pattern. Matching is
    case-sensitive and on whole words only.

    Args:
        sentences: Candidate sentences to filter.
        question: Free-text question whose words are searched for.

    Returns:
        The matching sentences, in their original order. The result is empty
        when the question contains no word characters at all.
    """
    words = []
    for token in question.split():
        # Trim non-word characters from both ends so the surrounding ``\b``
        # anchors always sit next to a word character.
        word = re.sub(r"^\W+|\W+$", "", token)
        if word:
            words.append(re.escape(word))

    # With nothing to look for, no sentence can be relevant. (An empty
    # alternation would otherwise match at every word boundary.)
    if not words:
        return []

    # Compile once instead of rebuilding the pattern for every sentence.
    pattern = re.compile(r"\b(?:" + "|".join(words) + r")\b")

    # Regex search is CPU-bound, so a thread pool only adds overhead under the
    # GIL; a plain comprehension yields the same result in the same order.
    return [sentence for sentence in sentences if pattern.search(sentence)]
2022-12-26 06:07:39 +00:00
2023-01-10 12:50:43 +00:00
# # Example usage
# context = ["The quick brown fox jumps over the lazy dog.", "The slow green snake slithers under the rock."]
# question = "quick brown fox"
# relevant_context = filter_relevant(context, question)
# print(relevant_context) # Output: ["The quick brown fox jumps over the lazy dog."]