update: look at todo

main
Thamognya Kodi 2022-12-28 17:41:27 +07:00
parent 032de4e7f4
commit 367675e57f
39 changed files with 3351 additions and 111 deletions

2
.github/TODO.md vendored
View File

@ -1 +1 @@
- work on API_KEY fixing config
- IMPORTANT: fix API_KEY handling in config, or load the keys from .env

View File

@ -42,9 +42,11 @@ HTTP_USERAGENT: dict[str, str] = {
class Google:
def __init__(self: Any, query: str) -> None:
self.GOOGLE_SEARCH_API_KEY: str = ""
self.GOOGLE_SEARCH_ENGINE_ID: str = ""
def __init__(
self: Any, query: str, GOOGLE_SEARCH_API_KEY: str, GOOGLE_SEARCH_ENGINE_ID: str
) -> None:
self.__GOOGLE_SEARCH_API_KEY: str = GOOGLE_SEARCH_API_KEY
self.__GOOGLE_SEARCH_ENGINE_ID: str = GOOGLE_SEARCH_ENGINE_ID
self.__num_res: int = (
5
if config.NLP_CONF_MODE == "speed"
@ -57,36 +59,18 @@ class Google:
r"\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*", "", self.__query
)
@property
def google_search_api_key(self: Any) -> str:
val: str = self.GOOGLE_SEARCH_API_KEY
return val
@google_search_api_key.setter
def google_search_api_key(self: Any, val: str) -> None:
self.GOOGLE_SEARCH_API_KEY = val
@property
def google_search_engine_id(self: Any) -> str:
val: str = self.GOOGLE_SEARCH_ENGINE_ID
return val
@google_search_engine_id.setter
def google_search_engine_id(self: Any, val: str) -> None:
self.GOOGLE_SEARCH_ENGINE_ID = val
def __get_urls(self: Any) -> None:
# Send the request to the Google Search API
if self.GOOGLE_SEARCH_API_KEY == "":
if self.__GOOGLE_SEARCH_API_KEY == "":
exit("ERROR: Google API Key not found")
if self.GOOGLE_SEARCH_ENGINE_ID == "":
if self.__GOOGLE_SEARCH_ENGINE_ID == "":
exit("ERROR: Google Search Engine Id not found")
response = requests.get(
"https://www.googleapis.com/customsearch/v1",
params={
"key": self.GOOGLE_SEARCH_API_KEY,
"key": self.__GOOGLE_SEARCH_API_KEY,
"q": self.__query,
"cx": self.GOOGLE_SEARCH_ENGINE_ID,
"cx": self.__GOOGLE_SEARCH_ENGINE_ID,
},
)
results = response.json()["items"]
@ -161,9 +145,11 @@ class Google:
def google(query: str) -> tuple[list[str], list[str]]:
_google = Google(query)
_google.google_search_api_key = config.GET_GOOGLE_API_CONFIG()[0]
_google.google_search_engine_id = config.GET_GOOGLE_API_CONFIG()[1]
_google = Google(
query,
os.environ["INTERNET_ML_GOOGLE_API"],
os.environ["INTERNET_ML_GOOGLE_SEARCH_ENGINE_ID"],
)
return _google.google()

View File

@ -1,4 +1,4 @@
from typing import List, Tuple
from typing import Any, List, Tuple
import logging
@ -8,45 +8,31 @@ logging.basicConfig(
level=logging.INFO,
format="%(name)s - %(levelname)s - %(message)s",
)
# General
CONF_DEBUG: bool = True
# Google
GOOGLE_API_KEY: str = ""
GOOGLE_SEARCH_ENGINE_ID: str = ""
# NLP
NLP_CONF_MODE: str = "default"
def GOOGLE_API_CONFIG(_GOOGLE_API_KEY: str, _GOOGLE_SEARCH_ENGINE_ID: str) -> None:
global GOOGLE_SEARCH_ENGINE_ID, GOOGLE_API_KEY
GOOGLE_API_KEY = _GOOGLE_API_KEY
GOOGLE_SEARCH_ENGINE_ID = _GOOGLE_SEARCH_ENGINE_ID
if CONF_DEBUG and _GOOGLE_API_KEY != "":
logging.info(f"API_KEY set")
if CONF_DEBUG and _GOOGLE_SEARCH_ENGINE_ID != "":
logging.info(f"SEARCH_ENGINE_ID set")
class FullConfig:
    """Mutable holder for the package's runtime configuration.

    Every setting is a plain public attribute. The ``*_config`` methods are
    convenience setters that update the related attributes in one call.
    """

    # Modes accepted by NLP_config(); any other value is rejected.
    _NLP_MODES: tuple[str, ...] = ("accuracy", "speed", "default")

    def __init__(self: Any) -> None:
        """Initialise every setting to its default value."""
        self.CONF_DEBUG: bool = True
        self.GOOGLE_API_KEY: str = ""
        self.GOOGLE_SEARCH_ENGINE_ID: str = ""
        self.NLP_CONF_MODE: str = "default"

    def general_config(self: Any, CONF_DEBUG: bool) -> None:
        """Set the debug flag.

        Args:
            CONF_DEBUG: New value for ``self.CONF_DEBUG``.
        """
        self.CONF_DEBUG = CONF_DEBUG

    def google_config(
        self: Any, GOOGLE_API_KEY: str, GOOGLE_SEARCH_ENGINE_ID: str
    ) -> None:
        """Store the Google credentials.

        Args:
            GOOGLE_API_KEY: New value for ``self.GOOGLE_API_KEY``.
            GOOGLE_SEARCH_ENGINE_ID: New value for
                ``self.GOOGLE_SEARCH_ENGINE_ID``.
        """
        self.GOOGLE_API_KEY = GOOGLE_API_KEY
        self.GOOGLE_SEARCH_ENGINE_ID = GOOGLE_SEARCH_ENGINE_ID

    def NLP_config(self: Any, NLP_CONF_MODE: str = "default") -> None:
        """Select the NLP mode.

        Args:
            NLP_CONF_MODE: One of ``"accuracy"``, ``"speed"`` or
                ``"default"``. Any other value leaves the current mode
                unchanged.
        """
        if NLP_CONF_MODE in self._NLP_MODES:
            self.NLP_CONF_MODE = NLP_CONF_MODE
        elif self.CONF_DEBUG:
            # A rejected mode used to be dropped silently; report it (only in
            # debug mode) so a misspelt mode does not go unnoticed.
            logging.warning("mode: %s does not exist", NLP_CONF_MODE)
def GET_GOOGLE_API_CONFIG() -> tuple[str, str]:
global GOOGLE_SEARCH_ENGINE_ID, GOOGLE_API_KEY
return (GOOGLE_API_KEY, GOOGLE_SEARCH_ENGINE_ID)
# TODO: work in progress
# class GoogleAPI:
# def __init__(self) -> None:
# self.GOOGLE_SEARCH_API_KEY: str = ""
# self.GOOGLE_SEARCH_ENGINE_ID: str = ""
# @property
# def google_search_api_key
def NLP_config(mode: str = "default", debug: bool = True) -> None:
global NLP_CONF_MODE, CONF_DEBUG
CONF_DEBUG = debug
if mode == "accuracy" or mode == "speed":
NLP_CONF_MODE = mode
else:
if CONF_DEBUG:
logging.warn(f"mode: {mode} does not exist")
config = FullConfig()

62
poetry.lock generated
View File

@ -644,7 +644,6 @@ files = [
]
[package.dependencies]
accelerate = {version = ">=0.11.0", optional = true, markers = "extra == \"torch\""}
filelock = "*"
huggingface-hub = ">=0.10.0"
importlib-metadata = "*"
@ -652,7 +651,6 @@ numpy = "*"
Pillow = "*"
regex = "!=2019.12.17"
requests = "*"
torch = {version = ">=1.4", optional = true, markers = "extra == \"torch\""}
[package.extras]
dev = ["accelerate (>=0.11.0)", "black (==22.8)", "datasets", "flake8 (>=3.8.3)", "flax (>=0.4.1)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "jax (>=0.2.8,!=0.3.2)", "jaxlib (>=0.1.65)", "k-diffusion", "librosa", "modelcards (>=0.1.4)", "parameterized", "pytest", "pytest-timeout", "pytest-xdist", "safetensors", "scipy", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "torch (>=1.4)", "torchvision", "transformers (>=4.25.1)"]
@ -1469,40 +1467,40 @@ setuptools = "*"
[[package]]
name = "numpy"
version = "1.24.0"
version = "1.24.1"
description = "Fundamental package for array computing in Python"
category = "main"
optional = false
python-versions = ">=3.8"
files = [
{file = "numpy-1.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6e73a1f4f5b74a42abb55bc2b3d869f1b38cbc8776da5f8b66bf110284f7a437"},
{file = "numpy-1.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9387c7d6d50e8f8c31e7bfc034241e9c6f4b3eb5db8d118d6487047b922f82af"},
{file = "numpy-1.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ad6a024a32ee61d18f5b402cd02e9c0e22c0fb9dc23751991b3a16d209d972e"},
{file = "numpy-1.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73cf2c5b5a07450f20a0c8e04d9955491970177dce8df8d6903bf253e53268e0"},
{file = "numpy-1.24.0-cp310-cp310-win32.whl", hash = "sha256:cec79ff3984b2d1d103183fc4a3361f5b55bbb66cb395cbf5a920a4bb1fd588d"},
{file = "numpy-1.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:4f5e78b8b710cd7cd1a8145994cfffc6ddd5911669a437777d8cedfce6c83a98"},
{file = "numpy-1.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4445f472b246cad6514cc09fbb5ecb7aab09ca2acc3c16f29f8dca6c468af501"},
{file = "numpy-1.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec3e5e8172a0a6a4f3c2e7423d4a8434c41349141b04744b11a90e017a95bad5"},
{file = "numpy-1.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9168790149f917ad8e3cf5047b353fefef753bd50b07c547da0bdf30bc15d91"},
{file = "numpy-1.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada6c1e9608ceadaf7020e1deea508b73ace85560a16f51bef26aecb93626a72"},
{file = "numpy-1.24.0-cp311-cp311-win32.whl", hash = "sha256:f3c4a9a9f92734a4728ddbd331e0124eabbc968a0359a506e8e74a9b0d2d419b"},
{file = "numpy-1.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:90075ef2c6ac6397d0035bcd8b298b26e481a7035f7a3f382c047eb9c3414db0"},
{file = "numpy-1.24.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0885d9a7666cafe5f9876c57bfee34226e2b2847bfb94c9505e18d81011e5401"},
{file = "numpy-1.24.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e63d2157f9fc98cc178870db83b0e0c85acdadd598b134b00ebec9e0db57a01f"},
{file = "numpy-1.24.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8960f72997e56781eb1c2ea256a70124f92a543b384f89e5fb3503a308b1d3"},
{file = "numpy-1.24.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f8e0df2ecc1928ef7256f18e309c9d6229b08b5be859163f5caa59c93d53646"},
{file = "numpy-1.24.0-cp38-cp38-win32.whl", hash = "sha256:fe44e925c68fb5e8db1334bf30ac1a1b6b963b932a19cf41d2e899cf02f36aab"},
{file = "numpy-1.24.0-cp38-cp38-win_amd64.whl", hash = "sha256:d7f223554aba7280e6057727333ed357b71b7da7422d02ff5e91b857888c25d1"},
{file = "numpy-1.24.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ab11f6a7602cf8ea4c093e091938207de3068c5693a0520168ecf4395750f7ea"},
{file = "numpy-1.24.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12bba5561d8118981f2f1ff069ecae200c05d7b6c78a5cdac0911f74bc71cbd1"},
{file = "numpy-1.24.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9af91f794d2d3007d91d749ebc955302889261db514eb24caef30e03e8ec1e41"},
{file = "numpy-1.24.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b1ddfac6a82d4f3c8e99436c90b9c2c68c0bb14658d1684cdd00f05fab241f5"},
{file = "numpy-1.24.0-cp39-cp39-win32.whl", hash = "sha256:ac4fe68f1a5a18136acebd4eff91aab8bed00d1ef2fdb34b5d9192297ffbbdfc"},
{file = "numpy-1.24.0-cp39-cp39-win_amd64.whl", hash = "sha256:667b5b1f6a352419e340f6475ef9930348ae5cb7fca15f2cc3afcb530823715e"},
{file = "numpy-1.24.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4d01f7832fa319a36fd75ba10ea4027c9338ede875792f7bf617f4b45056fc3a"},
{file = "numpy-1.24.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbb0490f0a880700a6cc4d000384baf19c1f4df59fff158d9482d4dbbca2b239"},
{file = "numpy-1.24.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:0104d8adaa3a4cc60c2777cab5196593bf8a7f416eda133be1f3803dd0838886"},
{file = "numpy-1.24.0.tar.gz", hash = "sha256:c4ab7c9711fe6b235e86487ca74c1b092a6dd59a3cb45b63241ea0a148501853"},
{file = "numpy-1.24.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:179a7ef0889ab769cc03573b6217f54c8bd8e16cef80aad369e1e8185f994cd7"},
{file = "numpy-1.24.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b09804ff570b907da323b3d762e74432fb07955701b17b08ff1b5ebaa8cfe6a9"},
{file = "numpy-1.24.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1b739841821968798947d3afcefd386fa56da0caf97722a5de53e07c4ccedc7"},
{file = "numpy-1.24.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e3463e6ac25313462e04aea3fb8a0a30fb906d5d300f58b3bc2c23da6a15398"},
{file = "numpy-1.24.1-cp310-cp310-win32.whl", hash = "sha256:b31da69ed0c18be8b77bfce48d234e55d040793cebb25398e2a7d84199fbc7e2"},
{file = "numpy-1.24.1-cp310-cp310-win_amd64.whl", hash = "sha256:b07b40f5fb4fa034120a5796288f24c1fe0e0580bbfff99897ba6267af42def2"},
{file = "numpy-1.24.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7094891dcf79ccc6bc2a1f30428fa5edb1e6fb955411ffff3401fb4ea93780a8"},
{file = "numpy-1.24.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28e418681372520c992805bb723e29d69d6b7aa411065f48216d8329d02ba032"},
{file = "numpy-1.24.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e274f0f6c7efd0d577744f52032fdd24344f11c5ae668fe8d01aac0422611df1"},
{file = "numpy-1.24.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0044f7d944ee882400890f9ae955220d29b33d809a038923d88e4e01d652acd9"},
{file = "numpy-1.24.1-cp311-cp311-win32.whl", hash = "sha256:442feb5e5bada8408e8fcd43f3360b78683ff12a4444670a7d9e9824c1817d36"},
{file = "numpy-1.24.1-cp311-cp311-win_amd64.whl", hash = "sha256:de92efa737875329b052982e37bd4371d52cabf469f83e7b8be9bb7752d67e51"},
{file = "numpy-1.24.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b162ac10ca38850510caf8ea33f89edcb7b0bb0dfa5592d59909419986b72407"},
{file = "numpy-1.24.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26089487086f2648944f17adaa1a97ca6aee57f513ba5f1c0b7ebdabbe2b9954"},
{file = "numpy-1.24.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caf65a396c0d1f9809596be2e444e3bd4190d86d5c1ce21f5fc4be60a3bc5b36"},
{file = "numpy-1.24.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0677a52f5d896e84414761531947c7a330d1adc07c3a4372262f25d84af7bf7"},
{file = "numpy-1.24.1-cp38-cp38-win32.whl", hash = "sha256:dae46bed2cb79a58d6496ff6d8da1e3b95ba09afeca2e277628171ca99b99db1"},
{file = "numpy-1.24.1-cp38-cp38-win_amd64.whl", hash = "sha256:6ec0c021cd9fe732e5bab6401adea5a409214ca5592cd92a114f7067febcba0c"},
{file = "numpy-1.24.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:28bc9750ae1f75264ee0f10561709b1462d450a4808cd97c013046073ae64ab6"},
{file = "numpy-1.24.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84e789a085aabef2f36c0515f45e459f02f570c4b4c4c108ac1179c34d475ed7"},
{file = "numpy-1.24.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e669fbdcdd1e945691079c2cae335f3e3a56554e06bbd45d7609a6cf568c700"},
{file = "numpy-1.24.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef85cf1f693c88c1fd229ccd1055570cb41cdf4875873b7728b6301f12cd05bf"},
{file = "numpy-1.24.1-cp39-cp39-win32.whl", hash = "sha256:87a118968fba001b248aac90e502c0b13606721b1343cdaddbc6e552e8dfb56f"},
{file = "numpy-1.24.1-cp39-cp39-win_amd64.whl", hash = "sha256:ddc7ab52b322eb1e40521eb422c4e0a20716c271a306860979d450decbb51b8e"},
{file = "numpy-1.24.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed5fb71d79e771ec930566fae9c02626b939e37271ec285e9efaf1b5d4370e7d"},
{file = "numpy-1.24.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad2925567f43643f51255220424c23d204024ed428afc5aad0f86f3ffc080086"},
{file = "numpy-1.24.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:cfa1161c6ac8f92dea03d625c2d0c05e084668f4a06568b77a25a89111621566"},
{file = "numpy-1.24.1.tar.gz", hash = "sha256:2386da9a471cc00a1f47845e27d916d5ec5346ae9696e01a8a34760858fe9dd2"},
]
[[package]]
@ -3714,4 +3712,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "1176d6ed37e6dbd675b086a6eae2e8ad214ac12291285c1e46700f6600d5f0bf"
content-hash = "ce866ef389ecf01a47a8d7ac1df101ab3c9ef5ed17b6d92eb43b10a7d1e2d221"

View File

@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "internet_ml"
version = "0.2.0"
version = "0.2.11"
description = "Internet-ML: Allowing ML to connect to the internet"
readme = "./.github/README.md"
authors = ["Thamognya Kodi <contact@thamognya.com>"]
@ -40,7 +40,6 @@ numpy = "^1.24.0"
torch = "^1.13.1"
datasets = "^2.8.0"
accelerate = "^0.15.0"
diffusers = {extras = ["torch"], version = "^0.11.1"}
timm = "^0.6.12"
torchvision = "^0.14.1"
torchaudio = "^0.13.1"
@ -53,6 +52,7 @@ scikit-learn = "^1.2.0"
spacy = "^3.4.4"
nltk = "^3.8"
beautifulsoup4 = "^4.11.1"
diffusers = "^0.11.1"
[tool.poetry.group.dev.dependencies]
bandit = "^1.7.4"

View File

@ -1,15 +1,22 @@
accelerate==0.15.0 ; python_version >= "3.10" and python_version < "4.0"
aiohttp==3.8.3 ; python_version >= "3.10" and python_version < "4.0"
aiosignal==1.3.1 ; python_version >= "3.10" and python_version < "4.0"
anyascii==0.3.1 ; python_version >= "3.10" and python_version < "4.0"
async-timeout==4.0.2 ; python_version >= "3.10" and python_version < "4.0"
attrs==22.2.0 ; python_version >= "3.10" and python_version < "4.0"
beautifulsoup4==4.11.1 ; python_version >= "3.10" and python_version < "4.0"
blis==0.7.9 ; python_version >= "3.10" and python_version < "4.0"
catalogue==2.0.8 ; python_version >= "3.10" and python_version < "4.0"
certifi==2022.12.7 ; python_version >= "3.10" and python_version < "4"
charset-normalizer==2.1.1 ; python_version >= "3.10" and python_version < "4"
click==8.1.3 ; python_version >= "3.10" and python_version < "4.0"
colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0"
commonmark==0.9.1 ; python_version >= "3.10" and python_version < "4.0"
confection==0.0.3 ; python_version >= "3.10" and python_version < "4.0"
contractions==0.1.73 ; python_version >= "3.10" and python_version < "4.0"
cymem==2.0.7 ; python_version >= "3.10" and python_version < "4.0"
datasets==2.8.0 ; python_version >= "3.10" and python_version < "4.0"
diffusers[torch]==0.11.1 ; python_version >= "3.10" and python_version < "4.0"
diffusers==0.11.1 ; python_version >= "3.10" and python_version < "4.0"
dill==0.3.6 ; python_version >= "3.10" and python_version < "4.0"
filelock==3.8.2 ; python_version >= "3.10" and python_version < "4.0"
frozenlist==1.3.3 ; python_version >= "3.10" and python_version < "4.0"
@ -17,18 +24,29 @@ fsspec[http]==2022.11.0 ; python_version >= "3.10" and python_version < "4.0"
huggingface-hub==0.11.1 ; python_version >= "3.10" and python_version < "4.0"
idna==3.4 ; python_version >= "3.10" and python_version < "4"
importlib-metadata==5.2.0 ; python_version >= "3.10" and python_version < "4.0"
jinja2==3.1.2 ; python_version >= "3.10" and python_version < "4.0"
joblib==1.2.0 ; python_version >= "3.10" and python_version < "4.0"
langcodes==3.3.0 ; python_version >= "3.10" and python_version < "4.0"
markupsafe==2.1.1 ; python_version >= "3.10" and python_version < "4.0"
multidict==6.0.4 ; python_version >= "3.10" and python_version < "4.0"
multiprocess==0.70.14 ; python_version >= "3.10" and python_version < "4.0"
numpy==1.24.0 ; python_version < "4.0" and python_version >= "3.10"
murmurhash==1.0.9 ; python_version >= "3.10" and python_version < "4.0"
nltk==3.8 ; python_version >= "3.10" and python_version < "4.0"
numpy==1.24.1 ; python_version < "4.0" and python_version >= "3.10"
nvidia-cublas-cu11==11.10.3.66 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Linux"
nvidia-cuda-nvrtc-cu11==11.7.99 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Linux"
nvidia-cuda-runtime-cu11==11.7.99 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Linux"
nvidia-cudnn-cu11==8.5.0.96 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Linux"
packaging==21.3 ; python_version >= "3.10" and python_version < "4.0"
pandas==1.5.2 ; python_version >= "3.10" and python_version < "4.0"
pathy==0.10.1 ; python_version >= "3.10" and python_version < "4.0"
pillow==9.3.0 ; python_version >= "3.10" and python_version < "4.0"
platformdirs==2.6.0 ; python_version >= "3.10" and python_version < "4.0"
preshed==3.0.8 ; python_version >= "3.10" and python_version < "4.0"
psutil==5.9.4 ; python_version >= "3.10" and python_version < "4.0"
pyahocorasick==1.4.4 ; python_version >= "3.10" and python_version < "4.0"
pyarrow==10.0.1 ; python_version >= "3.10" and python_version < "4.0"
pydantic==1.10.2 ; python_version >= "3.10" and python_version < "4.0"
pygments==2.13.0 ; python_version >= "3.10" and python_version < "4.0"
pyparsing==3.0.9 ; python_version >= "3.10" and python_version < "4.0"
python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "4.0"
@ -39,20 +57,35 @@ regex==2022.10.31 ; python_version >= "3.10" and python_version < "4.0"
requests==2.28.1 ; python_version >= "3.10" and python_version < "4"
responses==0.18.0 ; python_version >= "3.10" and python_version < "4.0"
rich==10.16.2 ; python_version >= "3.10" and python_version < "4.0"
setuptools==65.6.3 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Linux"
scikit-learn==1.2.0 ; python_version >= "3.10" and python_version < "4.0"
scipy==1.9.3 ; python_version >= "3.10" and python_version < "4.0"
setuptools==65.6.3 ; python_version >= "3.10" and python_version < "4.0"
shellingham==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
six==1.16.0 ; python_version >= "3.10" and python_version < "4.0"
smart-open==6.3.0 ; python_version >= "3.10" and python_version < "4.0"
soupsieve==2.3.2.post1 ; python_version >= "3.10" and python_version < "4.0"
spacy-legacy==3.0.10 ; python_version >= "3.10" and python_version < "4.0"
spacy-loggers==1.0.4 ; python_version >= "3.10" and python_version < "4.0"
spacy==3.4.4 ; python_version >= "3.10" and python_version < "4.0"
srsly==2.4.5 ; python_version >= "3.10" and python_version < "4.0"
textsearch==0.0.24 ; python_version >= "3.10" and python_version < "4.0"
thinc==8.1.6 ; python_version >= "3.10" and python_version < "4.0"
threadpoolctl==3.1.0 ; python_version >= "3.10" and python_version < "4.0"
timm==0.6.12 ; python_version >= "3.10" and python_version < "4.0"
tokenizers==0.13.2 ; python_version >= "3.10" and python_version < "4.0"
torch==1.13.1 ; python_version >= "3.10" and python_version < "4.0"
torchaudio==0.13.1 ; python_version >= "3.10" and python_version < "4.0"
torchvision==0.14.1 ; python_version >= "3.10" and python_version < "4.0"
tqdm==4.64.1 ; python_version >= "3.10" and python_version < "4.0"
typer==0.4.2 ; python_version >= "3.10" and python_version < "4.0"
typer[all]==0.4.2 ; python_version >= "3.10" and python_version < "4.0"
types-requests==2.28.11.7 ; python_version >= "3.10" and python_version < "4.0"
types-urllib3==1.26.25.4 ; python_version >= "3.10" and python_version < "4.0"
typing-extensions==4.4.0 ; python_version >= "3.10" and python_version < "4.0"
uritools==4.0.0 ; python_version >= "3.10" and python_version < "4.0"
urlextract==1.8.0 ; python_version >= "3.10" and python_version < "4.0"
urllib3==1.26.13 ; python_version >= "3.10" and python_version < "4"
wasabi==0.10.1 ; python_version >= "3.10" and python_version < "4.0"
wheel==0.38.4 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Linux"
xxhash==3.1.0 ; python_version >= "3.10" and python_version < "4.0"
yarl==1.8.2 ; python_version >= "3.10" and python_version < "4.0"

View File

@ -1,5 +1,6 @@
In this paper, I present {\bf \inlp} a new control-flow wrapper abstraction to enable the utilization of data from the internet (or a knowledge-database when offline) for existing context-needing Natural Language Processing (NLP) models to function without any given context. Internet-NLP can be used, finetuned alongside existing NLP models via its config settings and additionally its Long Short Term Memory neural network (LSTM neural network) can also be trained. Additionally incorporations of Masked Language Models (MLM) such as BERT, or LinkBERT \cite{devlin-etal-2019-bert,yasunaga-etal-2022-linkbert} can be utilized to improve search queries, and therfore retrieve more accurate and reliable data. Futhermore, {\bf \inlp} utilizes a LSTM, Reinforcement Learning and caches to allow for multi-turn NLP tasks, and improvement via Reinforcement Learning from user.
In this paper, I present {\bf \inlp}, a new control-flow wrapper abstraction to enable the utilization of data from the internet (or a knowledge-database when offline) for existing context-needing Natural Language Processing (NLP) models to function without any given context. Internet-NLP can be used and finetuned alongside existing NLP models via its config settings; additionally, its Long Short-Term Memory (LSTM) neural network can also be trained. Additionally, Masked Language Models (MLM) such as BERT or LinkBERT \cite{devlin-etal-2019-bert,yasunaga-etal-2022-linkbert} can be incorporated to improve search queries and therefore retrieve more accurate and reliable data. Furthermore, Internet-NLP utilizes an LSTM, Reinforcement Learning, and caches to allow for multi-turn NLP tasks and improvement via Reinforcement Learning from the user.
Internet-NLP, in basic terms, provides the context for context-needing NLP models to let them function. Internet-NLP can be improved via finetuning, and training of LSTM and Reinforcement Learning model (which can be trained alongside the NLP model), which enables for better search queries, and subsequently results. It obtains state-of-the-art (SOTA) results in QA and NLI without context.
Additionally in this paper, I also present new NLP and Natural Language Inference (NLI) models to assist {\bf \inlp}:
@ -12,12 +13,20 @@ Additionally in this paper, I also present new NLP and Natural Language Inferenc
Along with these models, I also present new general purpose QA and NLI datasets:
\begin{itemize}
\item ALotNLI made from ANLI, MultiNLI, and SNLI \cite{nie-etal-2020-adversarial,N18-1101,DBLP:journals/corr/BowmanAPM15}
\item ALotOpenBookQA made from CoQA, Natural Questions, and SQuAD \cite{DBLP:journals/corr/abs-1808-07042,kwiatkowski-etal-2019-natural,DBLP:journals/corr/abs-1806-03822}
\item ALotNLI made from datasets: ANLI, MultiNLI, and SNLI \cite{nie-etal-2020-adversarial,N18-1101,DBLP:journals/corr/BowmanAPM15}
\item ALotOpenBookQA made from datasets: CoQA, Natural Questions, and SQuAD \cite{DBLP:journals/corr/abs-1808-07042,kwiatkowski-etal-2019-natural,DBLP:journals/corr/abs-1806-03822}
\end{itemize}
As a result of these models, datasets, and Internet-NLP, the accuracy and reliability of most context-needing NLP models on most NLP tasks, especially tasks that require more factual responses with no given context increased.
As a result of Internet-NLP and these models and datasets, the accuracy and reliability of most context-needing NLP models on most NLP tasks increased, especially on tasks that require more factual responses with no given context.
The new NLP and NLI models that accompany Internet-NLP were trained on the general-purpose datasets (ALotNLI and ALotOpenBookQA). By default, Internet-NLP utilizes the text-generative model GPT-NeoX \cite{gpt-neox-library, gpt-neox-20b} for long responses and LinkBERT \cite{yasunaga-etal-2022-linkbert} for short responses. For two choices (e.g., True and False), a Bi-Encoder NLI model is used, and for multiple choices, a CrossEncoder is used \cite{thakur-2020-AugSBERT}.
Internet-NLP, in layman's terms, provides the context for context-needing NLP models to let them function. Internet-NLP can be improved via finetuning and via training of the LSTM and Reinforcement Learning model (which can be trained alongside the NLP model), which enables better search queries and, subsequently, better results. It obtains state-of-the-art (SOTA) results in QA and NLI without context.
\begin{comment}
In this paper, we propose Internet-NLP, a novel control-flow wrapper abstraction that allows existing context-dependent Natural Language Processing (NLP) models to utilize data from the internet as context, enabling them to function without any given context. Using the internet as a context source is particularly useful for NLP models that require real-time or current information to perform their tasks accurately.
Internet-NLP can be fine-tuned alongside existing NLP models using its config settings and optimizations of Masked Language Models (MLM) and Text2Text Models. This can improve search queries and retrieve more accurate and reliable data. Additionally, Internet-NLP can utilize large NLP models such as GPT-3 or GPT-NeoX-20B for multi-turn NLP tasks and can be improved through Reinforcement Learning from user interactions. Caches of internet results can also be tuned to enable faster computation for repetitive tasks.
To assist Internet-NLP, we also present a suite of new NLP and Natural Language Inference (NLI) models, including GPT-NeoX-20b for open-book question and answer (QA), LinkBERT for crossEncoder NLI, T5 for a statement to query and answer to context NLP. These models have been specifically designed to work with Internet-NLP to improve the accuracy and reliability of context-dependent NLP tasks. We also introduce two new general-purpose QA and NLI datasets: ALotNLI, which is made from ANLI, MultiNLI, and SNLI, and ALotOpenBookQA, which is made from CoQA, Natural Questions, and SQuAD. These datasets provide a diverse range of contexts and information that can be used to train and evaluate the performance of Internet-NLP and the accompanying NLP and NLI models.
The results of our evaluation show that Internet-NLP significantly improves the accuracy and reliability of context-dependent NLP models on various tasks, particularly those requiring factual responses with no given context. We achieve state-of-the-art results in QA with a no-context accuracy of approximately 64.7% when tested manually on the ALotOpenBookQA dataset and with random recent events. Internet-NLP enables NLP models to stay connected to current events without requiring frequent updates or large models and datasets. Overall, the combination of Internet-NLP and the accompanying NLP and NLI models represents a significant advance in the field of NLP and has the potential to revolutionize the way that NLP models are used in real-world applications.
\end{comment}

Binary file not shown.

View File

@ -20,6 +20,7 @@
\usepackage{enumitem}
\usepackage{import}
\usepackage{tikz}
\usepackage{verbatim}
\usetikzlibrary{chains,shadows.blur}
\pgfplotsset{compat=1.14}
@ -70,4 +71,4 @@
\appendix
\import{appendix/}{main.tex}
\end{document}
\end{document}

View File

View File

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

View File

@ -0,0 +1,6 @@
from django.apps import AppConfig
class ApiConfig(AppConfig):
    """Django application configuration for the ``api`` app."""

    # Dotted Python path of the application this configuration belongs to.
    name = "api"
    # Field class Django uses for implicitly created primary keys.
    default_auto_field = "django.db.models.BigAutoField"

View File

@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

View File

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

View File

@ -0,0 +1,6 @@
from django.apps import AppConfig
class QuestionAnswerConfig(AppConfig):
    """Django application configuration for the ``api.question_answer`` app."""

    # Dotted Python path of the application this configuration belongs to.
    name = "api.question_answer"
    # Field class Django uses for implicitly created primary keys.
    default_auto_field = "django.db.models.BigAutoField"

View File

@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

View File

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View File

@ -0,0 +1,10 @@
from typing import Any, List
from django.contrib import admin
from django.urls import include, path
from . import views
# URL routes exposed by this module.
urlpatterns: list[Any] = [
# The empty path (the root of this URLconf) is handled by QAView.
path("", views.QAView.as_view()),
]

View File

@ -0,0 +1,7 @@
from django.views.generic.base import TemplateView
# Create your views here.
class QAView(TemplateView):
    """Template-backed view for the question-answer page."""

    # Template this view renders.
    template_name = "index.question_answer.dj.html"

View File

@ -0,0 +1 @@
This is my api list

View File

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View File

@ -0,0 +1,11 @@
from typing import Any, List
from django.contrib import admin
from django.urls import include, path
from . import views
# URL routes exposed by this module.
urlpatterns: list[Any] = [
# The empty path (the root of this URLconf) is handled by ApiView.
path("", views.ApiView.as_view()),
# Everything under "question-answer/" is delegated to api.question_answer.urls.
path("question-answer/", include("api.question_answer.urls")),
]

View File

@ -0,0 +1,7 @@
from django.views.generic.base import TemplateView
# Create your views here.
class ApiView(TemplateView):
    """Template-backed view for the API index page."""

    # Template this view renders.
    template_name = "index.api.dj.html"

View File

@ -0,0 +1,16 @@
"""
ASGI config for internet_ml_server project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/4.1/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "internet_ml_server.settings")
application = get_asgi_application()

View File

@ -0,0 +1,128 @@
"""
Django settings for internet_ml_server project.
Generated by 'django-admin startproject' using Django 4.1.4.
For more information on this file, see
https://docs.djangoproject.com/en/4.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/4.1/ref/settings/
"""
from typing import Any, List
import os
from pathlib import Path
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/4.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# Taken from DJANGO_SECRET_KEY when that variable is set and non-empty; the
# literal below is only the development fallback and is public in the repo.
SECRET_KEY: str = (
    os.environ.get("DJANGO_SECRET_KEY")
    or "django-insecure--$i87-g93k@%mope_eab8jxj&5sacde)=4u)rd9t7&b9wpr(9c"
)

# SECURITY WARNING: don't run with debug turned on in production!
# DJANGO_DEBUG is truthy for 1/true/yes/on (case-insensitive); any other
# value turns debug off. Leaving it unset keeps the previous default of True.
DEBUG: bool = os.environ.get("DJANGO_DEBUG", "true").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)

# DJANGO_ALLOWED_HOSTS is a comma-separated list of host names; blank items
# are dropped. Leaving it unset keeps the previous wildcard default.
ALLOWED_HOSTS: list[str] = [
    host.strip()
    for host in os.environ.get("DJANGO_ALLOWED_HOSTS", "*").split(",")
    if host.strip()
]
# Application definition
# Written as three concatenated groups so the origin of each app is visible;
# the result is a single flat list in the order shown.
INSTALLED_APPS: list[str] = (
    # Apps shipped under django.contrib.
    [
        "django.contrib.admin",
        "django.contrib.auth",
        "django.contrib.contenttypes",
        "django.contrib.sessions",
        "django.contrib.messages",
        "django.contrib.staticfiles",
    ]
    # Django REST framework.
    + ["rest_framework"]
    # The api app and its question_answer sub-app.
    + ["api", "api.question_answer"]
)
# Middleware classes, given as dotted import paths.
# NOTE(review): Django is understood to apply these in list order, and the
# auth and messages entries sit after the session entry here — confirm
# against the Django middleware-ordering docs before reordering anything.
MIDDLEWARE: list[str] = [
    "django.middleware.security.SecurityMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
]
# Dotted path of the project's root URLconf module.
ROOT_URLCONF: str = "internet_ml_server.urls"

# Template engine configuration.
TEMPLATES: list[Any] = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        # Extra template directories. Joined with pathlib instead of "/"
        # string concatenation so the separators are correct on every
        # platform; converted back to str to keep the entries' type unchanged.
        "DIRS": [
            str(BASE_DIR / "api" / "templates"),
            str(BASE_DIR / "internet_ml_server" / "templates"),
            str(BASE_DIR / "api" / "question_answer" / "templates"),
        ],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
            ],
        },
    },
]

# Dotted path of the WSGI callable defined in internet_ml_server/wsgi.py.
WSGI_APPLICATION: str = "internet_ml_server.wsgi.application"
# Database
# https://docs.djangoproject.com/en/4.1/ref/settings/#databases
# No connection is defined: the mapping is empty.
# NOTE(review): INSTALLED_APPS in this file still lists django.contrib.admin,
# auth and sessions — presumably anything that touches the ORM would need a
# "default" entry here; confirm the empty mapping is intentional.
DATABASES: Any = {}
# Password validation
# https://docs.djangoproject.com/en/4.1/ref/settings/#auth-password-validators
# One {"NAME": <dotted path>} mapping per validator, in the order listed.
AUTH_PASSWORD_VALIDATORS: list[Any] = [
    {"NAME": validator_path}
    for validator_path in (
        "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
        "django.contrib.auth.password_validation.MinimumLengthValidator",
        "django.contrib.auth.password_validation.CommonPasswordValidator",
        "django.contrib.auth.password_validation.NumericPasswordValidator",
    )
]
# Internationalization
# https://docs.djangoproject.com/en/4.1/topics/i18n/
LANGUAGE_CODE: str = "en-us"
TIME_ZONE: str = "UTC"
# Both switches are left enabled.
USE_I18N: bool = True
USE_TZ: bool = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.1/howto/static-files/
# Relative value: note there is no leading slash.
STATIC_URL: str = "static/"

# Default primary key field type
# https://docs.djangoproject.com/en/4.1/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD: str = "django.db.models.BigAutoField"

View File

@ -0,0 +1 @@
This is my api website

View File

@ -0,0 +1,27 @@
"""internet_ml_server URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/4.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from typing import Any, List
from django.contrib import admin
from django.urls import include, path
from . import views
# Project-level routes.
urlpatterns: list[Any] = [
    # path("admin/", admin.site.urls),
    # Site root. NOTE(review): this route is registered under the name "api"
    # even though it serves the index page — the name is kept so existing
    # reverse("api") lookups keep resolving; confirm before renaming.
    path("", views.IndexView.as_view(), name="api"),
    # Everything below "api/" is delegated to the api app's URLconf. No
    # `name` is passed here: path() ignores it when the target is an
    # include(), and the old value only duplicated the index route's name.
    path("api/", include("api.urls")),
]

View File

@ -0,0 +1,7 @@
from django.views.generic.base import TemplateView
# Create your views here.
class IndexView(TemplateView):
    """Template-backed view that renders ``index.internet_ml_server.dj.html``."""

    template_name = "index.internet_ml_server.dj.html"

View File

@ -0,0 +1,16 @@
"""
WSGI config for internet_ml_server project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/4.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Select this project's settings module unless DJANGO_SETTINGS_MODULE is
# already present in the environment (setdefault never overwrites a value).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "internet_ml_server.settings")
# Module-level WSGI callable; created only after the settings module name
# has been put in place by the line above.
application = get_wsgi_application()

View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Hand the process's command-line arguments to Django's management runner.

    The settings module defaults to ``internet_ml_server.settings`` when
    ``DJANGO_SETTINGS_MODULE`` is not already set.

    Raises:
        ImportError: if Django cannot be imported; the original import error
            is chained as the cause.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "internet_ml_server.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError as import_failure:
        # Replace the bare import failure with a hint about the usual causes.
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_failure
    execute_from_command_line(sys.argv)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

2916
web/internet_ml_server/poetry.lock generated 100644

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,24 @@
[build-system]
requires = ["poetry_core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "internet-ml-server"
version = "0.1.0"
description = ""
authors = ["Thamognya Kodi <contact@thamognya.com>"]
readme = "README.md"
packages = [{include = "internet_ml_server"}]
[tool.poetry.dependencies]
python = "^3.10"
django = "^4.1.4"
internet-ml = "^0.2.11"
django-stubs = "^1.13.1"
djangorestframework = "^3.14.0"
[tool.mypy]
plugins = ["mypy_django_plugin.main"]
[tool.django-stubs]
# Must name the project's real settings module — the same one manage.py,
# asgi.py and wsgi.py select — so the mypy plugin can load it.
django_settings_module = "internet_ml_server.settings"

View File

@ -0,0 +1,3 @@
#!/bin/sh
# Start Django's runserver through manage.py, bound to 0.0.0.0 on port 8080.
# Expects to be run from the directory that contains manage.py.
python manage.py runserver 0.0.0.0:8080