main
Thamognya Kodi 2022-12-25 10:04:11 +07:00
parent 64868710d7
commit eef7dfce56
7 changed files with 176 additions and 209 deletions

View File

@ -1,38 +0,0 @@
%auto-ignore
% TODO: Fix stuff here
\section{Introduction}
There are currently two main solutions for performing NLP tasks when no context is provided:
\begin{enumerate}[leftmargin=1em]
\item Large Pre-Trained Text2Text-Generation Model \label{CurrSolOne}
\begin{itemize}[leftmargin=1em]
\begin{figure}
\includegraphics[width=1.0\columnwidth]{t5qa.pdf}
\caption{This is an illustration of how ODQA LMs work \cite{https://doi.org/10.48550/arxiv.2002.08910}.}
\label{fig:CurrSolOneImg}
\end{figure}
\item Pre-trained Text2Text-generation models, such as T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}, that have closed-book (no context) open-domain question-answering (ODQA) capabilities \cite{weng2020odqa}. These closed-book ODQA LMs achieve comparatively state-of-the-art performance on many no-context NLP tasks, mainly question answering. However, Text2Text-generation models for such no-context NLP tasks are usually large, slow, and have low accuracy \cite{https://doi.org/10.48550/arxiv.2002.08910}.
\item Example: T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}
\item Illustration of how ODQA LMs work: Figure \ref{fig:CurrSolOneImg}
\end{itemize}
\item Large Knowledge Base with a Context-Needing Language Model \label{CurrSolTwo}
\begin{itemize}[leftmargin=1em]
\begin{figure}
\includegraphics[width=1.0\columnwidth]{artefacts_diagram.pdf}
\caption{This is an illustration of how LMs with a knowledge base and artifact retriever work \cite{https://doi.org/10.48550/arxiv.2201.09651}.}
\label{fig:CurrSolTwoImg}
\end{figure}
\item A large knowledge base with a pre-trained open-book LM and retriever provides comparatively higher performance and accuracy, and the model itself is small. However, these models usually require a large knowledge base, which makes the overall solution large, although it remains fast and more accurate in the field the knowledge base specializes in.
\item Example: LinkBERT \cite{https://doi.org/10.48550/arxiv.2203.15827} with an artifact retriever \cite{https://doi.org/10.48550/arxiv.2201.09651} and a knowledge base such as DBpedia or Wikidata \cite{10.5555/1785162.1785216, 10.1145/2629489}
\item Illustration of how LMs with a knowledge base and artifact retriever work: Figure \ref{fig:CurrSolTwoImg}
\end{itemize}
\end{enumerate}
Solutions \ref{CurrSolOne} and \ref{CurrSolTwo} achieve the same end goal of performing NLP tasks without context via two different methods; however, both restrict the range and accuracy of no-context NLP tasks, especially the more open-domain ones. The major limitations are the accuracy, efficiency, and size of the models and their knowledge bases, which in turn limit the use cases of closed-book open-domain NLP tasks.
In this paper, I propose \inlp, a direct improvement to solution \ref{CurrSolOne} that removes the need for a large knowledge base (although Internet-NLP can be configured to utilize one) by incorporating the internet's vast knowledge, along with the data in hyperlinks between webpages \cite{https://doi.org/10.48550/arxiv.2203.15827}, to create richer resources for existing or future context-needing pre-trained models to use in NLP tasks. Internet-NLP combines pre-trained NLP and NLI models with a web-data scraper that creates a small, temporary, on-demand dataset and a cache, so that NLP tasks can be performed without given context.
Utilizing the vast data on the internet as a graph of documents forming a corpus \cite{https://doi.org/10.48550/arxiv.2203.15827} allows us to reduce the size of our solution while increasing its efficiency and accuracy. Additionally, unlike solutions built on static data, Internet-NLP utilizes the dynamic and frequently updated data of the internet, which enables us to pair any type of NLP model with NLI models and follow a control flow that obtains the context required by context-needing models. This approach combines data collection \cite{inbook} with context-needing open-domain NLP models to obtain more accurate results on most no-context NLP tasks.
Additionally, Internet-NLP's Text2Text-generation search-query model, T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}, together with LSTM-based noun remembrance using part-of-speech tagging \cite{Chiche2022}, is trained on ALotClosedBookQA; it improves its search queries based on the difference between the answer received and the answer in the dataset, using part-of-speech tagging on the answers \cite{Chiche2022}.

View File

@ -1,5 +1,5 @@
#!/bin/sh
bibtex-tidy --curly --numeric --tab --align=13 --duplicates=key --no-escape --no-remove-dupe-fields ./ref.bib && pdflatex main
pdflatex main
bibtex main
pdflatex main

Binary file not shown.

After

Width:  |  Height:  |  Size: 197 KiB

View File

@ -0,0 +1,41 @@
%auto-ignore
% TODO: Fix stuff here
\section{Introduction}
There are currently two main solutions for performing NLP tasks when no context is provided:
\begin{enumerate}[leftmargin=1em]
\item Large Pre-Trained Text-Generation and Text2Text-Generation Models \label{CurrSolOne}
\begin{itemize}[leftmargin=1em]
\begin{figure}
\includegraphics[width=1.0\columnwidth]{gpt3architecture.png}
\caption{An illustration of the architecture of GPT-2 and GPT-3, popular Text-Generation models \cite{gpt3-overview, the-gpt-3-architecture-on-a-napkin}.}
\label{fig:CurrSolOneImg}
\end{figure}
\begin{figure}
\includegraphics[width=1.0\columnwidth]{t5architecture.png}
\caption{An illustration of the architecture of T5, a popular Text2Text-Generation model \cite{alammar}.}
\label{fig:CurrSolOneSecondImg}
\end{figure}
\item Pre-trained Text2Text-generation models, such as T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}, that have closed-book (no context) open-domain question-answering (ODQA) capabilities \cite{weng2020odqa}. These closed-book ODQA LMs achieve comparatively state-of-the-art performance on many no-context NLP tasks, mainly question answering. However, Text2Text-generation models for such no-context NLP tasks are usually large, slow, and have low accuracy \cite{https://doi.org/10.48550/arxiv.2002.08910} (a minimal usage sketch is given after this list).
\item Example: T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}
\item Illustration of the architectures of these Text-Generation and Text2Text-Generation models: Figures \ref{fig:CurrSolOneImg} and \ref{fig:CurrSolOneSecondImg}
\end{itemize}
\item Large Knowledge Base with a Context-Needing Language Model \label{CurrSolTwo}
\begin{itemize}[leftmargin=1em]
\begin{figure}
\includegraphics[width=1.0\columnwidth]{artefacts_diagram.pdf}
\caption{This is an illustration of how LMs with a knowledge base and artifact retriever work \cite{https://doi.org/10.48550/arxiv.2201.09651}.}
\label{fig:CurrSolTwoImg}
\end{figure}
\item A large knowledge base with a pre-trained open-book LM and retriever provides comparatively higher performance and accuracy, and the model itself is small. However, these models usually require a large knowledge base, which makes the overall solution large, although it remains fast and more accurate in the field the knowledge base specializes in (a second sketch after this list illustrates this setup).
\item Example: LinkBERT \cite{https://doi.org/10.48550/arxiv.2203.15827} with an artifact retriever \cite{https://doi.org/10.48550/arxiv.2201.09651} and a knowledge base such as DBpedia or Wikidata \cite{10.5555/1785162.1785216, 10.1145/2629489}
\item Illustration of how LMs with a knowledge base and artifact retriever work: Figure \ref{fig:CurrSolTwoImg}
\end{itemize}
\end{enumerate}
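To make solution \ref{CurrSolOne} concrete, the minimal Python sketch below queries a pre-trained closed-book Text2Text-generation model directly through the Hugging Face \texttt{transformers} pipeline; the checkpoint name \texttt{google/t5-small-ssm-nq} is an assumption used purely for illustration, and any closed-book QA checkpoint could be substituted.
\begin{verbatim}
# Solution 1 sketch: closed-book QA with a pre-trained Text2Text model.
# No context or knowledge base is supplied; the answer must come from
# the model's parameters alone.
from transformers import pipeline

closed_book_qa = pipeline(
    "text2text-generation",
    model="google/t5-small-ssm-nq",  # assumed closed-book QA checkpoint
)

question = "who developed the theory of general relativity"
result = closed_book_qa(question, max_length=32)
print(result[0]["generated_text"])
\end{verbatim}
The convenience is that nothing but the question is needed; the cost, as noted above, is model size, speed, and accuracy.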
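Solution \ref{CurrSolTwo} can be sketched in a similarly hedged way: a dense retriever selects a passage from a (here, tiny and in-memory) knowledge base and a small context-needing extractive reader answers from it. The \texttt{all-MiniLM-L6-v2} and \texttt{deepset/roberta-base-squad2} checkpoints are assumptions chosen for illustration rather than the specific models cited above.
\begin{verbatim}
# Solution 2 sketch: retriever + context-needing reader over a knowledge base.
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

knowledge_base = [
    "Albert Einstein developed the theory of general relativity in 1915.",
    "The Eiffel Tower is located in Paris, France.",
]

retriever = SentenceTransformer("all-MiniLM-L6-v2")     # assumed retriever
reader = pipeline("question-answering",
                  model="deepset/roberta-base-squad2")  # assumed reader

question = "Who developed the theory of general relativity?"
scores = util.cos_sim(retriever.encode(question),
                      retriever.encode(knowledge_base))[0]
best_passage = knowledge_base[int(scores.argmax())]

# The reader is small and accurate, but only because the knowledge base
# supplied the context; at DBpedia or Wikidata scale the base dominates
# the footprint of the whole solution.
print(reader(question=question, context=best_passage)["answer"])
\end{verbatim}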
Solutions \ref{CurrSolOne} and \ref{CurrSolTwo} achieve the same end goal of performing NLP tasks without context via two different methods; however, both restrict the range and accuracy of no-context NLP tasks, especially the more open-domain ones. The major limitations are the accuracy, efficiency, and size of the models and their knowledge bases, which in turn limit the use cases of closed-book open-domain NLP tasks.
In this paper, I propose \inlp, a direct improvement to solution \ref{CurrSolOne} that removes the need for a large knowledge base (although Internet-NLP can be configured to utilize one) by incorporating the internet's vast knowledge, along with the data in hyperlinks between webpages \cite{https://doi.org/10.48550/arxiv.2203.15827}, to create richer resources for existing or future context-needing pre-trained models to use in NLP tasks. Internet-NLP combines pre-trained NLP and NLI models with a web-data scraper that creates a small, temporary, on-demand dataset and a cache, so that NLP tasks can be performed without given context.
Utilizing the vast data on the internet as a graph of documents forming a corpus \cite{https://doi.org/10.48550/arxiv.2203.15827} allows us to reduce the size of our solution while increasing its efficiency and accuracy. Additionally, unlike solutions built on static data, Internet-NLP utilizes the dynamic and frequently updated data of the internet, which enables us to pair any type of NLP model with NLI models and follow a control flow that obtains the context required by context-needing models. This approach combines data collection \cite{inbook} with context-needing open-domain NLP models to obtain more accurate results on most no-context NLP tasks; a rough sketch of this control flow is given below.
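In this sketch, webpages returned for a query are scraped into a temporary context that is then passed to an ordinary context-needing QA model. The helper \texttt{search\_result\_urls} is a hypothetical placeholder for whatever search backend Internet-NLP is configured with, and the reader checkpoint is likewise only illustrative.
\begin{verbatim}
# Internet-NLP control-flow sketch: build a temporary, on-demand context
# from the web instead of a static knowledge base.
import requests
from bs4 import BeautifulSoup
from transformers import pipeline


def search_result_urls(query, k=3):
    """Hypothetical placeholder for the configured search backend."""
    raise NotImplementedError


def scrape_text(url):
    # Reduce the page to visible text so it can serve as context.
    html = requests.get(url, timeout=10).text
    return BeautifulSoup(html, "html.parser").get_text(" ", strip=True)


def internet_qa(question):
    context = " ".join(scrape_text(u) for u in search_result_urls(question))
    reader = pipeline("question-answering",
                      model="deepset/roberta-base-squad2")  # assumed reader
    return reader(question=question, context=context)["answer"]
\end{verbatim}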
Additionally, Internet-NLP's Text2Text-generation search-query model, T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}, together with LSTM-based noun remembrance using part-of-speech tagging \cite{Chiche2022}, is trained on ALotClosedBookQA; it improves its search queries based on the difference between the answer received and the answer in the dataset, using part-of-speech tagging on the answers \cite{Chiche2022}.
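As a rough illustration of this part-of-speech-based refinement, the sketch below (using NLTK) appends nouns that appear in the dataset's reference answer but not in the received answer to the next search query; the exact refinement rule used by Internet-NLP may differ.
\begin{verbatim}
# Query-refinement sketch: nouns present in the reference answer but
# missing from the received answer are appended to the next search query.
import nltk

nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)


def nouns(text):
    tagged = nltk.pos_tag(nltk.word_tokenize(text))
    return {word.lower() for word, tag in tagged if tag.startswith("NN")}


def refine_query(query, received_answer, reference_answer):
    missing = nouns(reference_answer) - nouns(received_answer)
    if not missing:
        return query
    return query + " " + " ".join(sorted(missing))
\end{verbatim}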

Binary file not shown.

After

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

View File

@ -1,194 +1,158 @@
% Cleaned with bibtex-tidy
@inproceedings{devlin-etal-2019-bert,
  title = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  year = 2019,
  month = jun,
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  publisher = {Association for Computational Linguistics},
  address = {Minneapolis, Minnesota},
  pages = {4171--4186},
  doi = {10.18653/v1/N19-1423},
  url = {https://aclanthology.org/N19-1423},
  abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%} absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).}
}
@inproceedings{yasunaga-etal-2022-linkbert,
  title = {{L}ink{BERT}: Pretraining Language Models with Document Links},
  author = {Yasunaga, Michihiro and Leskovec, Jure and Liang, Percy},
  year = 2022,
  month = may,
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  address = {Dublin, Ireland},
  pages = {8003--8016},
  doi = {10.18653/v1/2022.acl-long.551},
  url = {https://aclanthology.org/2022.acl-long.551},
  abstract = {Language model (LM) pretraining captures various knowledge from text corpora, helping downstream tasks. However, existing methods such as BERT model a single document, and do not capture dependencies or knowledge that span across documents. In this work, we propose LinkBERT, an LM pretraining method that leverages links between documents, e.g., hyperlinks. Given a text corpus, we view it as a graph of documents and create LM inputs by placing linked documents in the same context. We then pretrain the LM with two joint self-supervised objectives: masked language modeling and our new proposal, document relation prediction. We show that LinkBERT outperforms BERT on various downstream tasks across two domains: the general domain (pretrained on Wikipedia with hyperlinks) and biomedical domain (pretrained on PubMed with citation links). LinkBERT is especially effective for multi-hop reasoning and few-shot QA (+5{\%} absolute improvement on HotpotQA and TriviaQA), and our biomedical LinkBERT sets new states of the art on various BioNLP tasks (+7{\%} on BioASQ and USMLE). We release our pretrained models, LinkBERT and BioLinkBERT, as well as code and data.}
}
@software{gpt-neox-library,
  title = {{GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch}},
  author = {Andonian, Alex and Anthony, Quentin and Biderman, Stella and Black, Sid and Gali, Preetham and Gao, Leo and Hallahan, Eric and Levy-Kramer, Josh and Leahy, Connor and Nestler, Lucas and Parker, Kip and Pieler, Michael and Purohit, Shivanshu and Songz, Tri and Phil, Wang and Weinbach, Samuel},
  year = 2021,
  month = 8,
  doi = {10.5281/zenodo.5879544},
  url = {https://www.github.com/eleutherai/gpt-neox},
  version = {0.0.1}
}
@inproceedings{gpt-neox-20b,
  title = {{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model},
  author = {Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel},
  year = 2022,
  booktitle = {Proceedings of the ACL Workshop on Challenges \& Perspectives in Creating Large Language Models},
  url = {https://arxiv.org/abs/2204.06745}
}
@inproceedings{reimers-2019-sentence-bert,
  title = {Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
  author = {Reimers, Nils and Gurevych, Iryna},
  year = 2019,
  month = 11,
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  url = {http://arxiv.org/abs/1908.10084}
}
@inproceedings{thakur-2020-AugSBERT,
  title = {Augmented {SBERT}: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
  author = {Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
  year = 2021,
  month = 6,
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address = {Online},
  pages = {296--310},
  url = {https://arxiv.org/abs/2010.08240}
}
@misc{https://doi.org/10.48550/arxiv.1910.10683,
  title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  author = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
  year = 2019,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1910.10683},
  url = {https://arxiv.org/abs/1910.10683},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), Machine Learning (stat.ML), FOS: Computer and information sciences}
}
@inproceedings{nie-etal-2020-adversarial,
  title = {Adversarial {NLI}: A New Benchmark for Natural Language Understanding},
  author = {Nie, Yixin and Williams, Adina and Dinan, Emily and Bansal, Mohit and Weston, Jason and Kiela, Douwe},
  year = 2020,
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics}
}
@inproceedings{N18-1101,
  title = {A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
  author = {Williams, Adina and Nangia, Nikita and Bowman, Samuel},
  year = 2018,
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  location = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages = {1112--1122},
  url = {http://aclweb.org/anthology/N18-1101}
}
@article{DBLP:journals/corr/BowmanAPM15,
  title = {A large annotated corpus for learning natural language inference},
  author = {Samuel R. Bowman and Gabor Angeli and Christopher Potts and Christopher D. Manning},
  year = 2015,
  journal = {CoRR},
  volume = {abs/1508.05326},
  url = {http://arxiv.org/abs/1508.05326},
  eprinttype = {arXiv},
  eprint = {1508.05326},
  timestamp = {Mon, 13 Aug 2018 16:46:27 +0200},
  biburl = {https://dblp.org/rec/journals/corr/BowmanAPM15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1808-07042,
  title = {CoQA: {A} Conversational Question Answering Challenge},
  author = {Siva Reddy and Danqi Chen and Christopher D. Manning},
  year = 2018,
  journal = {CoRR},
  volume = {abs/1808.07042},
  url = {http://arxiv.org/abs/1808.07042},
  eprinttype = {arXiv},
  eprint = {1808.07042},
  timestamp = {Sun, 02 Sep 2018 15:01:56 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1808-07042.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{kwiatkowski-etal-2019-natural,
  title = {Natural Questions: A Benchmark for Question Answering Research},
  author = {Kwiatkowski, Tom and Palomaki, Jennimaria and Redfield, Olivia and Collins, Michael and Parikh, Ankur and Alberti, Chris and Epstein, Danielle and Polosukhin, Illia and Devlin, Jacob and Lee, Kenton and Toutanova, Kristina and Jones, Llion and Kelcey, Matthew and Chang, Ming-Wei and Dai, Andrew M. and Uszkoreit, Jakob and Le, Quoc and Petrov, Slav},
  year = 2019,
  journal = {Transactions of the Association for Computational Linguistics},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  volume = 7,
  pages = {452--466},
  doi = {10.1162/tacl_a_00276},
  url = {https://aclanthology.org/Q19-1026},
  abstract = {We present the Natural Questions corpus, a question answering data set. Questions consist of real anonymized, aggregated queries issued to the Google search engine. An annotator is presented with a question along with a Wikipedia page from the top 5 search results, and annotates a long answer (typically a paragraph) and a short answer (one or more entities) if present on the page, or marks null if no long/short answer is present. The public release consists of 307,373 training examples with single annotations; 7,830 examples with 5-way annotations for development data; and a further 7,842 examples with 5-way annotated sequestered as test data. We present experiments validating quality of the data. We also describe analysis of 25-way annotations on 302 examples, giving insights into human variability on the annotation task. We introduce robust metrics for the purposes of evaluating question answering systems; demonstrate high human upper bounds on these metrics; and establish baseline results using competitive methods drawn from related literature.}
}
@article{DBLP:journals/corr/abs-1806-03822,
  title = {Know What You Don't Know: Unanswerable Questions for SQuAD},
  author = {Pranav Rajpurkar and Robin Jia and Percy Liang},
  year = 2018,
  journal = {CoRR},
  volume = {abs/1806.03822},
  url = {http://arxiv.org/abs/1806.03822},
  eprinttype = {arXiv},
  eprint = {1806.03822},
  timestamp = {Mon, 13 Aug 2018 16:48:21 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1806-03822.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@misc{the-gpt-3-architecture-on-a-napkin,
  title = {How deep is the machine?},
  journal = {The GPT-3 Architecture, on a Napkin},
  url = {https://dugas.ch/artificial_curiosity/GPT_architecture.html}
}
@misc{gpt3-overview,
  author = {Dzlab},
  journal = {GPT-3 An Overview},
  url = {https://dzlab.github.io/ml/2020/07/25/gpt3-overview/}
}
@misc{alammar,
  title = {The illustrated transformer},
  author = {Alammar, Jay},
  journal = {The Illustrated Transformer Jay Alammar Visualizing machine learning one concept at a time.},
  url = {https://jalammar.github.io/illustrated-transformer/}
}