% Cleaned with bibtex-tidy
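%
% Example bibtex-tidy invocation that would reproduce this file's conventions
% (unbraced numeric years, three-letter month macros). The exact flags and the
% file name are assumptions for illustration, not recorded anywhere here:
%
%   bibtex-tidy --numeric --months references.bib
%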
@inproceedings{devlin-etal-2019-bert,
  title = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  year = 2019,
  month = jun,
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  publisher = {Association for Computational Linguistics},
  address = {Minneapolis, Minnesota},
  pages = {4171--4186},
  doi = {10.18653/v1/N19-1423},
  url = {https://aclanthology.org/N19-1423},
  abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%} absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).}
}
@inproceedings{yasunaga-etal-2022-linkbert,
  title = {{L}ink{BERT}: Pretraining Language Models with Document Links},
  author = {Yasunaga, Michihiro and Leskovec, Jure and Liang, Percy},
  year = 2022,
  month = may,
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  address = {Dublin, Ireland},
  pages = {8003--8016},
  doi = {10.18653/v1/2022.acl-long.551},
  url = {https://aclanthology.org/2022.acl-long.551},
  abstract = {Language model (LM) pretraining captures various knowledge from text corpora, helping downstream tasks. However, existing methods such as BERT model a single document, and do not capture dependencies or knowledge that span across documents. In this work, we propose LinkBERT, an LM pretraining method that leverages links between documents, e.g., hyperlinks. Given a text corpus, we view it as a graph of documents and create LM inputs by placing linked documents in the same context. We then pretrain the LM with two joint self-supervised objectives: masked language modeling and our new proposal, document relation prediction. We show that LinkBERT outperforms BERT on various downstream tasks across two domains: the general domain (pretrained on Wikipedia with hyperlinks) and biomedical domain (pretrained on PubMed with citation links). LinkBERT is especially effective for multi-hop reasoning and few-shot QA (+5{\%} absolute improvement on HotpotQA and TriviaQA), and our biomedical LinkBERT sets new states of the art on various BioNLP tasks (+7{\%} on BioASQ and USMLE). We release our pretrained models, LinkBERT and BioLinkBERT, as well as code and data.}
}
@software{gpt-neox-library,
  title = {{GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch}},
  author = {Andonian, Alex and Anthony, Quentin and Biderman, Stella and Black, Sid and Gali, Preetham and Gao, Leo and Hallahan, Eric and Levy-Kramer, Josh and Leahy, Connor and Nestler, Lucas and Parker, Kip and Pieler, Michael and Purohit, Shivanshu and Songz, Tri and Phil, Wang and Weinbach, Samuel},
  year = 2021,
  month = aug,
  doi = {10.5281/zenodo.5879544},
  url = {https://www.github.com/eleutherai/gpt-neox},
  version = {0.0.1}
}
@inproceedings{gpt-neox-20b,
  title = {{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model},
  author = {Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel},
  year = 2022,
  booktitle = {Proceedings of the ACL Workshop on Challenges \& Perspectives in Creating Large Language Models},
  url = {https://arxiv.org/abs/2204.06745}
}
@inproceedings{reimers-2019-sentence-bert,
  title = {Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
  author = {Reimers, Nils and Gurevych, Iryna},
  year = 2019,
  month = nov,
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  url = {http://arxiv.org/abs/1908.10084}
}
@inproceedings{thakur-2020-AugSBERT,
  title = {Augmented {SBERT}: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
  author = {Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
  year = 2021,
  month = jun,
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address = {Online},
  pages = {296--310},
  url = {https://arxiv.org/abs/2010.08240}
}
@misc{https://doi.org/10.48550/arxiv.1910.10683,
  title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  author = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
  year = 2019,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1910.10683},
  url = {https://arxiv.org/abs/1910.10683},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), Machine Learning (stat.ML), FOS: Computer and information sciences}
}
@inproceedings{nie-etal-2020-adversarial,
  title = {Adversarial {NLI}: A New Benchmark for Natural Language Understanding},
  author = {Nie, Yixin and Williams, Adina and Dinan, Emily and Bansal, Mohit and Weston, Jason and Kiela, Douwe},
  year = 2020,
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics}
}
@inproceedings{N18-1101,
  title = {A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
  author = {Williams, Adina and Nangia, Nikita and Bowman, Samuel},
  year = 2018,
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  location = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages = {1112--1122},
  url = {http://aclweb.org/anthology/N18-1101}
}
@article{DBLP:journals/corr/BowmanAPM15,
  title = {A large annotated corpus for learning natural language inference},
  author = {Samuel R. Bowman and Gabor Angeli and Christopher Potts and Christopher D. Manning},
  year = 2015,
  journal = {CoRR},
  volume = {abs/1508.05326},
  url = {http://arxiv.org/abs/1508.05326},
  eprinttype = {arXiv},
  eprint = {1508.05326},
  timestamp = {Mon, 13 Aug 2018 16:46:27 +0200},
  biburl = {https://dblp.org/rec/journals/corr/BowmanAPM15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1808-07042,
  title = {CoQA: {A} Conversational Question Answering Challenge},
  author = {Siva Reddy and Danqi Chen and Christopher D. Manning},
  year = 2018,
  journal = {CoRR},
  volume = {abs/1808.07042},
  url = {http://arxiv.org/abs/1808.07042},
  eprinttype = {arXiv},
  eprint = {1808.07042},
  timestamp = {Sun, 02 Sep 2018 15:01:56 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1808-07042.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{kwiatkowski-etal-2019-natural,
  title = {Natural Questions: A Benchmark for Question Answering Research},
  author = {Kwiatkowski, Tom and Palomaki, Jennimaria and Redfield, Olivia and Collins, Michael and Parikh, Ankur and Alberti, Chris and Epstein, Danielle and Polosukhin, Illia and Devlin, Jacob and Lee, Kenton and Toutanova, Kristina and Jones, Llion and Kelcey, Matthew and Chang, Ming-Wei and Dai, Andrew M. and Uszkoreit, Jakob and Le, Quoc and Petrov, Slav},
  year = 2019,
  journal = {Transactions of the Association for Computational Linguistics},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  volume = 7,
  pages = {452--466},
  doi = {10.1162/tacl_a_00276},
  url = {https://aclanthology.org/Q19-1026},
  abstract = {We present the Natural Questions corpus, a question answering data set. Questions consist of real anonymized, aggregated queries issued to the Google search engine. An annotator is presented with a question along with a Wikipedia page from the top 5 search results, and annotates a long answer (typically a paragraph) and a short answer (one or more entities) if present on the page, or marks null if no long/short answer is present. The public release consists of 307,373 training examples with single annotations; 7,830 examples with 5-way annotations for development data; and a further 7,842 examples with 5-way annotated sequestered as test data. We present experiments validating quality of the data. We also describe analysis of 25-way annotations on 302 examples, giving insights into human variability on the annotation task. We introduce robust metrics for the purposes of evaluating question answering systems; demonstrate high human upper bounds on these metrics; and establish baseline results using competitive methods drawn from related literature.}
}
@article{DBLP:journals/corr/abs-1806-03822,
  title = {Know What You Don't Know: Unanswerable Questions for SQuAD},
  author = {Pranav Rajpurkar and Robin Jia and Percy Liang},
  year = 2018,
  journal = {CoRR},
  volume = {abs/1806.03822},
  url = {http://arxiv.org/abs/1806.03822},
  eprinttype = {arXiv},
  eprint = {1806.03822},
  timestamp = {Mon, 13 Aug 2018 16:48:21 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1806-03822.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@misc{the-gpt-3-architecture-on-a-napkin,
  title = {How deep is the machine?},
  howpublished = {The GPT-3 Architecture, on a Napkin},
  url = {https://dugas.ch/artificial_curiosity/GPT_architecture.html}
}
@misc{gpt3-overview,
  title = {GPT-3 An Overview},
  author = {Dzlab},
  url = {https://dzlab.github.io/ml/2020/07/25/gpt3-overview/}
}
@misc{alammar,
  title = {The Illustrated Transformer},
  author = {Alammar, Jay},
  howpublished = {jalammar.github.io},
  url = {https://jalammar.github.io/illustrated-transformer/}
}
@article{DBLP:journals/corr/abs-2005-14165,
  title = {Language Models are Few-Shot Learners},
  author = {Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwal and Ariel Herbert{-}Voss and Gretchen Krueger and Tom Henighan and Rewon Child and Aditya Ramesh and Daniel M. Ziegler and Jeffrey Wu and Clemens Winter and Christopher Hesse and Mark Chen and Eric Sigler and Mateusz Litwin and Scott Gray and Benjamin Chess and Jack Clark and Christopher Berner and Sam McCandlish and Alec Radford and Ilya Sutskever and Dario Amodei},
  year = 2020,
  journal = {CoRR},
  volume = {abs/2005.14165},
  url = {https://arxiv.org/abs/2005.14165},
  eprinttype = {arXiv},
  eprint = {2005.14165},
  timestamp = {Wed, 03 Jun 2020 11:36:54 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2005-14165.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{weng2020odqa,
  title = {How to Build an Open-Domain Question Answering System?},
  author = {Weng, Lilian},
  year = 2020,
  month = oct,
  journal = {lilianweng.github.io},
  url = {https://lilianweng.github.io/posts/2020-10-29-odqa/}
}
@article{DBLP:journals/corr/abs-2002-08910,
  title = {How Much Knowledge Can You Pack Into the Parameters of a Language Model?},
  author = {Adam Roberts and Colin Raffel and Noam Shazeer},
  year = 2020,
  journal = {CoRR},
  volume = {abs/2002.08910},
  url = {https://arxiv.org/abs/2002.08910},
  eprinttype = {arXiv},
  eprint = {2002.08910},
  timestamp = {Mon, 02 Mar 2020 16:46:06 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2002-08910.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@misc{https://doi.org/10.48550/arxiv.1810.04805,
  title = {BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  year = 2018,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1810.04805},
  url = {https://arxiv.org/abs/1810.04805},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences}
}
@misc{https://doi.org/10.48550/arxiv.2203.15827,
  title = {LinkBERT: Pretraining Language Models with Document Links},
  author = {Yasunaga, Michihiro and Leskovec, Jure and Liang, Percy},
  year = 2022,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.2203.15827},
  url = {https://arxiv.org/abs/2203.15827},
  copyright = {Creative Commons Attribution 4.0 International},
  keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences}
}
@misc{https://doi.org/10.48550/arxiv.1908.10084,
  title = {Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
  author = {Reimers, Nils and Gurevych, Iryna},
  year = 2019,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1908.10084},
  url = {https://arxiv.org/abs/1908.10084},
  copyright = {Creative Commons Attribution Share Alike 4.0 International},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences}
}
@misc{https://doi.org/10.48550/arxiv.1809.02789,
  title = {Can a Suit of Armor Conduct Electricity? A New Dataset for Open Book Question Answering},
  author = {Mihaylov, Todor and Clark, Peter and Khot, Tushar and Sabharwal, Ashish},
  year = 2018,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1809.02789},
  url = {https://arxiv.org/abs/1809.02789},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences}
}
@article{Bartolo_2020,
  title = {Beat the {AI}: Investigating Adversarial Human Annotation for Reading Comprehension},
  author = {Max Bartolo and Alastair Roberts and Johannes Welbl and Sebastian Riedel and Pontus Stenetorp},
  year = 2020,
  month = dec,
  journal = {Transactions of the Association for Computational Linguistics},
  publisher = {{MIT} Press - Journals},
  volume = 8,
  pages = {662--678},
  doi = {10.1162/tacl_a_00338},
  url = {https://doi.org/10.1162/tacl_a_00338}
}
@misc{https://doi.org/10.48550/arxiv.2002.08910,
  title = {How Much Knowledge Can You Pack Into the Parameters of a Language Model?},
  author = {Roberts, Adam and Raffel, Colin and Shazeer, Noam},
  year = 2020,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.2002.08910},
  url = {https://arxiv.org/abs/2002.08910},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences}
}
@inproceedings{10.5555/1785162.1785216,
  title = {DBpedia: A Nucleus for a Web of Open Data},
  author = {Auer, S\"{o}ren and Bizer, Christian and Kobilarov, Georgi and Lehmann, Jens and Cyganiak, Richard and Ives, Zachary},
  year = 2007,
  booktitle = {Proceedings of the 6th International The Semantic Web and 2nd Asian Conference on Asian Semantic Web Conference},
  series = {ISWC'07/ASWC'07},
  location = {Busan, Korea},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
  isbn = 3540762973,
  pages = {722--735},
  numpages = 14,
  abstract = {DBpedia is a community effort to extract structured information from Wikipedia and to make this information available on the Web. DBpedia allows you to ask sophisticated queries against datasets derived from Wikipedia and to link other datasets on the Web to Wikipedia data. We describe the extraction of the DBpedia datasets, and how the resulting information is published on the Web for human- and machine-consumption. We describe some emerging applications from the DBpedia community and show how website authors can facilitate DBpedia content within their sites. Finally, we present the current status of interlinking DBpedia with other open datasets on the Web and outline how DBpedia could serve as a nucleus for an emerging Web of open data.}
}
@article{10.1145/2629489,
  title = {Wikidata: A Free Collaborative Knowledgebase},
  author = {Vrande\v{c}i\'{c}, Denny and Kr\"{o}tzsch, Markus},
  year = 2014,
  month = sep,
  journal = {Commun. ACM},
  issue_date = {October 2014},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = 57,
  number = 10,
  issn = {0001-0782},
  pages = {78--85},
  numpages = 8,
  doi = {10.1145/2629489},
  url = {https://doi.org/10.1145/2629489},
  abstract = {This collaboratively edited knowledgebase provides a common source of data for Wikipedia, and everyone else.}
}
@misc{https://doi.org/10.48550/arxiv.1508.05326,
  title = {A large annotated corpus for learning natural language inference},
  author = {Bowman, Samuel R. and Angeli, Gabor and Potts, Christopher and Manning, Christopher D.},
  year = 2015,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1508.05326},
  url = {https://arxiv.org/abs/1508.05326},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences}
}
@misc{https://doi.org/10.48550/arxiv.1910.14599,
  title = {Adversarial NLI: A New Benchmark for Natural Language Understanding},
  author = {Nie, Yixin and Williams, Adina and Dinan, Emily and Bansal, Mohit and Weston, Jason and Kiela, Douwe},
  year = 2019,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1910.14599},
  url = {https://arxiv.org/abs/1910.14599},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences}
}
@article{47761,
  title = {Natural Questions: a Benchmark for Question Answering Research},
  author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le and Slav Petrov},
  year = 2019,
  journal = {Transactions of the Association for Computational Linguistics}
}
@misc{https://doi.org/10.48550/arxiv.1704.05179,
  title = {SearchQA: A New Q \& A Dataset Augmented with Context from a Search Engine},
  author = {Dunn, Matthew and Sagun, Levent and Higgins, Mike and Guney, V. Ugur and Cirik, Volkan and Cho, Kyunghyun},
  year = 2017,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1704.05179},
  url = {https://arxiv.org/abs/1704.05179},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences}
}
@misc{https://doi.org/10.48550/arxiv.1705.03551,
  title = {TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension},
  author = {Joshi, Mandar and Choi, Eunsol and Weld, Daniel S. and Zettlemoyer, Luke},
  year = 2017,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.1705.03551},
  url = {https://arxiv.org/abs/1705.03551},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences}
}
@misc{https://doi.org/10.48550/arxiv.2201.09651,
  title = {Artefact Retrieval: Overview of NLP Models with Knowledge Base Access},
  author = {Zouhar, Vilém and Mosbach, Marius and Biswas, Debanjali and Klakow, Dietrich},
  year = 2022,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.2201.09651},
  url = {https://arxiv.org/abs/2201.09651},
  copyright = {Creative Commons Attribution Share Alike 4.0 International},
  keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), FOS: Computer and information sciences}
}
@inbook{inbook,
  title = {Data Collection for Natural Language Processing Systems},
  author = {Hrkút, Patrik and Toth, Štefan and Ďuračík, Michal and Meško, Matej and Krsak, Emil and Mikušová, Miroslava},
  year = 2020,
  month = mar,
  isbn = {978-981-15-3379-2},
  pages = {60--70},
  doi = {10.1007/978-981-15-3380-8_6}
}
@article{Chiche2022,
  title = {Part of speech tagging: a systematic review of deep learning and machine learning approaches},
  author = {Chiche, Alebachew and Yitagesu, Betselot},
  year = 2022,
  month = jan,
  day = 24,
  journal = {Journal of Big Data},
  volume = 9,
  number = 1,
  pages = 10,
  issn = {2196-1115},
  doi = {10.1186/s40537-022-00561-y},
  url = {https://doi.org/10.1186/s40537-022-00561-y},
  abstract = {Natural language processing (NLP) tools have sparked a great deal of interest due to rapid improvements in information and communications technologies. As a result, many different NLP tools are being produced. However, there are many challenges for developing efficient and effective NLP tools that accurately process natural languages. One such tool is part of speech (POS) tagging, which tags a particular sentence or words in a paragraph by looking at the context of the sentence/words inside the paragraph. Despite enormous efforts by researchers, POS tagging still faces challenges in improving accuracy while reducing false-positive rates and in tagging unknown words. Furthermore, the presence of ambiguity when tagging terms with different contextual meanings inside a sentence cannot be overlooked. Recently, Deep learning (DL) and Machine learning (ML)-based POS taggers are being implemented as potential solutions to efficiently identify words in a given sentence across a paragraph. This article first clarifies the concept of part of speech POS tagging. It then provides the broad categorization based on the famous ML and DL techniques employed in designing and implementing part of speech taggers. A comprehensive review of the latest POS tagging articles is provided by discussing the weakness and strengths of the proposed approaches. Then, recent trends and advancements of DL and ML-based part-of-speech-taggers are presented in terms of the proposed approaches deployed and their performance evaluation metrics. Using the limitations of the proposed approaches, we emphasized various research gaps and presented future recommendations for the research in advancing DL and ML-based POS tagging.}
}
@misc{https://doi.org/10.48550/arxiv.2006.03654,
  title = {DeBERTa: Decoding-enhanced BERT with Disentangled Attention},
  author = {He, Pengcheng and Liu, Xiaodong and Gao, Jianfeng and Chen, Weizhu},
  year = 2020,
  publisher = {arXiv},
  doi = {10.48550/ARXIV.2006.03654},
  url = {https://arxiv.org/abs/2006.03654},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences, I.2; I.7, cs.CL, cs.GL}
}
@misc{FormalInformal,
  title = {Formal and Informal Style},
  url = {https://www.niu.edu/writingtutorial/style/formal-and-informal-style.shtml}
}
@misc{BetterWebSearches,
  title = {Refine web searches},
  url = {https://support.google.com/websearch/answer/2466433}
}
@inproceedings{inproceedings,
  title = {Tagging Efficiency Analysis on Part of Speech Taggers},
  author = {Banga, Ritu and Mehndiratta, Pulkit},
  year = 2017,
  month = dec,
  pages = {264--267},
  doi = {10.1109/ICIT.2017.57}
}
@article{2019t5,
  title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  author = {Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu},
  year = 2019,
  journal = {arXiv e-prints},
  archiveprefix = {arXiv},
  eprint = {1910.10683}
}