Compare commits

...

5 Commits

Author SHA1 Message Date
Thamognya Kodi 0c12e6b0ff update 2022-12-25 11:30:03 +07:00
Thamognya Kodi eef7dfce56 update 2022-12-25 10:04:11 +07:00
Thamognya Kodi 64868710d7 update 2022-12-24 23:19:11 +07:00
Thamognya Kodi 3b6f545e14 started on research paper and presentation 2022-12-24 20:37:31 +07:00
Thamognya Kodi 2dc4a9454a started on research paper 2022-12-24 20:37:17 +07:00
22 changed files with 3756 additions and 1 deletion

View File

@@ -0,0 +1,301 @@
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
.pdf
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
# fixme
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
*.slg
*.slo
*.sls
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplot
*.gnuplot
*.table
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.glog
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
# *.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# newpax
*.newpax
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# svg
svg-inkscape/
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# titletoc
*.ptc
# todonotes
*.tdo
# vhistory
*.hst
*.ver
# easy-todo
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# TeXnicCenter
*.tps
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
# xwatermark package
*.xwm
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

301
research/.gitignore vendored 100644
View File

@@ -0,0 +1,301 @@
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
.pdf
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
# fixme
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
*.slg
*.slo
*.sls
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplot
*.gnuplot
*.table
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.glog
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
# *.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# newpax
*.newpax
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# svg
svg-inkscape/
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# titletoc
*.ptc
# todonotes
*.tdo
# vhistory
*.hst
*.ver
# easy-todo
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# TeXnicCenter
*.tps
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
# xwatermark package
*.xwm
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

View File

@@ -0,0 +1,28 @@
In this paper, I present {\bf \inlp}, a new control-flow wrapper abstraction that enables existing context-needing Natural Language Processing (NLP) models to function without any given context by drawing on data from the internet (or from a knowledge database when offline). Internet-NLP can be used as-is or finetuned alongside existing NLP models via its config settings, and its Long Short-Term Memory (LSTM) neural network can also be trained. Additionally, Masked Language Models (MLMs) such as BERT or LinkBERT \cite{devlin-etal-2019-bert,yasunaga-etal-2022-linkbert} can be incorporated to improve search queries and therefore retrieve more accurate and reliable data. Furthermore, {\bf \inlp} uses an LSTM, Reinforcement Learning, and caches to support multi-turn NLP tasks and to improve over time via Reinforcement Learning from the user.
In this paper, I also present new NLP and Natural Language Inference (NLI) models to assist {\bf \inlp}:
\begin{itemize}
\item Open-book question answering with long answers (QA) via GPT-NeoX-20B \cite{gpt-neox-library, gpt-neox-20b}
\item CrossEncoder NLI via LinkBERT \cite{reimers-2019-sentence-bert,thakur-2020-AugSBERT, yasunaga-etal-2022-linkbert}
\item Answer-to-context NLP via T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}
\end{itemize}
Along with these models, I also present new general-purpose QA and NLI datasets:
\begin{itemize}
\item ALotNLI, built from ANLI, MultiNLI, and SNLI \cite{nie-etal-2020-adversarial,N18-1101,DBLP:journals/corr/BowmanAPM15}
\item ALotOpenBookQA, built from CoQA, Natural Questions, and SQuAD \cite{DBLP:journals/corr/abs-1808-07042,kwiatkowski-etal-2019-natural,DBLP:journals/corr/abs-1806-03822}
\end{itemize}
As a result of these models, datasets, and Internet-NLP, the accuracy and reliability of most context-needing NLP models increase on most NLP tasks, especially tasks that require factual responses with no given context.
The new NLP and NLI models were trained on the general-purpose datasets (ALotNLI and ALotOpenBookQA). By default, Internet-NLP uses the text-generation model GPT-NeoX \cite{gpt-neox-library, gpt-neox-20b} for long responses and LinkBERT \cite{yasunaga-etal-2022-linkbert} for short responses. For two-choice tasks (for example, True and False) a Bi-Encoder NLI model is used, and for multiple choices a CrossEncoder is used \cite{thakur-2020-AugSBERT}.
In layperson's terms, Internet-NLP provides the context that context-needing NLP models need in order to function. It can be improved via finetuning and by training its LSTM and Reinforcement Learning models (which can be trained alongside the NLP model), which enables better search queries and, subsequently, better results. It obtains state-of-the-art (SOTA) results in QA and NLI without context.
Internet-NLP is a subset of a larger package, Internet-ML, and is open-source.\footnote{Internet-NLP, a subset of Internet-ML, is public and open-source: \url{https://github.com/thamognya/internet_ml}}\label{footnote:code}
Older versions of Internet-NLP are also publicly available.\footnote{Older versions of Internet-NLP are public: \url{https://pypi.org/project/internet-nlp/}}\label{footnote:code-old}
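Conceptually, the wrapper described above is a short control flow: build a search query from the question, scrape the web (or a knowledge base) for snippets, cache them, and hand the retrieved text to a context-needing model as its context. The minimal Python sketch below only illustrates that flow; the helper names (build_search_query, fetch_snippets, answer_with_context) are hypothetical placeholders, not the actual internet_ml API.

# Minimal sketch of the control flow described in the abstract (illustrative only).
# The helpers passed in are hypothetical placeholders, not the real internet_ml API.
from typing import Callable, Dict, List


def internet_nlp_answer(
    question: str,
    build_search_query: Callable[[str], str],
    fetch_snippets: Callable[[str], List[str]],
    answer_with_context: Callable[[str, str], str],
    cache: Dict[str, str],
) -> str:
    """Answer a no-context question by first retrieving context from the web."""
    if question in cache:                       # reuse earlier results for multi-turn use
        return cache[question]
    query = build_search_query(question)        # e.g. an MLM/T5-based query rewriter
    context = " ".join(fetch_snippets(query))   # scraped web text becomes the missing context
    answer = answer_with_context(question, context)
    cache[question] = answer
    return answer


# Toy usage with stub components, just to show the wiring.
if __name__ == "__main__":
    print(internet_nlp_answer(
        "Who proposed LinkBERT?",
        build_search_query=lambda q: q,
        fetch_snippets=lambda q: ["LinkBERT was proposed by Yasunaga, Leskovec and Liang."],
        answer_with_context=lambda q, c: c.split(" by ")[-1].rstrip("."),
        cache={},
    ))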

View File

@@ -0,0 +1,315 @@
% This is the LaTex style file for *ACL.
% The official sources can be found at
%
% https://github.com/acl-org/acl-style-files/
%
% This package is activated by adding
%
% \usepackage{acl}
%
% to your LaTeX file. When submitting your paper for review, add the "review" option:
%
% \usepackage[review]{acl}
\newif\ifacl@finalcopy
\DeclareOption{final}{\acl@finalcopytrue}
\DeclareOption{review}{\acl@finalcopyfalse}
\ExecuteOptions{final} % final copy is the default
% include hyperref, unless user specifies nohyperref option like this:
% \usepackage[nohyperref]{acl}
\newif\ifacl@hyperref
\DeclareOption{hyperref}{\acl@hyperreftrue}
\DeclareOption{nohyperref}{\acl@hyperreffalse}
\ExecuteOptions{hyperref} % default is to use hyperref
\ProcessOptions\relax
\typeout{Conference Style for ACL}
\usepackage{xcolor}
\ifacl@finalcopy
% Hack to ignore these commands, which review mode puts into the .aux file.
\newcommand{\@LN@col}[1]{}
\newcommand{\@LN}[2]{}
\else
% Add draft line numbering via the lineno package
% https://texblog.org/2012/02/08/adding-line-numbers-to-documents/
\usepackage[switch,mathlines]{lineno}
% Line numbers in gray Helvetica 8pt
\font\aclhv = phvb at 8pt
\renewcommand\linenumberfont{\aclhv\color{lightgray}}
% Zero-fill line numbers
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
\newcount\cv@tmpc@ \newcount\cv@tmpc
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
\cv@tmpc=1 %
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
\renewcommand\thelinenumber{\fillzeros[3]{\arabic{linenumber}}}
\linenumbers
\setlength{\linenumbersep}{1.6cm}
% Bug: An equation with $$ ... $$ isn't numbered, nor is the previous line.
% Patch amsmath commands so that the previous line and the equation itself
% are numbered. Bug: multline has an extra line number.
% https://tex.stackexchange.com/questions/461186/how-to-use-lineno-with-amsmath-align
\usepackage{etoolbox} %% <- for \pretocmd, \apptocmd and \patchcmd
\newcommand*\linenomathpatch[1]{%
\expandafter\pretocmd\csname #1\endcsname {\linenomath}{}{}%
\expandafter\pretocmd\csname #1*\endcsname {\linenomath}{}{}%
\expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
\expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
}
\newcommand*\linenomathpatchAMS[1]{%
\expandafter\pretocmd\csname #1\endcsname {\linenomathAMS}{}{}%
\expandafter\pretocmd\csname #1*\endcsname {\linenomathAMS}{}{}%
\expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
\expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
}
%% Definition of \linenomathAMS depends on whether the mathlines option is provided
\expandafter\ifx\linenomath\linenomathWithnumbers
\let\linenomathAMS\linenomathWithnumbers
%% The following line gets rid of an extra line numbers at the bottom:
\patchcmd\linenomathAMS{\advance\postdisplaypenalty\linenopenalty}{}{}{}
\else
\let\linenomathAMS\linenomathNonumbers
\fi
\AtBeginDocument{%
\linenomathpatch{equation}%
\linenomathpatchAMS{gather}%
\linenomathpatchAMS{multline}%
\linenomathpatchAMS{align}%
\linenomathpatchAMS{alignat}%
\linenomathpatchAMS{flalign}%
}
\fi
\iffalse
\PassOptionsToPackage{
a4paper,
top=2.21573cm,left=2.54cm,
textheight=24.7cm,textwidth=16.0cm,
headheight=0.17573cm,headsep=0cm
}{geometry}
\fi
\PassOptionsToPackage{a4paper,margin=2.5cm}{geometry}
\RequirePackage{geometry}
\setlength\columnsep{0.6cm}
\newlength\titlebox
\setlength\titlebox{5cm}
\twocolumn \sloppy
% We're never going to need a table of contents, so just flush it to
% save space --- suggested by drstrip@sandia-2
\def\addcontentsline#1#2#3{}
\ifacl@finalcopy
\thispagestyle{empty}
\pagestyle{empty}
\else
\pagenumbering{arabic}
\fi
%% Title and Authors %%
\newcommand{\Thanks}[1]{\thanks{\ #1}}
\newcommand\outauthor{
\begin{tabular}[t]{c}
\ifacl@finalcopy
\bf\@author
\else
% Avoiding common accidental de-anonymization issue. --MM
\bf Anonymous ACL submission
\fi
\end{tabular}}
% Mostly taken from deproc.
\AtBeginDocument{
\def\maketitle{\par
\begingroup
\def\thefootnote{\fnsymbol{footnote}}
\def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}}
\twocolumn[\@maketitle] \@thanks
\endgroup
\setcounter{footnote}{0}
\let\maketitle\relax \let\@maketitle\relax
\gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
\def\@maketitle{\vbox to \titlebox{\hsize\textwidth
\linewidth\hsize \vskip 0.125in minus 0.125in \centering
{\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in
{\def\and{\unskip\enspace{\rm and}\enspace}%
\def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
\hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}%
\def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
\vskip 0.25in plus 1fil minus 0.125in
\hbox to \linewidth\bgroup\large \hfil\hfil
\hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}
\hbox to \linewidth\bgroup\large \hfil\hfil
\hbox to 0pt\bgroup\hss
\outauthor
\hss\egroup
\hfil\hfil\egroup}
\vskip 0.3in plus 2fil minus 0.1in
}}
}
% margins and font size for abstract
\renewenvironment{abstract}%
{\centerline{\large\bf Abstract}%
\begin{list}{}%
{\setlength{\rightmargin}{0.6cm}%
\setlength{\leftmargin}{0.6cm}}%
\item[]\ignorespaces%
\@setsize\normalsize{12pt}\xpt\@xpt
}%
{\unskip\end{list}}
%\renewenvironment{abstract}{\centerline{\large\bf
% Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
% Resizing figure and table captions - SL
% Support for interacting with the caption, subfigure, and subcaption packages - SL
\RequirePackage{caption}
\DeclareCaptionFont{10pt}{\fontsize{10pt}{12pt}\selectfont}
\captionsetup{font=10pt}
\RequirePackage{natbib}
% for citation commands in the .tex, authors can use:
% \citep, \citet, and \citeyearpar for compatibility with natbib, or
% \cite, \newcite, and \shortcite for compatibility with older ACL .sty files
\renewcommand\cite{\citep} % to get "(Author Year)" with natbib
\newcommand\shortcite{\citeyearpar}% to get "(Year)" with natbib
\newcommand\newcite{\citet} % to get "Author (Year)" with natbib
\bibliographystyle{acl_natbib}
% Bibliography
% Don't put a label in the bibliography at all. Just use the unlabeled format
% instead.
\def\thebibliography#1{\vskip\parskip%
\vskip\baselineskip%
\def\baselinestretch{1}%
\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
\vskip-\parskip%
\vskip-\baselineskip%
\section*{References\@mkboth
{References}{References}}\list
{}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
\setlength{\itemindent}{-\parindent}}
\def\newblock{\hskip .11em plus .33em minus -.07em}
\sloppy\clubpenalty4000\widowpenalty4000
\sfcode`\.=1000\relax}
\let\endthebibliography=\endlist
% Allow for a bibliography of sources of attested examples
\def\thesourcebibliography#1{\vskip\parskip%
\vskip\baselineskip%
\def\baselinestretch{1}%
\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
\vskip-\parskip%
\vskip-\baselineskip%
\section*{Sources of Attested Examples\@mkboth
{Sources of Attested Examples}{Sources of Attested Examples}}\list
{}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
\setlength{\itemindent}{-\parindent}}
\def\newblock{\hskip .11em plus .33em minus -.07em}
\sloppy\clubpenalty4000\widowpenalty4000
\sfcode`\.=1000\relax}
\let\endthesourcebibliography=\endlist
% sections with less space
\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
-0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bf\raggedright}}
\def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
-0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}}
%% changed by KO to - values to get the initial parindent right
\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex plus
-0.5ex minus -.2ex}{0.5ex plus .2ex}{\normalsize\bf\raggedright}}
\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
\def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
% Footnotes
\footnotesep 6.65pt %
\skip\footins 9pt plus 4pt minus 2pt
\def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
\setcounter{footnote}{0}
% Lists and paragraphs
\parindent 1em
\topsep 4pt plus 1pt minus 2pt
\partopsep 1pt plus 0.5pt minus 0.5pt
\itemsep 2pt plus 1pt minus 0.5pt
\parsep 2pt plus 1pt minus 0.5pt
\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em
\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
\def\@listi{\leftmargin\leftmargini}
\def\@listii{\leftmargin\leftmarginii
\labelwidth\leftmarginii\advance\labelwidth-\labelsep
\topsep 2pt plus 1pt minus 0.5pt
\parsep 1pt plus 0.5pt minus 0.5pt
\itemsep \parsep}
\def\@listiii{\leftmargin\leftmarginiii
\labelwidth\leftmarginiii\advance\labelwidth-\labelsep
\topsep 1pt plus 0.5pt minus 0.5pt
\parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
\itemsep \topsep}
\def\@listiv{\leftmargin\leftmarginiv
\labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
\def\@listv{\leftmargin\leftmarginv
\labelwidth\leftmarginv\advance\labelwidth-\labelsep}
\def\@listvi{\leftmargin\leftmarginvi
\labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
\abovedisplayskip 7pt plus2pt minus5pt%
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip 0pt plus3pt%
\belowdisplayshortskip 4pt plus3pt minus3pt%
% Less leading in most fonts (due to the narrow columns)
% The choices were between 1-pt and 1.5-pt leading
\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
% The hyperref manual (section 9) says hyperref should be loaded after natbib
\ifacl@hyperref
\PassOptionsToPackage{breaklinks}{hyperref}
\RequirePackage{hyperref}
% make links dark blue
\definecolor{darkblue}{rgb}{0, 0, 0.5}
\hypersetup{colorlinks=true, citecolor=darkblue, linkcolor=darkblue, urlcolor=darkblue}
\else
% This definition is used if the hyperref package is not loaded.
% It provides a backup, no-op definition of \href.
% This is necessary because the \href command is used in the acl_natbib.bst file.
\def\href#1#2{{#2}}
\usepackage{url}
\fi

View File

@@ -0,0 +1,270 @@
% This must be in the first 5 lines to tell arXiv to use pdfLaTeX, which is strongly recommended.
\pdfoutput=1
% In particular, the hyperref package requires pdfLaTeX in order to break URLs across lines.
\documentclass[11pt]{article}
% Remove the "review" option to generate the final version.
\usepackage[review]{acl}
% Standard package includes
\usepackage{times}
\usepackage{latexsym}
% For proper rendering and hyphenation of words containing Latin characters (including in bib files)
\usepackage[T1]{fontenc}
% For Vietnamese characters
% \usepackage[T5]{fontenc}
% See https://www.latex-project.org/help/documentation/encguide.pdf for other character sets
% This assumes your files are encoded as UTF8
\usepackage[utf8]{inputenc}
% This is not strictly necessary, and may be commented out,
% but it will improve the layout of the manuscript,
% and will typically save some space.
\usepackage{microtype}
% If the title and author information does not fit in the area allocated, uncomment the following
%
%\setlength\titlebox{<dim>}
%
% and set <dim> to something 5cm or larger.
\title{Instructions for *ACL Proceedings}
% Author information can be set in various styles:
% For several authors from the same institution:
% \author{Author 1 \and ... \and Author n \\
% Address line \\ ... \\ Address line}
% if the names do not fit well on one line use
% Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
% For authors from different institutions:
% \author{Author 1 \\ Address line \\ ... \\ Address line
% \And ... \And
% Author n \\ Address line \\ ... \\ Address line}
% To start a separate ``row'' of authors use \AND, as in
% \author{Author 1 \\ Address line \\ ... \\ Address line
% \AND
% Author 2 \\ Address line \\ ... \\ Address line \And
% Author 3 \\ Address line \\ ... \\ Address line}
\author{First Author \\
Affiliation / Address line 1 \\
Affiliation / Address line 2 \\
Affiliation / Address line 3 \\
\texttt{email@domain} \\\And
Second Author \\
Affiliation / Address line 1 \\
Affiliation / Address line 2 \\
Affiliation / Address line 3 \\
\texttt{email@domain} \\}
\begin{document}
\maketitle
\begin{abstract}
This document is a supplement to the general instructions for *ACL authors. It contains instructions for using the \LaTeX{} style files for ACL conferences.
The document itself conforms to its own specifications, and is therefore an example of what your manuscript should look like.
These instructions should be used both for papers submitted for review and for final versions of accepted papers.
\end{abstract}
\section{Introduction}
These instructions are for authors submitting papers to *ACL conferences using \LaTeX. They are not self-contained. All authors must follow the general instructions for *ACL proceedings,\footnote{\url{http://acl-org.github.io/ACLPUB/formatting.html}} and this document contains additional instructions for the \LaTeX{} style files.
The templates include the \LaTeX{} source of this document (\texttt{acl.tex}),
the \LaTeX{} style file used to format it (\texttt{acl.sty}),
an ACL bibliography style (\texttt{acl\_natbib.bst}),
an example bibliography (\texttt{custom.bib}),
and the bibliography for the ACL Anthology (\texttt{anthology.bib}).
\section{Engines}
To produce a PDF file, pdf\LaTeX{} is strongly recommended (over original \LaTeX{} plus dvips+ps2pdf or dvipdf). Xe\LaTeX{} also produces PDF files, and is especially suitable for text in non-Latin scripts.
\section{Preamble}
The first line of the file must be
\begin{quote}
\begin{verbatim}
\documentclass[11pt]{article}
\end{verbatim}
\end{quote}
To load the style file in the review version:
\begin{quote}
\begin{verbatim}
\usepackage[review]{acl}
\end{verbatim}
\end{quote}
For the final version, omit the \verb|review| option:
\begin{quote}
\begin{verbatim}
\usepackage{acl}
\end{verbatim}
\end{quote}
To use Times Roman, put the following in the preamble:
\begin{quote}
\begin{verbatim}
\usepackage{times}
\end{verbatim}
\end{quote}
(Alternatives like txfonts or newtx are also acceptable.)
Please see the \LaTeX{} source of this document for comments on other packages that may be useful.
Set the title and author using \verb|\title| and \verb|\author|. Within the author list, format multiple authors using \verb|\and| and \verb|\And| and \verb|\AND|; please see the \LaTeX{} source for examples.
By default, the box containing the title and author names is set to the minimum of 5 cm. If you need more space, include the following in the preamble:
\begin{quote}
\begin{verbatim}
\setlength\titlebox{<dim>}
\end{verbatim}
\end{quote}
where \verb|<dim>| is replaced with a length. Do not set this length smaller than 5 cm.
\section{Document Body}
\subsection{Footnotes}
Footnotes are inserted with the \verb|\footnote| command.\footnote{This is a footnote.}
\subsection{Tables and figures}
See Table~\ref{tab:accents} for an example of a table and its caption.
\textbf{Do not override the default caption sizes.}
\begin{table}
\centering
\begin{tabular}{lc}
\hline
\textbf{Command} & \textbf{Output}\\
\hline
\verb|{\"a}| & {\"a} \\
\verb|{\^e}| & {\^e} \\
\verb|{\`i}| & {\`i} \\
\verb|{\.I}| & {\.I} \\
\verb|{\o}| & {\o} \\
\verb|{\'u}| & {\'u} \\
\verb|{\aa}| & {\aa} \\\hline
\end{tabular}
\begin{tabular}{lc}
\hline
\textbf{Command} & \textbf{Output}\\
\hline
\verb|{\c c}| & {\c c} \\
\verb|{\u g}| & {\u g} \\
\verb|{\l}| & {\l} \\
\verb|{\~n}| & {\~n} \\
\verb|{\H o}| & {\H o} \\
\verb|{\v r}| & {\v r} \\
\verb|{\ss}| & {\ss} \\
\hline
\end{tabular}
\caption{Example commands for accented characters, to be used in, \emph{e.g.}, Bib\TeX{} entries.}
\label{tab:accents}
\end{table}
\subsection{Hyperlinks}
Users of older versions of \LaTeX{} may encounter the following error during compilation:
\begin{quote}
\tt\verb|\pdfendlink| ended up in different nesting level than \verb|\pdfstartlink|.
\end{quote}
This happens when pdf\LaTeX{} is used and a citation splits across a page boundary. The best way to fix this is to upgrade \LaTeX{} to 2018-12-01 or later.
\subsection{Citations}
\begin{table*}
\centering
\begin{tabular}{lll}
\hline
\textbf{Output} & \textbf{natbib command} & \textbf{Old ACL-style command}\\
\hline
\citep{Gusfield:97} & \verb|\citep| & \verb|\cite| \\
\citealp{Gusfield:97} & \verb|\citealp| & no equivalent \\
\citet{Gusfield:97} & \verb|\citet| & \verb|\newcite| \\
\citeyearpar{Gusfield:97} & \verb|\citeyearpar| & \verb|\shortcite| \\
\hline
\end{tabular}
\caption{\label{citation-guide}
Citation commands supported by the style file.
The style is based on the natbib package and supports all natbib citation commands.
It also supports commands defined in previous ACL style files for compatibility.
}
\end{table*}
Table~\ref{citation-guide} shows the syntax supported by the style files.
We encourage you to use the natbib styles.
You can use the command \verb|\citet| (cite in text) to get ``author (year)'' citations, like this citation to a paper by \citet{Gusfield:97}.
You can use the command \verb|\citep| (cite in parentheses) to get ``(author, year)'' citations \citep{Gusfield:97}.
You can use the command \verb|\citealp| (alternative cite without parentheses) to get ``author, year'' citations, which is useful for using citations within parentheses (e.g. \citealp{Gusfield:97}).
\subsection{References}
\nocite{Ando2005,andrew2007scalable,rasooli-tetrault-2015}
The \LaTeX{} and Bib\TeX{} style files provided roughly follow the American Psychological Association format.
If your own bib file is named \texttt{custom.bib}, then placing the following before any appendices in your \LaTeX{} file will generate the references section for you:
\begin{quote}
\begin{verbatim}
\bibliography{custom}
\end{verbatim}
\end{quote}
You can obtain the complete ACL Anthology as a Bib\TeX{} file from \url{https://aclweb.org/anthology/anthology.bib.gz}.
To include both the Anthology and your own .bib file, use the following instead of the above.
\begin{quote}
\begin{verbatim}
\bibliography{anthology,custom}
\end{verbatim}
\end{quote}
Please see Section~\ref{sec:bibtex} for information on preparing Bib\TeX{} files.
\subsection{Appendices}
Use \verb|\appendix| before any appendix section to switch the section numbering over to letters. See Appendix~\ref{sec:appendix} for an example.
\section{Bib\TeX{} Files}
\label{sec:bibtex}
Unicode cannot be used in Bib\TeX{} entries, and some ways of typing special characters can disrupt Bib\TeX's alphabetization. The recommended way of typing special characters is shown in Table~\ref{tab:accents}.
Please ensure that Bib\TeX{} records contain DOIs or URLs when possible, and for all the ACL materials that you reference.
Use the \verb|doi| field for DOIs and the \verb|url| field for URLs.
If a Bib\TeX{} entry has a URL or DOI field, the paper title in the references section will appear as a hyperlink to the paper, using the hyperref \LaTeX{} package.
\section*{Acknowledgements}
This document has been adapted
by Steven Bethard, Ryan Cotterell and Rui Yan
from the instructions for earlier ACL and NAACL proceedings, including those for
ACL 2019 by Douwe Kiela and Ivan Vuli\'{c},
NAACL 2019 by Stephanie Lukin and Alla Roskovskaya,
ACL 2018 by Shay Cohen, Kevin Gimpel, and Wei Lu,
NAACL 2018 by Margaret Mitchell and Stephanie Lukin,
Bib\TeX{} suggestions for (NA)ACL 2017/2018 from Jason Eisner,
ACL 2017 by Dan Gildea and Min-Yen Kan,
NAACL 2017 by Margaret Mitchell,
ACL 2012 by Maggie Li and Michael White,
ACL 2010 by Jing-Shin Chang and Philipp Koehn,
ACL 2008 by Johanna D. Moore, Simone Teufel, James Allan, and Sadaoki Furui,
ACL 2005 by Hwee Tou Ng and Kemal Oflazer,
ACL 2002 by Eugene Charniak and Dekang Lin,
and earlier ACL and EACL formats written by several people, including
John Chen, Henry S. Thompson and Donald Walker.
Additional elements were taken from the formatting instructions of the \emph{International Joint Conference on Artificial Intelligence} and the \emph{Conference on Computer Vision and Pattern Recognition}.
% Entries for the entire Anthology, followed by custom entries
\bibliography{anthology,custom}
\appendix
\section{Example Appendix}
\label{sec:appendix}
This is an appendix.
\end{document}

File diff suppressed because it is too large.

View File

@@ -0,0 +1,7 @@
#!/bin/sh
# Tidy the bibliography, then run the usual pdflatex/bibtex/pdflatex cycle and open the result.
bibtex-tidy --curly --numeric --tab --align=13 --duplicates=key --no-escape --no-remove-dupe-fields ./ref.bib && pdflatex main
pdflatex main
bibtex main
pdflatex main
pdflatex main
open main.pdf

Binary file not shown (added image, 197 KiB).

View File

@@ -0,0 +1,42 @@
%auto-ignore
% TODO: Fix stuff here
\section{Introduction}
There are currently two main solutions for performing NLP tasks when no context is provided:
\begin{enumerate}[leftmargin=1em]
\item Large pre-trained Text-Generation and Text2Text-Generation models \label{CurrSolOne}
\begin{itemize}[leftmargin=1em]
\begin{figure}
\includegraphics[width=1.0\columnwidth]{gpt3architecture.png}
\caption{An illustration of the architecture of GPT-2 and GPT-3, popular Text-Generation models \cite{gpt3-overview, the-gpt-3-architecture-on-a-napkin}.}
\label{fig:CurrSolOneImg}
\includegraphics[width=1.0\columnwidth]{t5architecture.png}
\caption{An illustration of the architecture of T5, a popular Text2Text-Generation model \cite{alammar}.}
\label{fig:CurrSolOneSecondImg}
\end{figure}
\item Pre-trained Text-Generation models such as GPT-NeoX and GPT-3 \cite{gpt-neox-20b,gpt-neox-library,DBLP:journals/corr/abs-2005-14165} can be trained for closed-book open-domain question-answering language-model tasks (ODQA LM) \cite{weng2020odqa}. On ODQA tasks they achieve SOTA results with high accuracy and speed, but they are much larger than open-book (context-needing) language models.
\item Pre-trained Text2Text-Generation models such as T5 \cite{https://doi.org/10.48550/arxiv.1910.10683} also offer closed-book (no-context) ODQA LM capabilities \cite{weng2020odqa} and give comparatively state-of-the-art performance on many no-context NLP tasks, mainly question answering. However, Text2Text-Generation models used for such no-context tasks are usually large, slow, and comparatively low in accuracy \cite{DBLP:journals/corr/abs-2002-08910}.
\item Example: T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}
\item Illustration of how ODQA LMs work: Figure~\ref{fig:CurrSolOneImg}
\end{itemize}
\item A large knowledge base with a context-needing language model \label{CurrSolTwo}
\begin{itemize}[leftmargin=1em]
\begin{figure}
\includegraphics[width=1.0\columnwidth]{artefacts_diagram.pdf}
\caption{An illustration of how LMs with a knowledge base and artefact retriever work \cite{https://doi.org/10.48550/arxiv.2201.09651}.}
\label{fig:CurrSolTwoImg}
\end{figure}
\item A large knowledge base paired with a pre-trained open-book LM and a retriever provides comparatively higher performance and accuracy, and the model itself is small. However, these models usually require a large knowledge base, which makes the overall solution large, though it remains fast and is more accurate within the field the knowledge base specializes in.
\item Example: LinkBERT \cite{https://doi.org/10.48550/arxiv.2203.15827} with an artefact retriever \cite{https://doi.org/10.48550/arxiv.2201.09651} and a knowledge base such as DBpedia or Wikidata \cite{10.5555/1785162.1785216, 10.1145/2629489}
\item Illustration of how LMs with a knowledge base and artefact retriever work: Figure~\ref{fig:CurrSolTwoImg}
\end{itemize}
\end{enumerate}
Solutions \ref{CurrSolOne} and \ref{CurrSolTwo} achieve the same end goal of performing NLP tasks without context via two different methods, but both restrict which NLP tasks can be performed without context and how accurately, especially for more open-domain tasks. The major limitations are the accuracy, efficiency, and size of the models and of their knowledge bases, which in turn limit the use cases of closed-book open-domain NLP tasks.
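To make the two usage patterns above concrete, the sketch below contrasts them using the Hugging Face transformers pipeline API (assuming transformers and a model backend such as PyTorch are installed); the small public checkpoints named here are chosen purely for illustration and are not the models evaluated in this paper.

# Illustrative contrast between the two current solutions (not this paper's models).
from transformers import pipeline

# Solution 1: a closed-book Text2Text-Generation model answers from its parameters alone.
closed_book = pipeline("text2text-generation", model="google/flan-t5-small")
print(closed_book("question: Who proposed LinkBERT?")[0]["generated_text"])

# Solution 2: a context-needing (open-book) QA model, which must be given a passage;
# in practice the passage would come from a knowledge base via a retriever.
open_book = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
passage = "LinkBERT was proposed by Yasunaga, Leskovec and Liang in 2022."
print(open_book(question="Who proposed LinkBERT?", context=passage)["answer"])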
In this paper, I propose \inlp, a direct improvement on solution \ref{CurrSolOne} that removes the need for a large knowledge base (although Internet-NLP can be configured to use one). It incorporates the internet's vast knowledge, along with the data in hyperlinks on webpages \cite{https://doi.org/10.48550/arxiv.2203.15827}, to assemble a richer pool of data for existing or future context-needing pre-trained models to use on NLP tasks. Internet-NLP combines pre-trained NLP and NLI models with a web-data scraper that builds a small, temporary, on-demand dataset and a cache so that NLP tasks can be performed without any given context.
Treating the vast data on the internet as a graph of documents and using it as the corpus \cite{https://doi.org/10.48550/arxiv.2203.15827} lets us reduce the size of the solution while increasing efficiency and accuracy. Moreover, unlike approaches built on static data, Internet-NLP draws on the dynamic, frequently updated data of the internet, which lets us pair any type of NLP model with NLI models and follow a control-flow sequence that obtains the context for context-needing models. This approach combines data collection \cite{inbook} with context-needing open-domain NLP to obtain more accurate results on most no-context NLP tasks.
Additionally, Internet-NLP's Text2Text-Generation search-query model, T5 \cite{https://doi.org/10.48550/arxiv.1910.10683}, together with LSTM-based noun remembrance using part-of-speech tagging \cite{Chiche2022}, is trained on ALotClosedBookQA; it improves search queries based on the difference between the answer received and the answer in the dataset, using part-of-speech tagging on the answers \cite{Chiche2022}.
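As a rough illustration of the part-of-speech-based noun remembrance mentioned above, the snippet below keeps the nouns from a previous answer and appends them to the next search query. It uses NLTK's off-the-shelf tokenizer and tagger and is only a sketch of the idea, not the trained T5/LSTM component described in this paper.

# Sketch: remember nouns from an earlier answer (via POS tagging) to refine the next query.
import nltk

nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)


def refine_query(query: str, previous_answer: str) -> str:
    tokens = nltk.word_tokenize(previous_answer)
    nouns = [word for word, tag in nltk.pos_tag(tokens) if tag.startswith("NN")]
    return query + " " + " ".join(nouns)  # remembered nouns narrow the next search


print(refine_query("LinkBERT pretraining objective",
                   "LinkBERT places linked documents in the same context window."))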

Binary file not shown (added image, 184 KiB).

Binary file not shown.

View File

@@ -0,0 +1,73 @@
\pdfoutput=1
\documentclass[11pt,a4paper]{article}
\usepackage[]{acl}
\usepackage{times}
\usepackage{latexsym}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{microtype}
\usepackage{xspace}
\usepackage{multirow}
\usepackage{url}
\usepackage{booktabs}
\usepackage{tikz,tikz-qtree}
\usepackage{pgfplots}
\usepackage{amssymb}
\usepackage{xfrac}
\usepackage{graphicx}
\usepackage{tablefootnote}
\usepackage{amsmath}
\usepackage{enumitem}
\usepackage{import}
\usepackage{tikz}
\usetikzlibrary{chains,shadows.blur}
\pgfplotsset{compat=1.14}
\newcommand\BibTeX{B{\sc ib}\TeX}
\newcommand\inlp{Internet-NLP\xspace}
\newcommand{\eat}[1]{\ignorespaces}
\definecolor{cred}{HTML}{DB4437}
\definecolor{cblue}{HTML}{4285F4}
\definecolor{cgreen}{HTML}{0F9D58}
\definecolor{cyellow}{HTML}{F4B400}
\definecolor{corange}{HTML}{FF9800}
\definecolor{cgrey}{HTML}{9E9E9E}
\setlength{\tabcolsep}{0.3em}
\title{\textbf{Internet-NLP}: Allowing Natural Language Processing Models to Connect to the Internet}
\author{
Thamognya Kodi \\
Ruamrudee International School \\
{\tt \href{mailto:contact@thamognya.com}{contact@thamognya.com}} \\
}
\date{\today}
\begin{document}
\maketitle
\begin{abstract}
\import{abstract/}{main.tex}
\end{abstract}
\import{introduction/}{main.tex}
\import{related_work/}{main.tex}
\import{preliminaries/}{main.tex}
\import{Internet-NLP/}{main.tex}
% \bibliographystyle{acl_natbib}
% \bibliographystyle{plain}
\bibliography{ref}
\pagenumbering{arabic}
\appendix
\import{appendix/}{main.tex}
\end{document}

View File

@@ -0,0 +1,439 @@
% Cleaned with bibtex-tidy
@inproceedings{devlin-etal-2019-bert,
title = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
year = 2019,
month = jun,
booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
publisher = {Association for Computational Linguistics},
address = {Minneapolis, Minnesota},
pages = {4171--4186},
doi = {10.18653/v1/N19-1423},
url = {https://aclanthology.org/N19-1423},
abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%} absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).}
}
@inproceedings{yasunaga-etal-2022-linkbert,
title = {{L}ink{BERT}: Pretraining Language Models with Document Links},
author = {Yasunaga, Michihiro and Leskovec, Jure and Liang, Percy},
year = 2022,
month = may,
booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
publisher = {Association for Computational Linguistics},
address = {Dublin, Ireland},
pages = {8003--8016},
doi = {10.18653/v1/2022.acl-long.551},
url = {https://aclanthology.org/2022.acl-long.551},
abstract = {Language model (LM) pretraining captures various knowledge from text corpora, helping downstream tasks. However, existing methods such as BERT model a single document, and do not capture dependencies or knowledge that span across documents. In this work, we propose LinkBERT, an LM pretraining method that leverages links between documents, e.g., hyperlinks. Given a text corpus, we view it as a graph of documents and create LM inputs by placing linked documents in the same context. We then pretrain the LM with two joint self-supervised objectives: masked language modeling and our new proposal, document relation prediction. We show that LinkBERT outperforms BERT on various downstream tasks across two domains: the general domain (pretrained on Wikipedia with hyperlinks) and biomedical domain (pretrained on PubMed with citation links). LinkBERT is especially effective for multi-hop reasoning and few-shot QA (+5{\%} absolute improvement on HotpotQA and TriviaQA), and our biomedical LinkBERT sets new states of the art on various BioNLP tasks (+7{\%} on BioASQ and USMLE). We release our pretrained models, LinkBERT and BioLinkBERT, as well as code and data.}
}
@software{gpt-neox-library,
title = {{GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch}},
author = {Andonian, Alex and Anthony, Quentin and Biderman, Stella and Black, Sid and Gali, Preetham and Gao, Leo and Hallahan, Eric and Levy-Kramer, Josh and Leahy, Connor and Nestler, Lucas and Parker, Kip and Pieler, Michael and Purohit, Shivanshu and Songz, Tri and Phil, Wang and Weinbach, Samuel},
year = 2021,
month = 8,
doi = {10.5281/zenodo.5879544},
url = {https://www.github.com/eleutherai/gpt-neox},
version = {0.0.1}
}
@inproceedings{gpt-neox-20b,
title = {{GPT-NeoX-20B}: An Open-Source Autoregressive Language Model},
author = {Black, Sid and Biderman, Stella and Hallahan, Eric and Anthony, Quentin and Gao, Leo and Golding, Laurence and He, Horace and Leahy, Connor and McDonell, Kyle and Phang, Jason and Pieler, Michael and Prashanth, USVSN Sai and Purohit, Shivanshu and Reynolds, Laria and Tow, Jonathan and Wang, Ben and Weinbach, Samuel},
year = 2022,
booktitle = {Proceedings of the ACL Workshop on Challenges \& Perspectives in Creating Large Language Models},
url = {https://arxiv.org/abs/2204.06745}
}
@inproceedings{reimers-2019-sentence-bert,
title = {Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
author = {Reimers, Nils and Gurevych, Iryna},
year = 2019,
month = 11,
booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
publisher = {Association for Computational Linguistics},
url = {http://arxiv.org/abs/1908.10084}
}
@inproceedings{thakur-2020-AugSBERT,
title = {Augmented {SBERT}: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
author = {Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
year = 2021,
month = 6,
booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
publisher = {Association for Computational Linguistics},
address = {Online},
pages = {296--310},
url = {https://arxiv.org/abs/2010.08240}
}
@misc{https://doi.org/10.48550/arxiv.1910.10683,
title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
author = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
year = 2019,
publisher = {arXiv},
doi = {10.48550/ARXIV.1910.10683},
url = {https://arxiv.org/abs/1910.10683},
copyright = {arXiv.org perpetual, non-exclusive license},
keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Computer and information sciences}
}
@inproceedings{nie-etal-2020-adversarial,
title = {Adversarial {NLI}: A New Benchmark for Natural Language Understanding},
author = {Nie, Yixin and Williams, Adina and Dinan, Emily and Bansal, Mohit and Weston, Jason and Kiela, Douwe},
year = 2020,
booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
publisher = {Association for Computational Linguistics}
}
@inproceedings{N18-1101,
title = {A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
author = {Williams, Adina and Nangia, Nikita and Bowman, Samuel},
year = 2018,
booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
location = {New Orleans, Louisiana},
publisher = {Association for Computational Linguistics},
pages = {1112--1122},
url = {http://aclweb.org/anthology/N18-1101}
}
@article{DBLP:journals/corr/BowmanAPM15,
title = {A large annotated corpus for learning natural language inference},
author = {Samuel R. Bowman and Gabor Angeli and Christopher Potts and Christopher D. Manning},
year = 2015,
journal = {CoRR},
volume = {abs/1508.05326},
url = {http://arxiv.org/abs/1508.05326},
eprinttype = {arXiv},
eprint = {1508.05326},
timestamp = {Mon, 13 Aug 2018 16:46:27 +0200},
biburl = {https://dblp.org/rec/journals/corr/BowmanAPM15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1808-07042,
title = {CoQA: {A} Conversational Question Answering Challenge},
author = {Siva Reddy and Danqi Chen and Christopher D. Manning},
year = 2018,
journal = {CoRR},
volume = {abs/1808.07042},
url = {http://arxiv.org/abs/1808.07042},
eprinttype = {arXiv},
eprint = {1808.07042},
timestamp = {Sun, 02 Sep 2018 15:01:56 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1808-07042.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{kwiatkowski-etal-2019-natural,
title = {Natural Questions: A Benchmark for Question Answering Research},
author = {Kwiatkowski, Tom and Palomaki, Jennimaria and Redfield, Olivia and Collins, Michael and Parikh, Ankur and Alberti, Chris and Epstein, Danielle and Polosukhin, Illia and Devlin, Jacob and Lee, Kenton and Toutanova, Kristina and Jones, Llion and Kelcey, Matthew and Chang, Ming-Wei and Dai, Andrew M. and Uszkoreit, Jakob and Le, Quoc and Petrov, Slav},
year = 2019,
journal = {Transactions of the Association for Computational Linguistics},
publisher = {MIT Press},
address = {Cambridge, MA},
volume = 7,
pages = {452--466},
doi = {10.1162/tacl_a_00276},
url = {https://aclanthology.org/Q19-1026},
abstract = {We present the Natural Questions corpus, a question answering data set. Questions consist of real anonymized, aggregated queries issued to the Google search engine. An annotator is presented with a question along with a Wikipedia page from the top 5 search results, and annotates a long answer (typically a paragraph) and a short answer (one or more entities) if present on the page, or marks null if no long/short answer is present. The public release consists of 307,373 training examples with single annotations; 7,830 examples with 5-way annotations for development data; and a further 7,842 examples with 5-way annotated sequestered as test data. We present experiments validating quality of the data. We also describe analysis of 25-way annotations on 302 examples, giving insights into human variability on the annotation task. We introduce robust metrics for the purposes of evaluating question answering systems; demonstrate high human upper bounds on these metrics; and establish baseline results using competitive methods drawn from related literature.}
}
@article{DBLP:journals/corr/abs-1806-03822,
title = {Know What You Don't Know: Unanswerable Questions for SQuAD},
author = {Pranav Rajpurkar and Robin Jia and Percy Liang},
year = 2018,
journal = {CoRR},
volume = {abs/1806.03822},
url = {http://arxiv.org/abs/1806.03822},
eprinttype = {arXiv},
eprint = {1806.03822},
timestamp = {Mon, 13 Aug 2018 16:48:21 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1806-03822.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@misc{the-gpt-3-architecture-on-a-napkin,
title = {How deep is the machine?},
journal = {The GPT-3 Architecture, on a Napkin},
url = {https://dugas.ch/artificial_curiosity/GPT_architecture.html}
}
@misc{gpt3-overview,
url = {https://dzlab.github.io/ml/2020/07/25/gpt3-overview/},
journal = {GPT-3 An Overview},
author = {Dzlab}
}
@misc{alammar,
title = {The illustrated transformer},
url = {https://jalammar.github.io/illustrated-transformer/},
journal = {The Illustrated Transformer Jay Alammar Visualizing machine learning one concept at a time.},
author = {Alammar, Jay}
}
@article{DBLP:journals/corr/abs-2005-14165,
author = {Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwal and Ariel Herbert{-}Voss and Gretchen Krueger and Tom Henighan and Rewon Child and Aditya Ramesh and Daniel M. Ziegler and Jeffrey Wu and Clemens Winter and Christopher Hesse and Mark Chen and Eric Sigler and Mateusz Litwin and Scott Gray and Benjamin Chess and Jack Clark and Christopher Berner and Sam McCandlish and Alec Radford and Ilya Sutskever and Dario Amodei},
title = {Language Models are Few-Shot Learners},
journal = {CoRR},
volume = {abs/2005.14165},
year = 2020,
url = {https://arxiv.org/abs/2005.14165},
eprinttype = {arXiv},
eprint = {2005.14165},
timestamp = {Wed, 03 Jun 2020 11:36:54 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2005-14165.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{weng2020odqa,
title = {How to Build an Open-Domain Question Answering System?},
author = {Weng, Lilian},
journal = {lilianweng.github.io},
year = 2020,
month = {Oct},
url = {https://lilianweng.github.io/posts/2020-10-29-odqa/}
}
@article{DBLP:journals/corr/abs-2002-08910,
author = {Adam Roberts and Colin Raffel and Noam Shazeer},
title = {How Much Knowledge Can You Pack Into the Parameters of a Language Model?},
journal = {CoRR},
volume = {abs/2002.08910},
year = 2020,
url = {https://arxiv.org/abs/2002.08910},
eprinttype = {arXiv},
eprint = {2002.08910},
timestamp = {Mon, 02 Mar 2020 16:46:06 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2002-08910.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@misc{https://doi.org/10.48550/arxiv.1810.04805,
doi = {10.48550/ARXIV.1810.04805},
url = {https://arxiv.org/abs/1810.04805},
author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
publisher = {arXiv},
year = 2018,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@misc{https://doi.org/10.48550/arxiv.2203.15827,
doi = {10.48550/ARXIV.2203.15827},
url = {https://arxiv.org/abs/2203.15827},
author = {Yasunaga, Michihiro and Leskovec, Jure and Liang, Percy},
keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {LinkBERT: Pretraining Language Models with Document Links},
publisher = {arXiv},
year = 2022,
copyright = {Creative Commons Attribution 4.0 International}
}
@misc{https://doi.org/10.48550/arxiv.1908.10084,
doi = {10.48550/ARXIV.1908.10084},
url = {https://arxiv.org/abs/1908.10084},
author = {Reimers, Nils and Gurevych, Iryna},
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
publisher = {arXiv},
year = 2019,
copyright = {Creative Commons Attribution Share Alike 4.0 International}
}
@misc{https://doi.org/10.48550/arxiv.1910.10683,
doi = {10.48550/ARXIV.1910.10683},
url = {https://arxiv.org/abs/1910.10683},
author = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
publisher = {arXiv},
year = 2019,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@misc{https://doi.org/10.48550/arxiv.1809.02789,
doi = {10.48550/ARXIV.1809.02789},
url = {https://arxiv.org/abs/1809.02789},
author = {Mihaylov, Todor and Clark, Peter and Khot, Tushar and Sabharwal, Ashish},
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {Can a Suit of Armor Conduct Electricity? A New Dataset for Open Book Question Answering},
publisher = {arXiv},
year = 2018,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@article{Bartolo_2020,
doi = {10.1162/tacl_a_00338},
url = {https://doi.org/10.1162%2Ftacl_a_00338},
year = 2020,
month = {dec},
publisher = {{MIT} Press - Journals},
volume = 8,
pages = {662--678},
author = {Max Bartolo and Alastair Roberts and Johannes Welbl and Sebastian Riedel and Pontus Stenetorp},
title = {Beat the {AI}: Investigating Adversarial Human Annotation for Reading Comprehension},
journal = {Transactions of the Association for Computational Linguistics}
}
@article{weng2020odqa,
title = {How to Build an Open-Domain Question Answering System?},
author = {Weng, Lilian},
journal = {lilianweng.github.io},
year = 2020,
month = {Oct},
url = {https://lilianweng.github.io/posts/2020-10-29-odqa/}
}
@misc{https://doi.org/10.48550/arxiv.2002.08910,
doi = {10.48550/ARXIV.2002.08910},
url = {https://arxiv.org/abs/2002.08910},
author = {Roberts, Adam and Raffel, Colin and Shazeer, Noam},
keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {How Much Knowledge Can You Pack Into the Parameters of a Language Model?},
publisher = {arXiv},
year = 2020,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@inproceedings{10.5555/1785162.1785216,
author = {Auer, S\"{o}ren and Bizer, Christian and Kobilarov, Georgi and Lehmann, Jens and Cyganiak, Richard and Ives, Zachary},
title = {DBpedia: A Nucleus for a Web of Open Data},
year = 2007,
isbn = 3540762973,
publisher = {Springer-Verlag},
address = {Berlin, Heidelberg},
abstract = {DBpedia is a community effort to extract structured information from Wikipedia and to make this information available on the Web. DBpedia allows you to ask sophisticated queries against datasets derived from Wikipedia and to link other datasets on the Web to Wikipedia data. We describe the extraction of the DBpedia datasets, and how the resulting information is published on the Web for human-andmachine-consumption. We describe some emerging applications from the DBpedia community and show how website authors can facilitate DBpedia content within their sites. Finally, we present the current status of interlinking DBpedia with other open datasets on the Web and outline how DBpedia could serve as a nucleus for an emerging Web of open data.},
booktitle = {Proceedings of the 6th International The Semantic Web and 2nd Asian Conference on Asian Semantic Web Conference},
pages = {722--735},
numpages = 14,
location = {Busan, Korea},
series = {ISWC'07/ASWC'07}
}
@article{10.1145/2629489,
author = {Vrande\v{c}i\'{c}, Denny and Kr\"{o}tzsch, Markus},
title = {Wikidata: A Free Collaborative Knowledgebase},
year = 2014,
issue_date = {October 2014},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = 57,
number = 10,
issn = {0001-0782},
url = {https://doi.org/10.1145/2629489},
doi = {10.1145/2629489},
abstract = {This collaboratively edited knowledgebase provides a common source of data for Wikipedia, and everyone else.},
journal = {Commun. ACM},
month = {sep},
pages = {78--85},
numpages = 8
}
@misc{https://doi.org/10.48550/arxiv.1508.05326,
doi = {10.48550/ARXIV.1508.05326},
url = {https://arxiv.org/abs/1508.05326},
author = {Bowman, Samuel R. and Angeli, Gabor and Potts, Christopher and Manning, Christopher D.},
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {A large annotated corpus for learning natural language inference},
publisher = {arXiv},
year = 2015,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@misc{https://doi.org/10.48550/arxiv.1910.14599,
doi = {10.48550/ARXIV.1910.14599},
url = {https://arxiv.org/abs/1910.14599},
author = {Nie, Yixin and Williams, Adina and Dinan, Emily and Bansal, Mohit and Weston, Jason and Kiela, Douwe},
keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {Adversarial NLI: A New Benchmark for Natural Language Understanding},
publisher = {arXiv},
year = 2019,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@inproceedings{N18-1101,
author = {Williams, Adina and Nangia, Nikita and Bowman, Samuel},
title = {A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference},
booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
year = 2018,
publisher = {Association for Computational Linguistics},
pages = {1112--1122},
location = {New Orleans, Louisiana},
url = {http://aclweb.org/anthology/N18-1101}
}
@article{47761,
title = {Natural Questions: a Benchmark for Question Answering Research},
author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le and Slav Petrov},
year = 2019,
journal = {Transactions of the Association for Computational Linguistics}
}
@misc{https://doi.org/10.48550/arxiv.1704.05179,
doi = {10.48550/ARXIV.1704.05179},
url = {https://arxiv.org/abs/1704.05179},
author = {Dunn, Matthew and Sagun, Levent and Higgins, Mike and Guney, V. Ugur and Cirik, Volkan and Cho, Kyunghyun},
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {SearchQA: A New Q\&A Dataset Augmented with Context from a Search Engine},
publisher = {arXiv},
year = 2017,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@misc{https://doi.org/10.48550/arxiv.1705.03551,
doi = {10.48550/ARXIV.1705.03551},
url = {https://arxiv.org/abs/1705.03551},
author = {Joshi, Mandar and Choi, Eunsol and Weld, Daniel S. and Zettlemoyer, Luke},
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension},
publisher = {arXiv},
year = 2017,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@misc{https://doi.org/10.48550/arxiv.2201.09651,
doi = {10.48550/ARXIV.2201.09651},
url = {https://arxiv.org/abs/2201.09651},
author = {Zouhar, Vilém and Mosbach, Marius and Biswas, Debanjali and Klakow, Dietrich},
keywords = {Computation and Language (cs.CL), Information Retrieval (cs.IR), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {Artefact Retrieval: Overview of NLP Models with Knowledge Base Access},
publisher = {arXiv},
year = 2022,
copyright = {Creative Commons Attribution Share Alike 4.0 International}
}
@inbook{inbook,
author = {Hrkút, Patrik and Toth, Štefan and Ďuračík, Michal and Meško, Matej and Krsak, Emil and Mikušová, Miroslava},
year = 2020,
month = {mar},
pages = {60--70},
title = {Data Collection for Natural Language Processing Systems},
isbn = {978-981-15-3379-2},
doi = {10.1007/978-981-15-3380-8_6}
}
@article{Chiche2022,
author = {Chiche, Alebachew and Yitagesu, Betselot},
title = {Part of speech tagging: a systematic review of deep learning and machine learning approaches},
journal = {Journal of Big Data},
year = 2022,
month = {Jan},
day = 24,
volume = 9,
number = 1,
pages = 10,
abstract = {Natural language processing (NLP) tools have sparked a great deal of interest due to rapid improvements in information and communications technologies. As a result, many different NLP tools are being produced. However, there are many challenges for developing efficient and effective NLP tools that accurately process natural languages. One such tool is part of speech (POS) tagging, which tags a particular sentence or words in a paragraph by looking at the context of the sentence/words inside the paragraph. Despite enormous efforts by researchers, POS tagging still faces challenges in improving accuracy while reducing false-positive rates and in tagging unknown words. Furthermore, the presence of ambiguity when tagging terms with different contextual meanings inside a sentence cannot be overlooked. Recently, Deep learning (DL) and Machine learning (ML)-based POS taggers are being implemented as potential solutions to efficiently identify words in a given sentence across a paragraph. This article first clarifies the concept of part of speech POS tagging. It then provides the broad categorization based on the famous ML and DL techniques employed in designing and implementing part of speech taggers. A comprehensive review of the latest POS tagging articles is provided by discussing the weakness and strengths of the proposed approaches. Then, recent trends and advancements of DL and ML-based part-of-speech-taggers are presented in terms of the proposed approaches deployed and their performance evaluation metrics. Using the limitations of the proposed approaches, we emphasized various research gaps and presented future recommendations for the research in advancing DL and ML-based POS tagging.},
issn = {2196-1115},
doi = {10.1186/s40537-022-00561-y},
url = {https://doi.org/10.1186/s40537-022-00561-y}
}
@inproceedings{thakur-2020-AugSBERT,
title = {Augmented {SBERT}: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
author = {Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
month = {jun},
year = 2021,
address = {Online},
publisher = {Association for Computational Linguistics},
url = {https://arxiv.org/abs/2010.08240},
pages = {296--310}
}
@misc{https://doi.org/10.48550/arxiv.2006.03654,
doi = {10.48550/ARXIV.2006.03654},
url = {https://arxiv.org/abs/2006.03654},
author = {He, Pengcheng and Liu, Xiaodong and Gao, Jianfeng and Chen, Weizhu},
keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences, I.2; I.7, cs.CL, cs.GL},
title = {DeBERTa: Decoding-enhanced BERT with Disentangled Attention},
publisher = {arXiv},
year = 2020,
copyright = {arXiv.org perpetual, non-exclusive license}
}
@misc{FormalInformal,
title = {Formal and Informal Style},
url = {https://www.niu.edu/writingtutorial/style/formal-and-informal-style.shtml}
}
@misc{BetterWebSearches,
title = {Refine web searches},
url = {https://support.google.com/websearch/answer/2466433}
}
@inproceedings{inproceedings,
author = {Banga, Ritu and Mehndiratta, Pulkit},
year = 2017,
month = {dec},
pages = {264--267},
title = {Tagging Efficiency Analysis on Part of Speech Taggers},
doi = {10.1109/ICIT.2017.57}
}
@article{2019t5,
author = {Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu},
title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
journal = {arXiv e-prints},
year = 2019,
archiveprefix = {arXiv},
eprint = {1910.10683}
}

View File

@@ -3,6 +3,6 @@ for d in */ ; do
 [ -L "${d%/}" ] && continue
 echo "$d"
 cd "$d"
-touch __init__.py
+touch main.tex
 cd ..
 done