deep_learning.bib

% Vaswani et al. (2017), arXiv:1706.03762 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/VaswaniSPUJGKP17,
  author        = {Ashish Vaswani and Noam Shazeer and Niki Parmar and
                   Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and
                   Lukasz Kaiser and Illia Polosukhin},
  title         = {Attention Is All You Need},
  journal       = {CoRR},
  volume        = {abs/1706.03762},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1706.03762},
  url           = {http://arxiv.org/abs/1706.03762},
  timestamp     = {Mon, 13 Aug 2018 16:48:37 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/VaswaniSPUJGKP17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Zelikman (2018), arXiv:1803.08493 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/abs-1803-08493,
  author        = {Eric Zelikman},
  title         = {Context is Everything: Finding Meaning Statistically in
                   Semantic Spaces},
  journal       = {CoRR},
  volume        = {abs/1803.08493},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1803.08493},
  url           = {http://arxiv.org/abs/1803.08493},
  timestamp     = {Mon, 13 Aug 2018 16:47:12 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1803-08493},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Zhang et al. (2019), arXiv:1904.09675.
% The stray period after the last author's given name is removed, the arXiv
% identifier is stored in eprint/archivePrefix with the CoRR journal/volume
% form used by the other arXiv records in this file, and the acronyms in the
% title are brace-protected so sentence-casing styles keep their capitals.
@article{bert-score,
  author        = {Zhang, Tianyi and Kishore, Varsha and Wu, Felix and
                   Weinberger, Kilian Q. and Artzi, Yoav},
  title         = {{BERTScore}: Evaluating Text Generation with {BERT}},
  journal       = {CoRR},
  volume        = {abs/1904.09675},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1904.09675},
  url           = {https://arxiv.org/abs/1904.09675},
}

% Sahoo et al. (2017), arXiv:1711.03705 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/abs-1711-03705,
  author        = {Doyen Sahoo and Quang Pham and Jing Lu and
                   Steven C. H. Hoi},
  title         = {Online Deep Learning: Learning Deep Neural Networks on
                   the Fly},
  journal       = {CoRR},
  volume        = {abs/1711.03705},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1711.03705},
  url           = {http://arxiv.org/abs/1711.03705},
  timestamp     = {Mon, 13 Aug 2018 16:48:53 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1711-03705},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Xiong et al. (2016), arXiv:1603.01417 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/XiongMS16,
  author        = {Caiming Xiong and Stephen Merity and Richard Socher},
  title         = {Dynamic Memory Networks for Visual and Textual Question
                   Answering},
  journal       = {CoRR},
  volume        = {abs/1603.01417},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1603.01417},
  url           = {http://arxiv.org/abs/1603.01417},
  timestamp     = {Mon, 13 Aug 2018 16:49:04 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/XiongMS16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Hessel et al. (2017), arXiv:1710.02298 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/abs-1710-02298,
  author        = {Matteo Hessel and Joseph Modayil and Hado van Hasselt and
                   Tom Schaul and Georg Ostrovski and Will Dabney and
                   Daniel Horgan and Bilal Piot and
                   Mohammad Gheshlaghi Azar and David Silver},
  title         = {Rainbow: Combining Improvements in Deep Reinforcement
                   Learning},
  journal       = {CoRR},
  volume        = {abs/1710.02298},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1710.02298},
  url           = {http://arxiv.org/abs/1710.02298},
  timestamp     = {Mon, 13 Aug 2018 16:48:05 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1710-02298},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Wilson et al. (2018), arXiv:1806.05695 -- CoRR record from the DBLP export.
% The proper noun Atari is brace-protected so that styles which sentence-case
% titles do not lowercase it.
@article{DBLP:journals/corr/abs-1806-05695,
  author        = {Dennis G. Wilson and Sylvain Cussat{-}Blanc and
                   Herv{\'{e}} Luga and Julian F. Miller},
  title         = {Evolving simple programs for playing {Atari} games},
  journal       = {CoRR},
  volume        = {abs/1806.05695},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1806.05695},
  url           = {http://arxiv.org/abs/1806.05695},
  timestamp     = {Mon, 13 Aug 2018 16:46:45 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1806-05695},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Weber et al. (2017), arXiv:1707.06203 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/WeberRRBGRBVHLP17,
  author        = {Theophane Weber and S{\'{e}}bastien Racani{\`{e}}re and
                   David P. Reichert and Lars Buesing and Arthur Guez and
                   Danilo Jimenez Rezende and
                   Adri{\`{a}} Puigdom{\`{e}}nech Badia and Oriol Vinyals and
                   Nicolas Heess and Yujia Li and Razvan Pascanu and
                   Peter Battaglia and David Silver and Daan Wierstra},
  title         = {Imagination-Augmented Agents for Deep Reinforcement
                   Learning},
  journal       = {CoRR},
  volume        = {abs/1707.06203},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1707.06203},
  url           = {http://arxiv.org/abs/1707.06203},
  timestamp     = {Mon, 13 Aug 2018 16:47:05 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/WeberRRBGRBVHLP17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Pritzel et al. (2017), arXiv:1703.01988 -- CoRR record from the DBLP export.
% The author list is re-wrapped so that no line break falls inside an accent
% command; the original wrap split the grave accent from its letter.
@article{DBLP:journals/corr/PritzelUSBVHWB17,
  author        = {Alexander Pritzel and Benigno Uria and Sriram Srinivasan and
                   Adri{\`{a}} Puigdom{\`{e}}nech Badia and Oriol Vinyals and
                   Demis Hassabis and Daan Wierstra and Charles Blundell},
  title         = {Neural Episodic Control},
  journal       = {CoRR},
  volume        = {abs/1703.01988},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1703.01988},
  url           = {http://arxiv.org/abs/1703.01988},
  timestamp     = {Mon, 13 Aug 2018 16:48:14 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/PritzelUSBVHWB17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Joulin and Mikolov (2015), arXiv:1503.01007 -- CoRR record from the DBLP
% export.
@article{DBLP:journals/corr/JoulinM15,
  author        = {Armand Joulin and Tomas Mikolov},
  title         = {Inferring Algorithmic Patterns with Stack-Augmented
                   Recurrent Nets},
  journal       = {CoRR},
  volume        = {abs/1503.01007},
  year          = {2015},
  archivePrefix = {arXiv},
  eprint        = {1503.01007},
  url           = {http://arxiv.org/abs/1503.01007},
  timestamp     = {Mon, 13 Aug 2018 16:47:42 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/JoulinM15},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Gyurko et al. (2013), arXiv:1307.7244.
% The arXiv identifier is moved out of the journal field into
% eprint/archivePrefix, using the CoRR journal/volume form of the other arXiv
% records in this file.
@article{gyurko2013extracting,
  author        = {Gyurk{\'o}, Lajos Gergely and Lyons, Terry and
                   Kontkowski, Mark and Field, Jonathan},
  title         = {Extracting information from the signature of a financial
                   data stream},
  journal       = {CoRR},
  volume        = {abs/1307.7244},
  year          = {2013},
  archivePrefix = {arXiv},
  eprint        = {1307.7244},
  url           = {https://arxiv.org/abs/1307.7244},
}

% Parr and Howard (2018), arXiv:1802.01528 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/abs-1802-01528,
  author        = {Terence Parr and Jeremy Howard},
  title         = {The Matrix Calculus You Need For Deep Learning},
  journal       = {CoRR},
  volume        = {abs/1802.01528},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1802.01528},
  url           = {http://arxiv.org/abs/1802.01528},
  timestamp     = {Mon, 13 Aug 2018 16:48:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1802-01528},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Lake et al. (2016), arXiv:1604.00289 -- CoRR record from the DBLP export.
@article{DBLP:journals/corr/LakeUTG16,
  author        = {Brenden M. Lake and Tomer D. Ullman and
                   Joshua B. Tenenbaum and Samuel J. Gershman},
  title         = {Building Machines That Learn and Think Like People},
  journal       = {CoRR},
  volume        = {abs/1604.00289},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1604.00289},
  url           = {http://arxiv.org/abs/1604.00289},
  timestamp     = {Mon, 13 Aug 2018 16:47:34 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/LakeUTG16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Radford et al. (2019).
% The entry was typed as an article but had no journal, which BibTeX reports
% as a missing required field; it is now a techreport with an institution.
% NOTE(review): the institution value is supplied from general knowledge of
% this report, not from the original entry -- confirm before publishing.
@techreport{radford2019language,
  author        = {Radford, Alec and Wu, Jeff and Child, Rewon and Luan, David
                   and Amodei, Dario and Sutskever, Ilya},
  title         = {Language Models are Unsupervised Multitask Learners},
  institution   = {OpenAI},
  year          = {2019},
}

% Gaier and Ha (2019), arXiv:1906.04358.
% eprint now holds the bare identifier with archivePrefix naming the archive,
% and journal/volume follow the CoRR form of the other arXiv records so the
% article type has its required journal field. The note that repeated the
% project URL inside a hard-coded url macro is dropped; the url field already
% carries that address.
@article{wann2019,
  author        = {Adam Gaier and David Ha},
  title         = {Weight Agnostic Neural Networks},
  journal       = {CoRR},
  volume        = {abs/1906.04358},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1906.04358},
  url           = {https://weightagnostic.github.io},
}

% Anonymous conference submission (2020), marked as under review.
% The booktitle value is re-wrapped so its closing brace follows the last
% word directly; the original had a line break and padding before the brace.
@inproceedings{anonymous2020largescale,
  author        = {Anonymous},
  title         = {Large-scale Pretraining for Neural Machine Translation with
                   Tens of Billions of Sentence Pairs},
  booktitle     = {Submitted to International Conference on Learning
                   Representations},
  year          = {2020},
  url           = {https://openreview.net/forum?id=Bkl8YR4YDB},
  note          = {under review},
}

% Peng et al. (2019), arXiv:1910.00177 -- CoRR-style record.
@article{AWRPeng19,
  author        = {Xue Bin Peng and Aviral Kumar and Grace Zhang and
                   Sergey Levine},
  title         = {Advantage-Weighted Regression: Simple and Scalable
                   Off-Policy Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1910.00177},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1910.00177},
  url           = {https://arxiv.org/abs/1910.00177},
  timestamp     = {Tue, 01 October 2019 11:27:50 +0200},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Shridhar et al. (2019), arXiv:1901.02731 -- CoRR record from the DBLP export.
% Bayesian is brace-protected so that styles which sentence-case titles keep
% its capital letter.
@article{DBLP:journals/corr/abs-1901-02731,
  author        = {Kumar Shridhar and Felix Laumann and Marcus Liwicki},
  title         = {A Comprehensive guide to {Bayesian} Convolutional Neural
                   Network with Variational Inference},
  journal       = {CoRR},
  volume        = {abs/1901.02731},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1901.02731},
  url           = {http://arxiv.org/abs/1901.02731},
  timestamp     = {Fri, 01 Feb 2019 13:39:59 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1901-02731},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% McClelland et al. (2019), arXiv:1912.05877.
% The u-umlaut in the last author's surname is written as a LaTeX escape,
% matching the escaped accents used elsewhere in this file and keeping the
% entry safe for 8-bit BibTeX.
@misc{mcclell2019extending,
  author        = {James L. McClelland and Felix Hill and Maja Rudolph and
                   Jason Baldridge and Hinrich Sch{\"u}tze},
  title         = {Extending Machine Language Models toward Human-Level
                   Language Understanding},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1912.05877},
  primaryClass  = {cs.CL},
}

% Ainslie et al. (2020), arXiv:2004.08483.
% The acronym ETC is brace-protected so that styles which sentence-case
% titles do not lowercase it.
@misc{ainslie2020encoding,
  author        = {Joshua Ainslie and Santiago Ontanon and Chris Alberti and
                   Philip Pham and Anirudh Ravula and Sumit Sanghai},
  title         = {{ETC}: Encoding Long and Structured Data in Transformers},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2004.08483},
  primaryClass  = {cs.LG},
}

% Satsangi et al. (2020), arXiv:2005.04912 [cs.AI].
@misc{satsangi2020maximizing,
  author        = {Yash Satsangi and Sungsu Lim and Shimon Whiteson and
                   Frans Oliehoek and Martha White},
  title         = {Maximizing Information Gain in Partially Observable
                   Environments via Prediction Reward},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2005.04912},
  primaryClass  = {cs.AI},
}

% Wang et al. (2020), arXiv:2005.10804 [cs.LG].
@misc{wang2020provably,
  author        = {Ruosong Wang and Ruslan Salakhutdinov and Lin F. Yang},
  title         = {Provably Efficient Reinforcement Learning with General
                   Value Function Approximation},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2005.10804},
  primaryClass  = {cs.LG},
}

% Zakharov et al. (2020), arXiv:2010.01430 [cs.LG].
@misc{zakharov2020episodic,
  author        = {Alexey Zakharov and Matthew Crosby and Zafeirios Fountas},
  title         = {Episodic Memory for Learning Subjective-Timescale Models},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2010.01430},
  primaryClass  = {cs.LG},
}

% Siarohin et al. (2020), arXiv:2003.00196.
% The accented letters in the second author's name are written as LaTeX
% escapes, matching the escaped accents used elsewhere in this file and
% keeping the entry safe for 8-bit BibTeX.
@misc{siarohin2020order,
  author        = {Aliaksandr Siarohin and St{\'e}phane Lathuili{\`e}re and
                   Sergey Tulyakov and Elisa Ricci and Nicu Sebe},
  title         = {First Order Motion Model for Image Animation},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {2003.00196},
  primaryClass  = {cs.CV},
}

% Thies et al. (2020), arXiv:1912.05566.
% The sharp s in the last author's surname is written as a LaTeX escape,
% matching the escaped characters used elsewhere in this file and keeping
% the entry safe for 8-bit BibTeX.
@misc{thies2020neural,
  author        = {Justus Thies and Mohamed Elgharib and Ayush Tewari and
                   Christian Theobalt and Matthias Nie{\ss}ner},
  title         = {Neural Voice Puppetry: Audio-driven Facial Reenactment},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {1912.05566},
  primaryClass  = {cs.CV},
}

% Xu et al. (2021), arXiv:2012.14740.
% The mixed-case name LayoutLMv2 is brace-protected so that styles which
% sentence-case titles keep its capitalisation.
@misc{xu2021layoutlmv2,
  author        = {Yang Xu and Yiheng Xu and Tengchao Lv and Lei Cui and
                   Furu Wei and Guoxin Wang and Yijuan Lu and
                   Dinei Florencio and Cha Zhang and Wanxiang Che and
                   Min Zhang and Lidong Zhou},
  title         = {{LayoutLMv2}: Multi-modal Pre-training for Visually-Rich
                   Document Understanding},
  year          = {2021},
  archivePrefix = {arXiv},
  eprint        = {2012.14740},
  primaryClass  = {cs.CL},
}

% Amatriain (2023), arXiv:2302.07730 -- record in the DOI-export format.
% The keyword that was listed twice now appears once, and the arXiv
% identifier and class are also exposed through eprint, archivePrefix and
% primaryClass like the other arXiv records in this file. The citation key
% is left as exported so existing citations keep resolving.
@misc{https://doi.org/10.48550/arxiv.2302.07730,
  author        = {Amatriain, Xavier},
  title         = {Transformer models: an introduction and catalog},
  publisher     = {arXiv},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2302.07730},
  primaryClass  = {cs.CL},
  doi           = {10.48550/ARXIV.2302.07730},
  url           = {https://arxiv.org/abs/2302.07730},
  keywords      = {Computation and Language (cs.CL), FOS: Computer and
                   information sciences},
  copyright     = {Creative Commons Attribution 4.0 International},
}

% Wang et al. (2023), arXiv:2305.12421.
% Open-QA is brace-protected so that styles which sentence-case titles do
% not lowercase the acronym.
@misc{wang2023evaluating,
  author        = {Cunxiang Wang and Sirui Cheng and Qipeng Guo and
                   Yuanhao Yue and Bowen Ding and Zhikun Xu and Yidong Wang and
                   Xiangkun Hu and Zheng Zhang and Yue Zhang},
  title         = {Evaluating {Open-QA} Evaluation},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2305.12421},
  primaryClass  = {cs.CL},
}

% Loureiro et al. (2023), arXiv:2301.02458 [cs.CL].
@misc{loureiro2023topics,
  author        = {Manuel V. Loureiro and Steven Derby and
                   Tri Kurniawan Wijaya},
  title         = {Topics as Entity Clusters: Entity-based Topics from
                   Language Models and Graph Neural Networks},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2301.02458},
  primaryClass  = {cs.CL},
}

% Hernandez et al. (2024), arXiv:2308.09124 [cs.CL].
@misc{hernandez2024linearity,
  author        = {Evan Hernandez and Arnab Sen Sharma and Tal Haklay and
                   Kevin Meng and Martin Wattenberg and Jacob Andreas and
                   Yonatan Belinkov and David Bau},
  title         = {Linearity of Relation Decoding in Transformer Language
                   Models},
  year          = {2024},
  archivePrefix = {arXiv},
  eprint        = {2308.09124},
  primaryClass  = {cs.CL},
}

% Tan et al. (2024), arXiv:2402.03471 [cs.LG].
@misc{tan2024information,
  author        = {Zhiquan Tan and Chenghai Li and Weiran Huang},
  title         = {The Information of Large Language Model Geometry},
  year          = {2024},
  archivePrefix = {arXiv},
  eprint        = {2402.03471},
  primaryClass  = {cs.LG},
}

% Chen et al. (2024), arXiv:2311.16989.
% ChatGPT is brace-protected so that styles which sentence-case titles keep
% its internal capitals.
@misc{chen2024chatgpts,
  author        = {Hailin Chen and Fangkai Jiao and Xingxuan Li and
                   Chengwei Qin and Mathieu Ravaut and Ruochen Zhao and
                   Caiming Xiong and Shafiq Joty},
  title         = {{ChatGPT}'s One-year Anniversary: Are Open-Source Large
                   Language Models Catching up?},
  year          = {2024},
  archivePrefix = {arXiv},
  eprint        = {2311.16989},
  primaryClass  = {cs.CL},
}

  % Kahng et al. (2024), arXiv:2402.10524.
  % The acronym LLM is brace-protected so that styles which sentence-case
  % titles do not lowercase it.
  @misc{kahng2024llm,
  author        = {Minsuk Kahng and Ian Tenney and Mahima Pushkarna and
                   Michael Xieyang Liu and James Wexler and Emily Reif and
                   Krystal Kallarackal and Minsuk Chang and Michael Terry and
                   Lucas Dixon},
  title         = {{LLM} Comparator: Visual Analytics for Side-by-Side
                   Evaluation of Large Language Models},
  year          = {2024},
  archivePrefix = {arXiv},
  eprint        = {2402.10524},
  primaryClass  = {cs.HC},
}