% deep_learning.bib - BibTeX database of deep-learning references.
% Text outside of entries (such as these lines) is ignored by BibTeX.
% Vaswani et al. (2017), "Attention Is All You Need"; DBLP CoRR record for arXiv 1706.03762.
@article{DBLP:journals/corr/VaswaniSPUJGKP17,
  author        = {Ashish Vaswani and
                   Noam Shazeer and
                   Niki Parmar and
                   Jakob Uszkoreit and
                   Llion Jones and
                   Aidan N. Gomez and
                   Lukasz Kaiser and
                   Illia Polosukhin},
  title         = {Attention Is All You Need},
  journal       = {CoRR},
  volume        = {abs/1706.03762},
  year          = {2017},
  eprint        = {1706.03762},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1706.03762},
  timestamp     = {Mon, 13 Aug 2018 16:48:37 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/VaswaniSPUJGKP17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Zelikman (2018); DBLP CoRR record for arXiv 1803.08493.
@article{DBLP:journals/corr/abs-1803-08493,
  author        = {Eric Zelikman},
  title         = {Context is Everything: Finding Meaning Statistically in Semantic Spaces},
  journal       = {CoRR},
  volume        = {abs/1803.08493},
  year          = {2018},
  eprint        = {1803.08493},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1803.08493},
  timestamp     = {Mon, 13 Aug 2018 16:47:12 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1803-08493},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% BERTScore preprint (arXiv 1904.09675).
% The arXiv identifier is stored in the eprint fields rather than in the journal
% name, and the braces in the title keep the acronyms capitalised under styles
% that sentence-case titles. The stray period after the last author was removed.
@article{bert-score,
  author        = {Zhang, Tianyi and
                   Kishore, Varsha and
                   Wu, Felix and
                   Weinberger, Kilian Q. and
                   Artzi, Yoav},
  title         = {{BERTScore}: Evaluating Text Generation with {BERT}},
  journal       = {CoRR},
  volume        = {abs/1904.09675},
  year          = {2019},
  eprint        = {1904.09675},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1904.09675},
}
% Sahoo et al. (2017); DBLP CoRR record for arXiv 1711.03705.
@article{DBLP:journals/corr/abs-1711-03705,
  author        = {Doyen Sahoo and
                   Quang Pham and
                   Jing Lu and
                   Steven C. H. Hoi},
  title         = {Online Deep Learning: Learning Deep Neural Networks on the Fly},
  journal       = {CoRR},
  volume        = {abs/1711.03705},
  year          = {2017},
  eprint        = {1711.03705},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1711.03705},
  timestamp     = {Mon, 13 Aug 2018 16:48:53 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1711-03705},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Xiong, Merity and Socher (2016); DBLP CoRR record for arXiv 1603.01417.
@article{DBLP:journals/corr/XiongMS16,
  author        = {Caiming Xiong and
                   Stephen Merity and
                   Richard Socher},
  title         = {Dynamic Memory Networks for Visual and Textual Question Answering},
  journal       = {CoRR},
  volume        = {abs/1603.01417},
  year          = {2016},
  eprint        = {1603.01417},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1603.01417},
  timestamp     = {Mon, 13 Aug 2018 16:49:04 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/XiongMS16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Hessel et al. (2017), "Rainbow"; DBLP CoRR record for arXiv 1710.02298.
@article{DBLP:journals/corr/abs-1710-02298,
  author        = {Matteo Hessel and
                   Joseph Modayil and
                   Hado van Hasselt and
                   Tom Schaul and
                   Georg Ostrovski and
                   Will Dabney and
                   Daniel Horgan and
                   Bilal Piot and
                   Mohammad Gheshlaghi Azar and
                   David Silver},
  title         = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1710.02298},
  year          = {2017},
  eprint        = {1710.02298},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1710.02298},
  timestamp     = {Mon, 13 Aug 2018 16:48:05 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1710-02298},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Wilson et al. (2018); DBLP CoRR record for arXiv 1806.05695.
% "Atari" is braced so styles that sentence-case titles do not lowercase the
% proper noun.
@article{DBLP:journals/corr/abs-1806-05695,
  author        = {Dennis G. Wilson and
                   Sylvain Cussat{-}Blanc and
                   Herv{\'{e}} Luga and
                   Julian F. Miller},
  title         = {Evolving simple programs for playing {Atari} games},
  journal       = {CoRR},
  volume        = {abs/1806.05695},
  year          = {2018},
  eprint        = {1806.05695},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1806.05695},
  timestamp     = {Mon, 13 Aug 2018 16:46:45 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1806-05695},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Weber et al. (2017); DBLP CoRR record for arXiv 1707.06203.
@article{DBLP:journals/corr/WeberRRBGRBVHLP17,
  author        = {Theophane Weber and
                   S{\'{e}}bastien Racani{\`{e}}re and
                   David P. Reichert and
                   Lars Buesing and
                   Arthur Guez and
                   Danilo Jimenez Rezende and
                   Adri{\`{a}} Puigdom{\`{e}}nech Badia and
                   Oriol Vinyals and
                   Nicolas Heess and
                   Yujia Li and
                   Razvan Pascanu and
                   Peter Battaglia and
                   David Silver and
                   Daan Wierstra},
  title         = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1707.06203},
  year          = {2017},
  eprint        = {1707.06203},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1707.06203},
  timestamp     = {Mon, 13 Aug 2018 16:47:05 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/WeberRRBGRBVHLP17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Pritzel et al. (2017); DBLP CoRR record for arXiv 1703.01988.
% The accent command in "Adria" was split across a line break by text wrapping;
% it is rejoined here so the brace group is a single BibTeX special character.
@article{DBLP:journals/corr/PritzelUSBVHWB17,
  author        = {Alexander Pritzel and
                   Benigno Uria and
                   Sriram Srinivasan and
                   Adri{\`{a}} Puigdom{\`{e}}nech Badia and
                   Oriol Vinyals and
                   Demis Hassabis and
                   Daan Wierstra and
                   Charles Blundell},
  title         = {Neural Episodic Control},
  journal       = {CoRR},
  volume        = {abs/1703.01988},
  year          = {2017},
  eprint        = {1703.01988},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1703.01988},
  timestamp     = {Mon, 13 Aug 2018 16:48:14 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/PritzelUSBVHWB17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Joulin and Mikolov (2015); DBLP CoRR record for arXiv 1503.01007.
@article{DBLP:journals/corr/JoulinM15,
  author        = {Armand Joulin and
                   Tomas Mikolov},
  title         = {Inferring Algorithmic Patterns with Stack-Augmented Recurrent Nets},
  journal       = {CoRR},
  volume        = {abs/1503.01007},
  year          = {2015},
  eprint        = {1503.01007},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1503.01007},
  timestamp     = {Mon, 13 Aug 2018 16:47:42 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/JoulinM15},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Gyurko et al. (2013), arXiv 1307.7244.
% The arXiv identifier is stored in the eprint fields instead of being embedded
% in the journal name, matching the other arXiv entries in this file.
@article{gyurko2013extracting,
  author        = {Gyurk{\'o}, Lajos Gergely and
                   Lyons, Terry and
                   Kontkowski, Mark and
                   Field, Jonathan},
  title         = {Extracting information from the signature of a financial data stream},
  journal       = {CoRR},
  volume        = {abs/1307.7244},
  year          = {2013},
  eprint        = {1307.7244},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1307.7244},
}
% Parr and Howard (2018); DBLP CoRR record for arXiv 1802.01528.
@article{DBLP:journals/corr/abs-1802-01528,
  author        = {Terence Parr and
                   Jeremy Howard},
  title         = {The Matrix Calculus You Need For Deep Learning},
  journal       = {CoRR},
  volume        = {abs/1802.01528},
  year          = {2018},
  eprint        = {1802.01528},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1802.01528},
  timestamp     = {Mon, 13 Aug 2018 16:48:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1802-01528},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Lake et al. (2016); DBLP CoRR record for arXiv 1604.00289.
@article{DBLP:journals/corr/LakeUTG16,
  author        = {Brenden M. Lake and
                   Tomer D. Ullman and
                   Joshua B. Tenenbaum and
                   Samuel J. Gershman},
  title         = {Building Machines That Learn and Think Like People},
  journal       = {CoRR},
  volume        = {abs/1604.00289},
  year          = {2016},
  eprint        = {1604.00289},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1604.00289},
  timestamp     = {Mon, 13 Aug 2018 16:47:34 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/LakeUTG16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Radford et al. (2019).
% The entry had no journal, which the article type requires, so it is typed as a
% technical report instead. NOTE(review): the institution is taken from the
% authors' known affiliation, not from this file - confirm before publishing.
@techreport{radford2019language,
  author      = {Radford, Alec and
                 Wu, Jeff and
                 Child, Rewon and
                 Luan, David and
                 Amodei, Dario and
                 Sutskever, Ilya},
  title       = {Language Models are Unsupervised Multitask Learners},
  institution = {OpenAI},
  year        = {2019},
}
% Gaier and Ha (2019), arXiv 1906.04358.
% The eprint field holds the bare identifier (the archive name belongs in
% archivePrefix), and journal/volume follow the CoRR convention used by the
% other arXiv articles in this file so the article type has its required fields.
@article{wann2019,
  author        = {Adam Gaier and
                   David Ha},
  title         = {Weight Agnostic Neural Networks},
  journal       = {CoRR},
  volume        = {abs/1906.04358},
  year          = {2019},
  eprint        = {1906.04358},
  archivePrefix = {arXiv},
  url           = {https://weightagnostic.github.io},
  note          = {\url{https://weightagnostic.github.io}},
}
% Anonymous submission (2020), recorded as under review; see the OpenReview URL.
% The booktitle value no longer ends in a stray line break before its closing brace.
@inproceedings{anonymous2020largescale,
  author    = {Anonymous},
  title     = {Large-scale Pretraining for Neural Machine Translation with Tens of Billions of Sentence Pairs},
  booktitle = {Submitted to International Conference on Learning Representations},
  year      = {2020},
  url       = {https://openreview.net/forum?id=Bkl8YR4YDB},
  note      = {under review},
}
% Peng et al. (2019), Advantage-Weighted Regression; CoRR record for arXiv 1910.00177.
@article{AWRPeng19,
  author        = {Xue Bin Peng and
                   Aviral Kumar and
                   Grace Zhang and
                   Sergey Levine},
  title         = {Advantage-Weighted Regression: Simple and Scalable Off-Policy Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/1910.00177},
  year          = {2019},
  eprint        = {1910.00177},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1910.00177},
  timestamp     = {Tue, 01 October 2019 11:27:50 +0200},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% Shridhar, Laumann and Liwicki (2019); DBLP CoRR record for arXiv 1901.02731.
% "Bayesian" is braced so styles that sentence-case titles keep its capital.
@article{DBLP:journals/corr/abs-1901-02731,
  author        = {Kumar Shridhar and
                   Felix Laumann and
                   Marcus Liwicki},
  title         = {A Comprehensive guide to {Bayesian} Convolutional Neural Network with Variational Inference},
  journal       = {CoRR},
  volume        = {abs/1901.02731},
  year          = {2019},
  eprint        = {1901.02731},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1901.02731},
  timestamp     = {Fri, 01 Feb 2019 13:39:59 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1901-02731},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
% McClelland et al. (2019), arXiv 1912.05877 [cs.CL].
% The umlaut is written as a LaTeX escape, like the other accented names in this
% file, so the entry also works with 8-bit classic BibTeX.
@misc{mcclell2019extending,
  author        = {James L. McClelland and
                   Felix Hill and
                   Maja Rudolph and
                   Jason Baldridge and
                   Hinrich Sch{\"u}tze},
  title         = {Extending Machine Language Models toward Human-Level Language Understanding},
  year          = {2019},
  eprint        = {1912.05877},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Ainslie et al. (2020), arXiv 2004.08483 [cs.LG].
% The acronym "ETC" is braced so styles that sentence-case titles keep it in capitals.
@misc{ainslie2020encoding,
  author        = {Joshua Ainslie and
                   Santiago Ontanon and
                   Chris Alberti and
                   Philip Pham and
                   Anirudh Ravula and
                   Sumit Sanghai},
  title         = {{ETC}: Encoding Long and Structured Data in Transformers},
  year          = {2020},
  eprint        = {2004.08483},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
% Satsangi et al. (2020), arXiv 2005.04912 [cs.AI].
@misc{satsangi2020maximizing,
  author        = {Yash Satsangi and
                   Sungsu Lim and
                   Shimon Whiteson and
                   Frans Oliehoek and
                   Martha White},
  title         = {Maximizing Information Gain in Partially Observable Environments via Prediction Reward},
  year          = {2020},
  eprint        = {2005.04912},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
}
% Wang, Salakhutdinov and Yang (2020), arXiv 2005.10804 [cs.LG].
@misc{wang2020provably,
  author        = {Ruosong Wang and
                   Ruslan Salakhutdinov and
                   Lin F. Yang},
  title         = {Provably Efficient Reinforcement Learning with General Value Function Approximation},
  year          = {2020},
  eprint        = {2005.10804},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
% Zakharov, Crosby and Fountas (2020), arXiv 2010.01430 [cs.LG].
@misc{zakharov2020episodic,
  author        = {Alexey Zakharov and
                   Matthew Crosby and
                   Zafeirios Fountas},
  title         = {Episodic Memory for Learning Subjective-Timescale Models},
  year          = {2020},
  eprint        = {2010.01430},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
% Siarohin et al. (2020), arXiv 2003.00196 [cs.CV].
% Accented letters are written as LaTeX escapes, like the other accented names
% in this file, so the entry also works with 8-bit classic BibTeX.
@misc{siarohin2020order,
  author        = {Aliaksandr Siarohin and
                   St{\'e}phane Lathuili{\`e}re and
                   Sergey Tulyakov and
                   Elisa Ricci and
                   Nicu Sebe},
  title         = {First Order Motion Model for Image Animation},
  year          = {2020},
  eprint        = {2003.00196},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% Thies et al. (2020), arXiv 1912.05566 [cs.CV].
% The sharp s is written as a LaTeX escape, like the other non-ASCII names in
% this file, so the entry also works with 8-bit classic BibTeX.
@misc{thies2020neural,
  author        = {Justus Thies and
                   Mohamed Elgharib and
                   Ayush Tewari and
                   Christian Theobalt and
                   Matthias Nie{\ss}ner},
  title         = {Neural Voice Puppetry: Audio-driven Facial Reenactment},
  year          = {2020},
  eprint        = {1912.05566},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% Xu et al. (2021), arXiv 2012.14740 [cs.CL].
% The model name is braced so styles that sentence-case titles keep its mixed case.
@misc{xu2021layoutlmv2,
  author        = {Yang Xu and
                   Yiheng Xu and
                   Tengchao Lv and
                   Lei Cui and
                   Furu Wei and
                   Guoxin Wang and
                   Yijuan Lu and
                   Dinei Florencio and
                   Cha Zhang and
                   Wanxiang Che and
                   Min Zhang and
                   Lidong Zhou},
  title         = {{LayoutLMv2}: Multi-modal Pre-training for Visually-Rich Document Understanding},
  year          = {2021},
  eprint        = {2012.14740},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Amatriain (2023), arXiv 2302.07730.
% The duplicated "FOS" keyword was removed, and eprint fields were added so the
% entry carries the same arXiv metadata as the other preprints in this file.
% primaryClass comes from the "(cs.CL)" keyword. The citation key is unchanged
% so existing citations keep resolving.
@misc{https://doi.org/10.48550/arxiv.2302.07730,
  author        = {Amatriain, Xavier},
  title         = {Transformer models: an introduction and catalog},
  publisher     = {arXiv},
  year          = {2023},
  doi           = {10.48550/ARXIV.2302.07730},
  eprint        = {2302.07730},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2302.07730},
  keywords      = {Computation and Language (cs.CL), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International},
}
% Wang et al. (2023), arXiv 2305.12421 [cs.CL].
% "Open-QA" is braced so styles that sentence-case titles keep the acronym in capitals.
@misc{wang2023evaluating,
  author        = {Cunxiang Wang and
                   Sirui Cheng and
                   Qipeng Guo and
                   Yuanhao Yue and
                   Bowen Ding and
                   Zhikun Xu and
                   Yidong Wang and
                   Xiangkun Hu and
                   Zheng Zhang and
                   Yue Zhang},
  title         = {Evaluating {Open-QA} Evaluation},
  year          = {2023},
  eprint        = {2305.12421},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Loureiro, Derby and Wijaya (2023), arXiv 2301.02458 [cs.CL].
@misc{loureiro2023topics,
  author        = {Manuel V. Loureiro and
                   Steven Derby and
                   Tri Kurniawan Wijaya},
  title         = {Topics as Entity Clusters: Entity-based Topics from Language Models and Graph Neural Networks},
  year          = {2023},
  eprint        = {2301.02458},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Hernandez et al. (2024), arXiv 2308.09124 [cs.CL].
@misc{hernandez2024linearity,
  author        = {Evan Hernandez and
                   Arnab Sen Sharma and
                   Tal Haklay and
                   Kevin Meng and
                   Martin Wattenberg and
                   Jacob Andreas and
                   Yonatan Belinkov and
                   David Bau},
  title         = {Linearity of Relation Decoding in Transformer Language Models},
  year          = {2024},
  eprint        = {2308.09124},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Tan, Li and Huang (2024), arXiv 2402.03471 [cs.LG].
@misc{tan2024information,
  author        = {Zhiquan Tan and
                   Chenghai Li and
                   Weiran Huang},
  title         = {The Information of Large Language Model Geometry},
  year          = {2024},
  eprint        = {2402.03471},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
% Chen et al. (2024), arXiv 2311.16989 [cs.CL].
% "ChatGPT" is braced so styles that sentence-case titles keep the product
% name's capitalisation.
@misc{chen2024chatgpts,
  author        = {Hailin Chen and
                   Fangkai Jiao and
                   Xingxuan Li and
                   Chengwei Qin and
                   Mathieu Ravaut and
                   Ruochen Zhao and
                   Caiming Xiong and
                   Shafiq Joty},
  title         = {{ChatGPT}'s One-year Anniversary: Are Open-Source Large Language Models Catching up?},
  year          = {2024},
  eprint        = {2311.16989},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Kahng et al. (2024), arXiv 2402.10524 [cs.HC].
% The acronym "LLM" is braced so styles that sentence-case titles keep it in capitals.
@misc{kahng2024llm,
  author        = {Minsuk Kahng and
                   Ian Tenney and
                   Mahima Pushkarna and
                   Michael Xieyang Liu and
                   James Wexler and
                   Emily Reif and
                   Krystal Kallarackal and
                   Minsuk Chang and
                   Michael Terry and
                   Lucas Dixon},
  title         = {{LLM} Comparator: Visual Analytics for Side-by-Side Evaluation of Large Language Models},
  year          = {2024},
  eprint        = {2402.10524},
  archivePrefix = {arXiv},
  primaryClass  = {cs.HC},
}