-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathrefs.bib
More file actions
502 lines (455 loc) · 18.9 KB
/
refs.bib
File metadata and controls
502 lines (455 loc) · 18.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
@article{block2023butterfly,
  author  = {Block, Adam and Foster, Dylan J and Krishnamurthy, Akshay and Simchowitz, Max and Zhang, Cyril},
  title   = {Butterfly effects of {SGD} noise: Error amplification in behavior cloning and autoregression},
  journal = {arXiv preprint arXiv:2310.11428},
  year    = {2023},
}
@book{kirk2004optimal,
  author    = {Kirk, Donald E},
  title     = {Optimal control theory: an introduction},
  publisher = {Courier Corporation},
  year      = {2004},
}
@article{pfrommer2022tasil,
  author  = {Pfrommer, Daniel and Zhang, Thomas and Tu, Stephen and Matni, Nikolai},
  title   = {{TaSIL}: {Taylor} series imitation learning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {20162--20174},
  year    = {2022},
}
@inproceedings{chen2019information,
  author       = {Chen, Jinglin and Jiang, Nan},
  title        = {Information-theoretic considerations in batch reinforcement learning},
  booktitle    = {International Conference on Machine Learning},
  pages        = {1042--1051},
  organization = {PMLR},
  year         = {2019},
}
@article{sontag1983lyapunov,
  author    = {Sontag, Eduardo D},
  title     = {A {Lyapunov}-like characterization of asymptotic controllability},
  journal   = {SIAM Journal on Control and Optimization},
  volume    = {21},
  number    = {3},
  pages     = {462--471},
  year      = {1983},
  publisher = {SIAM},
}
@article{tajwar2024preference,
  author  = {Tajwar, Fahim and Singh, Anikait and Sharma, Archit and Rafailov, Rafael and Schneider, Jeff and Xie, Tengyang and Ermon, Stefano and Finn, Chelsea and Kumar, Aviral},
  title   = {Preference fine-tuning of {LLMs} should leverage suboptimal, on-policy data},
  journal = {arXiv preprint arXiv:2404.14367},
  year    = {2024},
}
@article{loshchilov2017decoupled,
  author  = {Loshchilov, Ilya and Hutter, Frank},
  title   = {Decoupled weight decay regularization},
  journal = {arXiv preprint arXiv:1711.05101},
  year    = {2017},
}
@article{loshchilov2016sgdr,
  author  = {Loshchilov, Ilya and Hutter, Frank},
  title   = {{SGDR}: Stochastic gradient descent with warm restarts},
  journal = {arXiv preprint arXiv:1608.03983},
  year    = {2016},
}
@inproceedings{perez2018film,
  author    = {Perez, Ethan and Strub, Florian and De Vries, Harm and Dumoulin, Vincent and Courville, Aaron},
  title     = {{FiLM}: Visual reasoning with a general conditioning layer},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {32},
  number    = {1},
  year      = {2018},
}
@article{zhao2023decision,
  author  = {Zhao, Siyan and Grover, Aditya},
  title   = {Decision Stacks: Flexible Reinforcement Learning via Modular Generative Models},
  journal = {arXiv preprint arXiv:2306.06253},
  year    = {2023},
}
@article{ajay2023compositional,
  author  = {Ajay, Anurag and Han, Seungwook and Du, Yilun and Li, Shuang and Gupta, Abhi and Jaakkola, Tommi and Tenenbaum, Josh and Kaelbling, Leslie and Srivastava, Akash and Agrawal, Pulkit},
  title   = {Compositional Foundation Models for Hierarchical Planning},
  journal = {arXiv preprint arXiv:2309.08587},
  year    = {2023},
}
@article{janner2022planning,
  author  = {Janner, Michael and Du, Yilun and Tenenbaum, Joshua B and Levine, Sergey},
  title   = {Planning with diffusion for flexible behavior synthesis},
  journal = {arXiv preprint arXiv:2205.09991},
  year    = {2022},
}
@article{pearce2023imitating,
  author  = {Pearce, Tim and Rashid, Tabish and Kanervisto, Anssi and Bignell, Dave and Sun, Mingfei and Georgescu, Raluca and Macua, Sergio Valcarcel and Tan, Shan Zheng and Momennejad, Ida and Hofmann, Katja and others},
  title   = {Imitating human behaviour with diffusion models},
  journal = {arXiv preprint arXiv:2301.10677},
  year    = {2023},
}
@article{ajay2022conditional,
  author  = {Ajay, Anurag and Du, Yilun and Gupta, Abhi and Tenenbaum, Joshua and Jaakkola, Tommi and Agrawal, Pulkit},
  title   = {Is Conditional Generative Modeling all you need for Decision-Making?},
  journal = {arXiv preprint arXiv:2211.15657},
  year    = {2022},
}
@article{chi2023diffusion,
  author  = {Chi, Cheng and Feng, Siyuan and Du, Yilun and Xu, Zhenjia and Cousineau, Eric and Burchfiel, Benjamin and Song, Shuran},
  title   = {Diffusion Policy: Visuomotor Policy Learning via Action Diffusion},
  journal = {arXiv preprint arXiv:2303.04137},
  year    = {2023},
}
% Noising prior work
@article{zhao2023learning,
  author  = {Zhao, Tony Z and Kumar, Vikash and Levine, Sergey and Finn, Chelsea},
  title   = {Learning fine-grained bimanual manipulation with low-cost hardware},
  journal = {arXiv preprint arXiv:2304.13705},
  year    = {2023},
}
@article{shafiullah2022behavior,
  author  = {Shafiullah, Nur Muhammad and Cui, Zichen and Altanzaya, Ariuntuya Arty and Pinto, Lerrel},
  title   = {Behavior Transformers: Cloning {$k$} modes with one stone},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {22955--22968},
  year    = {2022},
}
@article{chen2021decision,
  author  = {Chen, Lili and Lu, Kevin and Rajeswaran, Aravind and Lee, Kimin and Grover, Aditya and Laskin, Misha and Abbeel, Pieter and Srinivas, Aravind and Mordatch, Igor},
  title   = {Decision transformer: Reinforcement learning via sequence modeling},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {34},
  pages   = {15084--15097},
  year    = {2021},
}
@article{hussein2018deep,
  author    = {Hussein, Ahmed and Elyan, Eyad and Gaber, Mohamed Medhat and Jayne, Chrisina},
  title     = {Deep imitation learning for {3D} navigation tasks},
  journal   = {Neural Computing and Applications},
  volume    = {29},
  pages     = {389--404},
  year      = {2018},
  publisher = {Springer},
}
@article{hussein2017imitation,
  author    = {Hussein, Ahmed and Gaber, Mohamed Medhat and Elyan, Eyad and Jayne, Chrisina},
  title     = {Imitation learning: A survey of learning methods},
  journal   = {ACM Computing Surveys (CSUR)},
  volume    = {50},
  number    = {2},
  pages     = {1--35},
  year      = {2017},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
@article{bojarski2016end,
  author  = {Bojarski, Mariusz and Del Testa, Davide and Dworakowski, Daniel and Firner, Bernhard and Flepp, Beat and Goyal, Prasoon and Jackel, Lawrence D and Monfort, Mathew and Muller, Urs and Zhang, Jiakai and others},
  title   = {End to end learning for self-driving cars},
  journal = {arXiv preprint arXiv:1604.07316},
  year    = {2016},
}
@inproceedings{finn2017one,
  author       = {Finn, Chelsea and Yu, Tianhe and Zhang, Tianhao and Abbeel, Pieter and Levine, Sergey},
  title        = {One-shot visual imitation learning via meta-learning},
  booktitle    = {Conference on robot learning},
  pages        = {357--368},
  organization = {PMLR},
  year         = {2017},
}
@inproceedings{zhang2018deep,
  author       = {Zhang, Tianhao and McCarthy, Zoe and Jow, Owen and Lee, Dennis and Chen, Xi and Goldberg, Ken and Abbeel, Pieter},
  title        = {Deep imitation learning for complex manipulation tasks from virtual reality teleoperation},
  booktitle    = {2018 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {5628--5635},
  organization = {IEEE},
  year         = {2018},
}
@article{bansal2018chauffeurnet,
  author  = {Bansal, Mayank and Krizhevsky, Alex and Ogale, Abhijit},
  title   = {{ChauffeurNet}: Learning to drive by imitating the best and synthesizing the worst},
  journal = {arXiv preprint arXiv:1812.03079},
  year    = {2018},
}
@inproceedings{tu2022sample,
  author       = {Tu, Stephen and Robey, Alexander and Zhang, Tingnan and Matni, Nikolai},
  title        = {On the sample complexity of stability constrained imitation learning},
  booktitle    = {Learning for Dynamics and Control Conference},
  pages        = {180--191},
  organization = {PMLR},
  year         = {2022},
}
@inproceedings{ross2011reduction,
  author       = {Ross, St{\'e}phane and Gordon, Geoffrey and Bagnell, Drew},
  title        = {A reduction of imitation learning and structured prediction to no-regret online learning},
  booktitle    = {Proceedings of the fourteenth international conference on artificial intelligence and statistics},
  pages        = {627--635},
  organization = {JMLR Workshop and Conference Proceedings},
  year         = {2011},
}
@inproceedings{laskey2017dart,
  author       = {Laskey, Michael and Lee, Jonathan and Fox, Roy and Dragan, Anca and Goldberg, Ken},
  title        = {{DART}: Noise injection for robust imitation learning},
  booktitle    = {Conference on robot learning},
  pages        = {143--156},
  organization = {PMLR},
  year         = {2017},
}
@inproceedings{kelly2019hg,
  author       = {Kelly, Michael and Sidrane, Chelsea and Driggs-Campbell, Katherine and Kochenderfer, Mykel J},
  title        = {{HG-DAgger}: Interactive imitation learning with human experts},
  booktitle    = {2019 International Conference on Robotics and Automation (ICRA)},
  pages        = {8077--8083},
  organization = {IEEE},
  year         = {2019},
}
@article{sun2023mega,
  author  = {Sun, Xiatao and Yang, Shuo and Mangharam, Rahul},
  title   = {{MEGA-DAgger}: Imitation Learning with Multiple Imperfect Experts},
  journal = {arXiv preprint arXiv:2303.00638},
  year    = {2023},
}
@article{chang2021mitigating,
  author  = {Chang, Jonathan D and Uehara, Masatoshi and Sreenivas, Dhruv and Kidambi, Rahul and Sun, Wen},
  title   = {Mitigating covariate shift in imitation learning via offline data without great coverage},
  journal = {arXiv preprint arXiv:2106.03207},
  year    = {2021},
}
@article{de2019causal,
  author  = {De Haan, Pim and Jayaraman, Dinesh and Levine, Sergey},
  title   = {Causal confusion in imitation learning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {32},
  year    = {2019},
}
@inproceedings{ross2010efficient,
  author       = {Ross, St{\'e}phane and Bagnell, Drew},
  title        = {Efficient reductions for imitation learning},
  booktitle    = {Proceedings of the thirteenth international conference on artificial intelligence and statistics},
  pages        = {661--668},
  organization = {JMLR Workshop and Conference Proceedings},
  year         = {2010},
}
@inproceedings{fosterbehavior,
  author    = {Foster, Dylan J and Block, Adam and Misra, Dipendra},
  title     = {Is Behavior Cloning All You Need? Understanding Horizon in Imitation Learning},
  booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year      = {2024},
}
% Doctoral thesis: the degree-granting institution belongs in `school`; the citation key is unchanged.
@phdthesis{kakade2003sample,
  author = {Kakade, Sham Machandranath},
  title  = {On the sample complexity of reinforcement learning},
  school = {University of London, University College London (United Kingdom)},
  year   = {2003},
}
@article{carbery2001distributional,
  author    = {Carbery, Anthony and Wright, James},
  title     = {Distributional and {$L^q$} norm inequalities for polynomials over convex bodies in {$\mathbb{R}^n$}},
  journal   = {Mathematical Research Letters},
  volume    = {8},
  number    = {3},
  pages     = {233--248},
  year      = {2001},
  publisher = {International Press of Boston},
}
@inproceedings{jin2017escape,
  author       = {Jin, Chi and Ge, Rong and Netrapalli, Praneeth and Kakade, Sham M and Jordan, Michael I},
  title        = {How to escape saddle points efficiently},
  booktitle    = {International Conference on Machine Learning},
  pages        = {1724--1732},
  organization = {PMLR},
  year         = {2017},
}
@article{krieg2022recovery,
  author  = {Krieg, David and Novak, Erich and Sonnleitner, Mathias},
  title   = {Recovery of {Sobolev} functions restricted to iid sampling},
  journal = {Mathematics of Computation},
  volume  = {91},
  number  = {338},
  pages   = {2715--2738},
  year    = {2022},
}
@article{flaxman2004online,
  author  = {Flaxman, Abraham D and Kalai, Adam Tauman and McMahan, H Brendan},
  title   = {Online convex optimization in the bandit setting: gradient descent without a gradient},
  journal = {arXiv preprint cs/0408007},
  year    = {2004},
}
% Chapter by Sontag in an edited lecture-notes volume (previously typed as an article, with Sontag listed twice).
% The citation key is unchanged so existing citations keep resolving.
@incollection{agrachev2008input,
  author    = {Sontag, Eduardo D},
  title     = {Input to state stability: Basic concepts and results},
  booktitle = {Nonlinear and optimal control theory: lectures given at the CIME summer school held in Cetraro, Italy June 19--29, 2004},
  editor    = {Nistri, Paolo and Stefani, Gianna},
  series    = {Lecture Notes in Mathematics},
  volume    = {1932},
  pages     = {163--220},
  year      = {2008},
  publisher = {Springer},
}
@article{kohler2013optimal,
  author    = {Kohler, Michael and Krzy{\.z}ak, Adam},
  title     = {Optimal global rates of convergence for interpolation problems with random design},
  journal   = {Statistics \& Probability Letters},
  volume    = {83},
  number    = {8},
  pages     = {1871--1879},
  publisher = {Elsevier},
  year      = {2013},
}
@article{ma2024high,
  author  = {Ma, Tianyi and Verchand, Kabir A and Samworth, Richard J},
  title   = {High-probability minimax lower bounds},
  journal = {arXiv preprint arXiv:2406.13447},
  year    = {2024},
}
@book{gyorfi2006distribution,
  author    = {Gy{\"o}rfi, L{\'a}szl{\'o} and Kohler, Michael and Krzy{\.z}ak, Adam and Walk, Harro},
  title     = {A distribution-free theory of nonparametric regression},
  year      = {2006},
  publisher = {Springer Science \& Business Media},
}
@article{block2024provable,
  author  = {Block, Adam and Jadbabaie, Ali and Pfrommer, Daniel and Simchowitz, Max and Tedrake, Russ},
  title   = {Provable guarantees for generative behavior cloning: Bridging low-level stability and high-level behavior},
  journal = {Advances in Neural Information Processing Systems},
  year    = {2024},
}
@article{zames1981uncertainty,
  author    = {Zames, G and El-Sakkary, A},
  title     = {Uncertainty in unstable systems: the gap metric},
  journal   = {IFAC Proceedings Volumes},
  volume    = {14},
  number    = {2},
  pages     = {149--152},
  publisher = {Elsevier},
  year      = {1981},
}
@book{geer2000empirical,
  author    = {van der Geer, Sara A},
  title     = {Empirical Processes in {M}-estimation},
  series    = {Cambridge Series in Statistical and Probabilistic Mathematics},
  volume    = {6},
  year      = {2000},
  publisher = {Cambridge University Press},
}
@book{wainwright2019high,
  author    = {Wainwright, Martin J},
  title     = {High-dimensional statistics: A non-asymptotic viewpoint},
  series    = {Cambridge Series in Statistical and Probabilistic Mathematics},
  volume    = {48},
  year      = {2019},
  publisher = {Cambridge University Press},
}
@inproceedings{el2024minimax,
  author       = {El Hanchi, Ayoub and Maddison, Chris and Erdogdu, Murat},
  title        = {Minimax Linear Regression under the Quantile Risk},
  booktitle    = {The Thirty Seventh Annual Conference on Learning Theory},
  pages        = {1516--1572},
  organization = {PMLR},
  year         = {2024},
}
@inproceedings{pmlr-v229-zitkovich23a,
  author    = {Zitkovich, Brianna and Yu, Tianhe and Xu, Sichun and Xu, Peng and Xiao, Ted and Xia, Fei and Wu, Jialin and Wohlhart, Paul and Welker, Stefan and Wahid, Ayzaan and Vuong, Quan and Vanhoucke, Vincent and Tran, Huong and Soricut, Radu and Singh, Anikait and Singh, Jaspiar and Sermanet, Pierre and Sanketi, Pannag R. and Salazar, Grecia and Ryoo, Michael S. and Reymann, Krista and Rao, Kanishka and Pertsch, Karl and Mordatch, Igor and Michalewski, Henryk and Lu, Yao and Levine, Sergey and Lee, Lisa and Lee, Tsang-Wei Edward and Leal, Isabel and Kuang, Yuheng and Kalashnikov, Dmitry and Julian, Ryan and Joshi, Nikhil J. and Irpan, Alex and Ichter, Brian and Hsu, Jasmine and Herzog, Alexander and Hausman, Karol and Gopalakrishnan, Keerthana and Fu, Chuyuan and Florence, Pete and Finn, Chelsea and Dubey, Kumar Avinava and Driess, Danny and Ding, Tianli and Choromanski, Krzysztof Marcin and Chen, Xi and Chebotar, Yevgen and Carbajal, Justice and Brown, Noah and Brohan, Anthony and Arenas, Montserrat Gonzalez and Han, Kehang},
  title     = {{RT-2}: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control},
  booktitle = {Proceedings of The 7th Conference on Robot Learning},
  editor    = {Tan, Jie and Toussaint, Marc and Darvish, Kourosh},
  series    = {Proceedings of Machine Learning Research},
  volume    = {229},
  pages     = {2165--2183},
  month     = {06--09 Nov},
  year      = {2023},
  publisher = {PMLR},
  pdf       = {https://proceedings.mlr.press/v229/zitkovich23a/zitkovich23a.pdf},
  url       = {https://proceedings.mlr.press/v229/zitkovich23a.html},
}
@article{kumar2020conservative,
  author  = {Kumar, Aviral and Zhou, Aurick and Tucker, George and Levine, Sergey},
  title   = {Conservative {Q}-learning for offline reinforcement learning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {1179--1191},
  year    = {2020},
}
@article{kostrikov2021offline,
  author  = {Kostrikov, Ilya and Nair, Ashvin and Levine, Sergey},
  title   = {Offline reinforcement learning with implicit {Q}-learning},
  journal = {arXiv preprint arXiv:2110.06169},
  year    = {2021},
}
@article{ho2016generative,
  author  = {Ho, Jonathan and Ermon, Stefano},
  title   = {Generative adversarial imitation learning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {29},
  year    = {2016},
}
@inproceedings{swamy2021moments,
  author       = {Swamy, Gokul and Choudhury, Sanjiban and Bagnell, J Andrew and Wu, Steven},
  title        = {Of moments and matching: A game-theoretic framework for closing the imitation gap},
  booktitle    = {International Conference on Machine Learning},
  pages        = {10022--10032},
  organization = {PMLR},
  year         = {2021},
}
@article{wu2024diffusing,
  author  = {Wu, Runzhe and Chen, Yiding and Swamy, Gokul and Brantley, Kiant{\'e} and Sun, Wen},
  title   = {Diffusing States and Matching Scores: A New Framework for Imitation Learning},
  journal = {arXiv preprint arXiv:2410.13855},
  year    = {2024},
}
@article{black2024pi_0,
  author  = {Black, Kevin and Brown, Noah and Driess, Danny and Esmail, Adnan and Equi, Michael and Finn, Chelsea and Fusai, Niccolo and Groom, Lachy and Hausman, Karol and Ichter, Brian and others},
  title   = {{$\pi_0$}: A Vision-Language-Action Flow Model for General Robot Control},
  journal = {arXiv preprint arXiv:2410.24164},
  year    = {2024},
}
@article{hansen2023idql,
  author  = {Hansen-Estruch, Philippe and Kostrikov, Ilya and Janner, Michael and Kuba, Jakub Grudzien and Levine, Sergey},
  title   = {{IDQL}: Implicit {Q}-learning as an actor-critic method with diffusion policies},
  journal = {arXiv preprint arXiv:2304.10573},
  year    = {2023},
}
@article{tsybakov1997nonparametric,
  author    = {Tsybakov, Alexandre B},
  title     = {On nonparametric estimation of density level sets},
  journal   = {The Annals of Statistics},
  volume    = {25},
  number    = {3},
  pages     = {948--969},
  publisher = {Institute of Mathematical Statistics},
  year      = {1997},
}
@article{bauer2017nonparametric,
  author    = {Bauer, Benedikt and Devroye, Luc and Kohler, Michael and Krzy{\.z}ak, Adam and Walk, Harro},
  title     = {Nonparametric estimation of a function from noiseless observations at random points},
  journal   = {Journal of Multivariate Analysis},
  volume    = {160},
  pages     = {93--104},
  publisher = {Elsevier},
  year      = {2017},
}
@book{vershynin2018high,
  author    = {Vershynin, Roman},
  title     = {High-dimensional probability: An introduction with applications in data science},
  series    = {Cambridge Series in Statistical and Probabilistic Mathematics},
  volume    = {47},
  year      = {2018},
  publisher = {Cambridge University Press},
}
@article{teng2023motion,
  author    = {Teng, Siyu and Hu, Xuemin and Deng, Peng and Li, Bai and Li, Yuchen and Ai, Yunfeng and Yang, Dongsheng and Li, Lingxi and Xuanyuan, Zhe and Zhu, Fenghua and others},
  title     = {Motion planning for autonomous driving: The state of the art and future perspectives},
  journal   = {IEEE Transactions on Intelligent Vehicles},
  volume    = {8},
  number    = {6},
  pages     = {3692--3711},
  publisher = {IEEE},
  year      = {2023},
}
@inproceedings{ke2021grasping,
  author       = {Ke, Liyiming and Wang, Jingqiang and Bhattacharjee, Tapomayukh and Boots, Byron and Srinivasa, Siddhartha},
  title        = {Grasping with chopsticks: Combating covariate shift in model-free imitation learning for fine manipulation},
  booktitle    = {2021 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {6185--6191},
  organization = {IEEE},
  year         = {2021},
}
@inproceedings{jia2023seil,
  author       = {Jia, Mingxi and Wang, Dian and Su, Guanang and Klee, David and Zhu, Xupeng and Walters, Robin and Platt, Robert},
  title        = {{SEIL}: Simulation-augmented equivariant imitation learning},
  booktitle    = {2023 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {1845--1851},
  organization = {IEEE},
  year         = {2023},
}