Open-source pre-training implementation of Google's LaMDA research paper in PyTorch.
# Sizes used both by the model definition and by the mock batch below.
VOCAB_SIZE = 20000
MAX_SEQ_LEN = 512

# Construct the base LaMDA model from its hyperparameters.
lamda_base = LaMDA(
    num_tokens=VOCAB_SIZE,
    dim=512,
    dim_head=64,
    depth=12,
    heads=8,
)

# Wrap the base model in AutoregressiveWrapper, passing the maximum sequence length.
lamda = AutoregressiveWrapper(lamda_base, max_seq_len=MAX_SEQ_LEN)

# Mock token data: a (1, 512) tensor of random integers in [0, 20000).
tokens = torch.randint(0, VOCAB_SIZE, (1, MAX_SEQ_LEN))

# Call the wrapped model on the mock batch.
logits = lamda(tokens)
print(logits)

% LaMDA paper, arXiv 2201.08239 (DBLP record).
% {LaMDA} is braced so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2201-08239,
  author     = {Romal Thoppilan and
                Daniel De Freitas and
                Jamie Hall and
                Noam Shazeer and
                Apoorv Kulshreshtha and
                Heng{-}Tze Cheng and
                Alicia Jin and
                Taylor Bos and
                Leslie Baker and
                Yu Du and
                YaGuang Li and
                Hongrae Lee and
                Huaixiu Steven Zheng and
                Amin Ghafouri and
                Marcelo Menegali and
                Yanping Huang and
                Maxim Krikun and
                Dmitry Lepikhin and
                James Qin and
                Dehao Chen and
                Yuanzhong Xu and
                Zhifeng Chen and
                Adam Roberts and
                Maarten Bosma and
                Yanqi Zhou and
                Chung{-}Ching Chang and
                Igor Krivokon and
                Will Rusch and
                Marc Pickett and
                Kathleen S. Meier{-}Hellstern and
                Meredith Ringel Morris and
                Tulsee Doshi and
                Renelito Delos Santos and
                Toju Duke and
                Johnny Soraker and
                Ben Zevenbergen and
                Vinodkumar Prabhakaran and
                Mark Diaz and
                Ben Hutchinson and
                Kristen Olson and
                Alejandra Molina and
                Erin Hoffman{-}John and
                Josh Lee and
                Lora Aroyo and
                Ravi Rajakumar and
                Alena Butryna and
                Matthew Lamm and
                Viktoriya Kuzmina and
                Joe Fenton and
                Aaron Cohen and
                Rachel Bernstein and
                Ray Kurzweil and
                Blaise Aguera{-}Arcas and
                Claire Cui and
                Marian Croak and
                Ed H. Chi and
                Quoc Le},
  title      = {{LaMDA}: Language Models for Dialog Applications},
  journal    = {CoRR},
  volume     = {abs/2201.08239},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.08239},
  eprinttype = {arXiv},
  eprint     = {2201.08239},
  timestamp  = {Fri, 22 Apr 2022 16:06:31 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-08239.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% "Attention Is All You Need", arXiv 1706.03762.
% eprint/eprinttype added to match the other entries; duplicate keyword removed.
@misc{https://doi.org/10.48550/arxiv.1706.03762,
  doi        = {10.48550/ARXIV.1706.03762},
  url        = {https://arxiv.org/abs/1706.03762},
  eprinttype = {arXiv},
  eprint     = {1706.03762},
  author     = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia},
  keywords   = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences},
  title      = {Attention Is All You Need},
  publisher  = {arXiv},
  year       = {2017},
  copyright  = {arXiv.org perpetual, non-exclusive license}
}

% SentencePiece paper, arXiv 1808.06226 (DBLP record).
% {SentencePiece} is braced so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-1808-06226,
  author     = {Taku Kudo and
                John Richardson},
  title      = {{SentencePiece}: {A} simple and language independent subword tokenizer
                and detokenizer for Neural Text Processing},
  journal    = {CoRR},
  volume     = {abs/1808.06226},
  year       = {2018},
  url        = {https://arxiv.org/abs/1808.06226},
  eprinttype = {arXiv},
  eprint     = {1808.06226},
  timestamp  = {Sun, 02 Sep 2018 15:01:56 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1808-06226.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}