[ { "id": "https://authors.library.caltech.edu/records/ch9jq-hc145", "eprint_status": "archive", "datestamp": "2024-01-10 20:00:52", "lastmod": "2024-01-10 20:00:52", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Zhou-Tingtao", "name": { "family": "Zhou", "given": "Tingtao" }, "orcid": "0000-0002-1766-719X" }, { "id": "Wan-Xuan", "name": { "family": "Wan", "given": "Xuan" }, "orcid": "0000-0002-6165-6340" }, { "id": "Huang-Daniel-Zhengyu", "name": { "family": "Huang", "given": "Daniel Zhengyu" } }, { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" }, "orcid": "0000-0003-2081-9665" }, { "id": "Peng-Zhiwei", "name": { "family": "Peng", "given": "Zhiwei" }, "orcid": "0000-0002-9486-2837" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Brady-J-F", "name": { "family": "Brady", "given": "John F." }, "orcid": "0000-0001-5817-9128" }, { "id": "Sternberg-P-W", "name": { "family": "Sternberg", "given": "Paul W." }, "orcid": "0000-0002-7699-0173" }, { "id": "Daraio-C", "name": { "family": "Daraio", "given": "Chiara" }, "orcid": "0000-0001-5296-4440" } ] }, "title": "AI-aided geometric design of anti-infection catheters", "ispublished": "pub", "full_text_status": "public", "keywords": "Multidisciplinary", "note": "
© 2024 The Authors, some rights reserved; exclusive licensee American Association for the Advancement of Science. No claim to original U.S. Government Works. Distributed under a Creative Commons Attribution License 4.0 (CC BY).
\n\nThis work was supported by the following: the Donna and Benjamin M. Rosen Bioengineering Center Pilot Research Grant (J.F.B. and C.D.), the Heritage Medical Institute at Caltech (C.D.), and the National Science Foundation, Center to Stream Healthcare in Place (C2SHIP), award no. 2052827 (C.D.). D.Z.H. is supported by the generosity of Eric and Wendy Schmidt by recommendation of the Schmidt Futures program. Z.L. is supported in part by the PIMCO Fellowship and Amazon AI4Science Fellowship. A.A. and P.W.S. are supported by Bren Professorships.
\n\nX.W., T.Z., P.W.S., and C.D. designed experiments. X.W. and T.Z. performed experiments and analyzed data. T.Z. and Z.P. performed simulations. D.Z.H. and Z.L. designed the AI model and performed optimization. A.A. conceptualized and planned the AI framework. T.Z., J.F.B., and C.D. conceived the project. P.W.S. and C.D. supervised the project. All authors discussed the results and contributed to the manuscript writing.
\n\nAll data needed to evaluate the conclusions in the paper are present in the paper and/or the Supplementary Materials. Supplementary data for the optimization process are included in the following link: https://data.caltech.edu/records/mdj7m-ajv14.
\n\nCalifornia Institute of Technology (Caltech) has a patent pending related to the discoveries in this manuscript. Patent status: Pending. Name of organization issuing patent: The United States Patent and Trademark Office (USPTO). All authors are inventors. Filing date: 13 March 2023. Serial number: 63/451,788. The authors declare that they have no other competing interests.
", "abstract": "Bacteria can swim upstream in a narrow tube and pose a clinical threat of urinary tract infection to patients implanted with catheters. Coatings and structured surfaces have been proposed to repel bacteria, but no such approach thoroughly addresses the contamination problem in catheters. Here, on the basis of the physical mechanism of upstream swimming, we propose a novel geometric design, optimized by an artificial intelligence model. Using\n Escherichia coli\n , we demonstrate the anti-infection mechanism in microfluidic experiments and evaluate the effectiveness of the design in three-dimensionally printed prototype catheters under clinical flow rates. Our catheter design shows that one to two orders of magnitude improved suppression of bacterial contamination at the upstream end, potentially prolonging the in-dwelling time for catheter use and reducing the overall risk of catheter-associated urinary tract infection.", "date": "2024-01-05", "date_type": "published", "publication": "Science Advances", "volume": "10", "number": "1", "publisher": "American Association for the Advancement of Science", "pagerange": "eadj1741", "issn": "2375-2548", "official_url": "https://authors.library.caltech.edu/records/ch9jq-hc145", "funders": { "items": [ { "grant_number": "Donna and Benjamin M. 
Rosen Bioengineering Center" }, { "grant_number": "Heritage Medical Research Institute" }, { "grant_number": "CNS-2052827" }, {}, { "grant_number": "Amazon AI4Science Fellowship" }, { "grant_number": "Bren Professor of Computing and Mathematical Sciences" } ] }, "local_group": { "items": [ { "id": "Division-of-Biology-and-Biological-Engineering" }, { "id": "Rosen-Bioengineering-Center" }, { "id": "Heritage-Medical-Research-Institute" } ] }, "doi": "10.1126/sciadv.adj1741", "pmcid": "PMC10776022", "primary_object": { "basename": "sciadv.adj1741.pdf", "url": "https://authors.library.caltech.edu/records/ch9jq-hc145/files/sciadv.adj1741.pdf" }, "related_objects": [ { "basename": "sciadv.adj1741_movies_s1_to_s3.zip", "url": "https://authors.library.caltech.edu/records/ch9jq-hc145/files/sciadv.adj1741_movies_s1_to_s3.zip" }, { "basename": "sciadv.adj1741_sm.pdf", "url": "https://authors.library.caltech.edu/records/ch9jq-hc145/files/sciadv.adj1741_sm.pdf" } ], "resource_type": "article", "pub_year": "2024", "author_list": "Zhou, Tingtao; Wan, Xuan; et al." }, { "id": "https://authors.library.caltech.edu/records/ewmpw-3r017", "eprint_status": "archive", "datestamp": "2023-12-18 18:07:26", "lastmod": "2023-12-18 18:07:26", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Zheng-Zhiling", "name": { "family": "Zheng", "given": "Zhiling" }, "orcid": "0000-0001-6090-2258" }, { "id": "Alawadhi-Ali-H", "name": { "family": "Alawadhi", "given": "Ali H." }, "orcid": "0000-0003-2680-5221" }, { "id": "Chheda-Saumil", "name": { "family": "Chheda", "given": "Saumil" }, "orcid": "0000-0002-0989-5707" }, { "id": "Neumann-S-Ephraim", "name": { "family": "Neumann", "given": "S. 
Ephraim" }, "orcid": "0000-0002-8515-9621" }, { "id": "Rampal-Nakul", "name": { "family": "Rampal", "given": "Nakul" }, "orcid": "0000-0002-6187-5631" }, { "id": "Liu-Shengchao", "name": { "family": "Liu", "given": "Shengchao" }, "orcid": "0000-0003-2030-2367" }, { "id": "Nguyen-Ha-L", "name": { "family": "Nguyen", "given": "Ha L." }, "orcid": "0000-0002-4977-925X" }, { "id": "Lin-Yen-hsu", "name": { "family": "Lin", "given": "Yen-hsu" } }, { "id": "Rong-Zichao", "name": { "family": "Rong", "given": "Zichao" }, "orcid": "0000-0002-9014-9540" }, { "id": "Siepmann-Joern-Ilja", "name": { "family": "Siepmann", "given": "J. Ilja" }, "orcid": "0000-0003-2534-4507" }, { "id": "Gagliardi-Laura", "name": { "family": "Gagliardi", "given": "Laura" }, "orcid": "0000-0001-5227-1396" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Borgs-Christian", "name": { "family": "Borgs", "given": "Christian" }, "orcid": "0000-0001-5653-0498" }, { "id": "Chayes-Jennifer-T", "name": { "family": "Chayes", "given": "Jennifer T." }, "orcid": "0000-0003-4020-8618" }, { "id": "Yaghi-Omar-M", "name": { "family": "Yaghi", "given": "Omar M." }, "orcid": "0000-0002-5611-3325" } ] }, "title": "Shaping the Water-Harvesting Behavior of Metal\u2013Organic Frameworks Aided by Fine-Tuned GPT Models", "ispublished": "pub", "full_text_status": "public", "keywords": "Colloid and Surface Chemistry; Biochemistry; General Chemistry; Catalysis", "note": "\u00a9 2023 American Chemical Society.
\n\n
This material is based upon work supported by the Defense Advanced Research Projects Agency (DARPA) under contract HR0011-21-C-0020. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of DARPA. The computational work is partially supported by the Department of Energy (DOE), Office of Basic Energy Sciences, Division of Chemical Sciences, Geosciences, and Biosciences, under award DE-SC0023454. In addition, the National Science Foundation (NSF), Division of Chemistry, Chemical Structure, Dynamics, and Mechanisms A (CSDM\u2013A), provided support for the computational resources, award number: CHE-2223442. The authors also extend their gratitude to the Research Computing Center at the University of Chicago for providing computational resources. Additionally, this research utilized the facilities of the Advanced Light Source, a DOE Office of Science User Facility, under contract no. DE-AC02-05CH11231. The study made use of instruments located in the College of Chemistry Nuclear Magnetic Resonance (NMR) Facility, partially supported by NIH S10OD024998. The authors are grateful to Dr. Seth Cohen (DARPA) and Dr. David Moore (General Electric) for their helpful comments and suggestions on this work. Moreover, Z.Z. expresses gratitude to Drs. Nikita Hanikel and Daria Kurandina, Ms. Oufan Zhang, and Mr. Boyu Qie for their valuable discussions. Z.Z. also acknowledges financial support from a Kavli ENSI Graduate Student Fellowship.
\n\nThe manuscript was written through contributions of all authors. All authors have given approval to the final version of the manuscript.
\n\nThe authors declare the following competing financial interest(s): Omar M. Yaghi is co-founder of ATOCO Inc., aiming at commercializing related technologies.
\n\nCCDC 2302011 (LAMOF-2) contains the supplementary crystallographic data for this paper.
", "abstract": "We construct a data set of metal\u2013organic framework (MOF) linkers and employ a fine-tuned GPT assistant to propose MOF linker designs by mutating and modifying the existing linker structures. This strategy allows the GPT model to learn the intricate language of chemistry in molecular representations, thereby achieving an enhanced accuracy in generating linker structures compared with its base models. Aiming to highlight the significance of linker design strategies in advancing the discovery of water-harvesting MOFs, we conducted a systematic MOF variant expansion upon state-of-the-art MOF-303 utilizing a multidimensional approach that integrates linker extension with multivariate tuning strategies. We synthesized a series of isoreticular aluminum MOFs, termed Long-Arm MOFs (LAMOF-1 to LAMOF-10), featuring linkers that bear various combinations of heteroatoms in their five-membered ring moiety, replacing pyrazole with either thiophene, furan, or thiazole rings or a combination of two. Beyond their consistent and robust architecture, as demonstrated by permanent porosity and thermal stability, the LAMOF series offers a generalizable synthesis strategy. Importantly, these 10 LAMOFs establish new benchmarks for water uptake (up to 0.64 g g\u207b\u00b9) and operational humidity ranges (between 13 and 53%), thereby expanding the diversity of water-harvesting MOFs.
", "date": "2023-12-13", "date_type": "published", "publication": "Journal of the American Chemical Society", "publisher": "American Chemical Society", "issn": "0002-7863", "official_url": "https://authors.library.caltech.edu/records/ewmpw-3r017", "funders": { "items": [ { "grant_number": "HR0011-21-C-0020" }, { "grant_number": "DE-SC0023454" }, { "grant_number": "CHE-2223442" }, { "grant_number": "DE-AC02-05CH11231" }, { "grant_number": "S10OD024998" }, {} ] }, "doi": "10.1021/jacs.3c12086", "primary_object": { "basename": "ja3c12086_si_001.pdf", "url": "https://authors.library.caltech.edu/records/ewmpw-3r017/files/ja3c12086_si_001.pdf" }, "related_objects": [ { "basename": "ja3c12086_si_002.zip", "url": "https://authors.library.caltech.edu/records/ewmpw-3r017/files/ja3c12086_si_002.zip" } ], "resource_type": "article", "pub_year": "2023", "author_list": "Zheng, Zhiling; Alawadhi, Ali H.; et el." }, { "id": "https://authors.library.caltech.edu/records/wygt1-n8w76", "eprint_status": "archive", "datestamp": "2023-12-19 20:25:45", "lastmod": "2023-12-19 20:25:45", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Liu-Shengchao", "name": { "family": "Liu", "given": "Shengchao" }, "orcid": "0000-0003-2030-2367" }, { "id": "Nie-Weili", "name": { "family": "Nie", "given": "Weili" } }, { "id": "Wang-Chengpeng", "name": { "family": "Wang", "given": "Chengpeng" }, "orcid": "0000-0002-9196-2613" }, { "id": "Lu-Jiarui", "name": { "family": "Lu", "given": "Jiarui" } }, { "id": "Qiao-Zhuoran", "name": { "family": "Qiao", "given": "Zhuoran" } }, { "id": "Liu-Ling", "name": { "family": "Liu", "given": "Ling" } }, { "id": "Tang-Jian", "name": { "family": "Tang", "given": "Jian" } }, { "id": "Xiao-Chaowei", "name": { "family": "Xiao", "given": "Chaowei" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Multi-modal molecule structure\u2013text model for text-based 
retrieval and editing", "ispublished": "pub", "full_text_status": "public", "keywords": "Artificial Intelligence; Computer Networks and Communications; Computer Vision and Pattern Recognition; Human-Computer Interaction; Software", "note": "
\u00a9 The Author(s), under exclusive licence to Springer Nature Limited 2023.
\n\nThis work was done during S.L.'s internship at NVIDIA Research. We thank the insightful comments from M. L. Gill, A. Stern and other team members from AIAlgo and Clara team at NVIDIA. We also thank the kind help from T. Dierks, E. Bolton, P. Thiessen and others from PubChem for confirming the PubChem license.
\n\nThese authors jointly supervised this work: Jian Tang, Chaowei Xiao, Animashree Anandkumar.
S.L., W.N., C.W., Z.Q., C.X. and A.A. conceived and designed the experiments. S.L. performed the experiments. S.L. and C.W. analysed the data. S.L., C.W. and J.L. contributed analysis tools. S.L., W.N., C.W., J.L., Z.Q., L.L., J.T., C.X. and A.A. wrote the paper. J.T., C.X. and A.A. contributed equally to advising this project.
\n\nAll the datasets are provided on Hugging Face at https://huggingface.co/datasets/chao1224/MoleculeSTM/tree/main. Specifically for the release of PubChemSTM, we encountered a big challenge regarding the textual data license. As confirmed with the PubChem group, performing research on these data does not violate their license; however, PubChem does not possess the license for the textual data, which necessitates an extensive evaluation of the license for each of the 280 structure\u2013text pairs in PubChemSTM. This has hindered the release of PubChemSTM. Nevertheless, we have (1) described the detailed preprocessing steps in Supplementary Section A.1, (2) provided the molecules with CID file (https://huggingface.co/datasets/chao1224/MoleculeSTM/blob/main/PubChemSTM_data/raw/CID2SMILES.csv) in PubChemSTM and (3) have also provided the detailed preprocessing scripts (https://github.com/chao1224/MoleculeSTM/tree/main/preprocessing/PubChemSTM). By utilizing these scripts, users can easily reconstruct the PubChemSTM dataset.
\n\nThe source code can be found on GitHub (https://github.com/chao1224/MoleculeSTM/tree/main) and Zenodo62. The scripts for pretraining and three downstream tasks are provided at https://github.com/chao1224/MoleculeSTM/tree/main/scripts. The checkpoints of the pretrained models are provided on Hugging Face at https://huggingface.co/chao1224/MoleculeSTM/tree/main. Beyond the methods described so far, to help users try our MoleculeSTM model, this release includes demos in notebooks (https://github.com/chao1224/MoleculeSTM). Furthermore, users can customize their own datasets by checking the datasets folder (https://github.com/chao1224/MoleculeSTM/tree/main/MoleculeSTM/datasets).
\n\nThe authors declare no competing interests.
", "abstract": "There is increasing adoption of artificial intelligence in drug discovery. However, existing studies use machine learning to mainly utilize the chemical structures of molecules but ignore the vast textual knowledge available in chemistry. Incorporating textual knowledge enables us to realize new drug design objectives, adapt to text-based instructions and predict complex biological activities. Here we present a multi-modal molecule structure\u2013text model, MoleculeSTM, by jointly learning molecules' chemical structures and textual descriptions via a contrastive learning strategy. To train MoleculeSTM, we construct a large multi-modal dataset, namely, PubChemSTM, with over 280,000 chemical structure\u2013text pairs. To demonstrate the effectiveness and utility of MoleculeSTM, we design two challenging zero-shot tasks based on text instructions, including structure\u2013text retrieval and molecule editing. MoleculeSTM has two main properties: open vocabulary and compositionality via natural language. In experiments, MoleculeSTM obtains the state-of-the-art generalization ability to novel biochemical concepts across various benchmarks.
", "date": "2023-12", "date_type": "published", "publication": "Nature Machine Intelligence", "volume": "5", "number": "12", "publisher": "Nature Publishing Group", "pagerange": "1447-1457", "issn": "2522-5839", "official_url": "https://authors.library.caltech.edu/records/wygt1-n8w76", "funders": { "items": [ { "agency": "Bren Named Chair" } ] }, "doi": "10.1038/s42256-023-00759-6", "primary_object": { "basename": "42256_2023_759_MOESM3_ESM.txt", "url": "https://authors.library.caltech.edu/records/wygt1-n8w76/files/42256_2023_759_MOESM3_ESM.txt" }, "related_objects": [ { "basename": "42256_2023_759_MOESM1_ESM.pdf", "url": "https://authors.library.caltech.edu/records/wygt1-n8w76/files/42256_2023_759_MOESM1_ESM.pdf" }, { "basename": "42256_2023_759_MOESM2_ESM.txt", "url": "https://authors.library.caltech.edu/records/wygt1-n8w76/files/42256_2023_759_MOESM2_ESM.txt" } ], "resource_type": "article", "pub_year": "2023", "author_list": "Liu, Shengchao; Nie, Weili; et el." }, { "id": "https://authors.library.caltech.edu/records/yn54g-8d682", "eprint_id": 121054, "eprint_status": "archive", "datestamp": "2023-08-22 21:01:53", "lastmod": "2023-10-18 18:08:02", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kiyasseh-Dani", "name": { "family": "Kiyasseh", "given": "Dani" }, "orcid": "0000-0002-2898-1790" }, { "id": "Ma-Runzhuo", "name": { "family": "Ma", "given": "Runzhuo" }, "orcid": "0000-0001-6381-2661" }, { "id": "Haque-Taseen-F", "name": { "family": "Haque", "given": "Taseen F." }, "orcid": "0000-0002-7165-6539" }, { "id": "Miles-Brian-J", "name": { "family": "Miles", "given": "Brian J." }, "orcid": "0000-0001-7927-9873" }, { "id": "Wagner-Christian", "name": { "family": "Wagner", "given": "Christian" } }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." 
}, "orcid": "0000-0002-0531-1436" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" } ] }, "title": "A vision transformer for decoding surgeon activity from surgical videos", "ispublished": "pub", "full_text_status": "public", "keywords": "Computer Science Applications; Biomedical Engineering; Medicine (miscellaneous); Bioengineering; Biotechnology", "note": "\u00a9 2023. The Author(s). This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made. The images or other third party material in this article are included in the article's Creative Commons license, unless indicated otherwise in a credit line to the material. If material is not included in the article's Creative Commons license and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/. \n\nWe are grateful to T. Chu for the annotation of videos with gestures. We also thank J. Laca and J. Nguyen for early feedback on the presentation of the manuscript. A.J.H. discloses support for the research described in this study from the National Cancer Institute under award no. R01CA251579-01A1 and a multi-year Intuitive Surgical Clinical Research Grant. \n\nContributions. D.K. and A.J.H. contributed to the conception of the study. D.K. contributed to the study design, developed the deep learning models and wrote the manuscript. R.M. and T.H. 
provided annotations for the video samples. D.A.D. provided extensive feedback on the manuscript. B.J.M. provided data for the study. C.W. collected data from SAH and provided feedback on the manuscript. A.J.H. and A.A. provided supervision and contributed to edits of the manuscript. \n\nData availability. Data supporting the results in this study involve surgeon and patient data. As such, while the data from SAH and HMH are not publicly available, de-identified data from USC can be made available upon reasonable request from the authors. \n\nCode availability. Code is made available at https://github.com/danikiyasseh/SAIS.\n\nCompeting interests. D.K. is a paid employee of Vicarious Surgical and a consultant of Flatiron Health. C.W. is a paid consultant of Intuitive Surgical. A.A. is an employee of Nvidia. A.J.H. is a consultant of Intuitive Surgical. The other authors declare no competing interests.\n\nPublished - 41551_2023_Article_1010.pdf
Supplemental Material - 41551_2023_1010_MOESM1_ESM.pdf
", "abstract": "The intraoperative activity of a surgeon has substantial impact on postoperative outcomes. However, for most surgical procedures, the details of intraoperative surgical actions, which can vary widely, are not well understood. Here we report a machine learning system leveraging a vision transformer and supervised contrastive learning for the decoding of elements of intraoperative surgical activity from videos commonly collected during robotic surgeries. The system accurately identified surgical steps, actions performed by the surgeon, the quality of these actions and the relative contribution of individual video frames to the decoding of the actions. Through extensive testing on data from three different hospitals located in two different continents, we show that the system generalizes across videos, surgeons, hospitals and surgical procedures, and that it can provide information on surgical gestures and skills from unannotated videos. Decoding intraoperative activity via accurate machine learning systems could be used to provide surgeons with feedback on their operating skills, and may allow for the identification of optimal surgical behaviour and for the study of relationships between intraoperative factors and postoperative outcomes.", "date": "2023-06", "date_type": "published", "publication": "Nature Biomedical Engineering", "volume": "7", "number": "6", "publisher": "Nature Publishing Group", "pagerange": "780-796", "id_number": "CaltechAUTHORS:20230420-711199500.6", "issn": "2157-846X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230420-711199500.6", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "R01CA251579-01A1" } ] }, "doi": "10.1038/s41551-023-01010-8", "pmcid": "PMC10307635", "primary_object": { "basename": "41551_2023_1010_MOESM1_ESM.pdf", "url": 
"https://authors.library.caltech.edu/records/yn54g-8d682/files/41551_2023_1010_MOESM1_ESM.pdf" }, "related_objects": [ { "basename": "41551_2023_Article_1010.pdf", "url": "https://authors.library.caltech.edu/records/yn54g-8d682/files/41551_2023_Article_1010.pdf" } ], "resource_type": "article", "pub_year": "2023", "author_list": "Kiyasseh, Dani; Ma, Runzhuo; et el." }, { "id": "https://authors.library.caltech.edu/records/ehv4k-4pn80", "eprint_id": 121091, "eprint_status": "archive", "datestamp": "2023-08-22 20:36:25", "lastmod": "2023-10-18 18:09:06", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kiyasseh-Dani", "name": { "family": "Kiyasseh", "given": "Dani" }, "orcid": "0000-0002-2898-1790" }, { "id": "Laca-Jasper-A", "name": { "family": "Laca", "given": "Jasper" } }, { "id": "Haque-Taseen-F", "name": { "family": "Haque", "given": "Taseen F." }, "orcid": "0000-0002-7165-6539" }, { "id": "Otiato-Maxwell-X", "name": { "family": "Otiato", "given": "Maxwell" }, "orcid": "0000-0001-6979-6316" }, { "id": "Miles-Brian-J", "name": { "family": "Miles", "given": "Brian J." }, "orcid": "0000-0001-7927-9873" }, { "id": "Wagner-Christian", "name": { "family": "Wagner", "given": "Christian" } }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." }, "orcid": "0000-0002-0531-1436" }, { "id": "Trinh-Quoc-Dien", "name": { "family": "Trinh", "given": "Quoc-Dien" }, "orcid": "0000-0003-3857-9276" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." 
}, "orcid": "0000-0002-7201-6736" } ] }, "title": "Human visual explanations mitigate bias in AI-based assessment of surgeon skills", "ispublished": "pub", "full_text_status": "public", "keywords": "Health Information Management; Health Informatics; Computer Science Applications; Medicine (miscellaneous)", "note": "\u00a9 The Author(s) 2023. This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made. The images or other third party material in this article are included in the article's Creative Commons license, unless indicated otherwise in a credit line to the material. If material is not included in the article's Creative Commons license and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/. \n\nResearch reported in this publication was supported by the National Cancer Institute under Award No. R01CA251579-01A1. \n\nContributions. D.K. contributed to the conception of the study and the study design, developed the deeplearning models, and wrote the manuscript. J.L. collected the data from the training environment. D.K., J.L., T.F.H., and M.O. provided annotations for the video samples. D.A.D. and Q.-D.T. provided feedback on the manuscript. C.W. collected data from St. Antonius Hospital and B.J.M. collected data from Houston Methodist Hospital and provided feedback on the manuscript. A.J.H. and A.A. provided supervision and contributed to edits of the manuscript. \n\nData availability. The videos of live surgical procedures from the University of Southern California, St. 
Antonius Hospital, and Houston Methodist Hospital are not publicly available. However, the videos and the corresponding annotations of the suturing activities performed by medical students in the training environment are available upon reasonable request from the authors. \n\nCode availability. All models were developed using Python and standard deeplearning libraries such as PyTorch61. The code for the underlying model (SAIS) can be accessed at https://github.com/danikiyasseh/SAIS and that for TWIX can be accessed at https://github.com/danikiyasseh/TWIX. \n\nCompeting interests. The authors declare no competing non-financial interests but the following competing financial interests: D.K. is a paid consultant of Flatiron Health and an employee of Vicarious Surgical, C.W. is a paid consultant of Intuitive Surgical, A.A. is an employee of Nvidia, and A.J.H is a consultant of Intuitive Surgical.\n\nPublished - 41746_2023_Article_766.pdf
Supplemental Material - 41746_2023_766_MOESM1_ESM.pdf
", "abstract": "Artificial intelligence (AI) systems can now reliably assess surgeon skills through videos of intraoperative surgical activity. With such systems informing future high-stakes decisions such as whether to credential surgeons and grant them the privilege to operate on patients, it is critical that they treat all surgeons fairly. However, it remains an open question whether surgical AI systems exhibit bias against surgeon sub-cohorts, and, if so, whether such bias can be mitigated. Here, we examine and mitigate the bias exhibited by a family of surgical AI systems\u2014SAIS\u2014deployed on videos of robotic surgeries from three geographically-diverse hospitals (USA and EU). We show that SAIS exhibits an underskilling bias, erroneously downgrading surgical performance, and an overskilling bias, erroneously upgrading surgical performance, at different rates across surgeon sub-cohorts. To mitigate such bias, we leverage a strategy \u2014TWIX\u2014which teaches an AI system to provide a visual explanation for its skill assessment that otherwise would have been provided by human experts. We show that whereas baseline strategies inconsistently mitigate algorithmic bias, TWIX can effectively mitigate the underskilling and overskilling bias while simultaneously improving the performance of these AI systems across hospitals. We discovered that these findings carry over to the training environment where we assess medical students' skills today. Our study is a critical prerequisite to the eventual implementation of AI-augmented global surgeon credentialing programs, ensuring that all surgeons are treated fairly.", "date": "2023-04-04", "date_type": "published", "publication": "npj Digital Medicine", "volume": "6", "publisher": "Nature Publishing Group", "pagerange": "Art. No. 
54", "id_number": "CaltechAUTHORS:20230420-614686900.13", "issn": "2398-6352", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230420-614686900.13", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "R01CA251579-01A1" } ] }, "doi": "10.1038/s41746-023-00766-2", "pmcid": "PMC10063676", "primary_object": { "basename": "41746_2023_766_MOESM1_ESM.pdf", "url": "https://authors.library.caltech.edu/records/ehv4k-4pn80/files/41746_2023_766_MOESM1_ESM.pdf" }, "related_objects": [ { "basename": "41746_2023_Article_766.pdf", "url": "https://authors.library.caltech.edu/records/ehv4k-4pn80/files/41746_2023_Article_766.pdf" } ], "resource_type": "article", "pub_year": "2023", "author_list": "Kiyasseh, Dani; Laca, Jasper; et el." }, { "id": "https://authors.library.caltech.edu/records/25yyn-6ch14", "eprint_id": 117191, "eprint_status": "archive", "datestamp": "2023-08-22 20:29:45", "lastmod": "2023-10-24 22:01:50", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Inouye-Daniel-A", "name": { "family": "Inouye", "given": "Daniel A." }, "orcid": "0000-0001-7202-4800" }, { "id": "Ma-Runzhuo", "name": { "family": "Ma", "given": "Runzhuo" }, "orcid": "0000-0001-6381-2661" }, { "id": "Nguyen-Jessica-H", "name": { "family": "Nguyen", "given": "Jessica H." }, "orcid": "0000-0003-0454-8463" }, { "id": "Laca-Jasper-A", "name": { "family": "Laca", "given": "Jasper" } }, { "id": "Kocielnik-Rafal", "name": { "family": "Kocielnik", "given": "Rafal" }, "orcid": "0000-0001-5602-6056" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." 
}, "orcid": "0000-0002-7201-6736" } ] }, "title": "Assessing the efficacy of dissection gestures in robotic surgery", "ispublished": "pub", "full_text_status": "public", "keywords": "Health Informatics; Surgery", "note": "\u00a9 2023 Springer Nature. \n\nThe authors declare that no support, financial or otherwise, were received for the preparation of this manuscript. \n\nContributions. D Inouye: project development, data collection and management, data analysis, manuscript writing and editing. R Ma: project development, data management, data analysis, manuscript writing and editing. J Nguyen: project development, data analysis, manuscript writing and editing. J Laca: project development, data analysis. R Kocielnik: project development, data analysis. A Anandkumar: project development. A Hung: project development, data analysis, manuscript writing and editing. \n\nEthics approval. This study was approved by the University of Southern California's Institutional Review Board (protocol HS-16\u201300,318). \n\nInformed consent was obtained per Institutional Review Board protocol. \n\nConflict of interest. Andrew J. Hung has financial disclosures with Intuitive Surgical, Inc.", "abstract": "Our group previously defined a dissection gesture classification system that deconstructs robotic tissue dissection into its most elemental yet meaningful movements. The purpose of this study was to expand upon this framework by adding an assessment of gesture efficacy (ineffective, effective, or erroneous) and analyze dissection patterns between groups of surgeons of varying experience. We defined three possible gesture efficacies as ineffective (no meaningful effect on the tissue), effective (intended effect on the tissue), and erroneous (unintended disruption of the tissue). Novices (0 prior robotic cases), intermediates (1\u201399 cases), and experts (\u2265\u2009100 cases) completed a robotic dissection task in a dry-lab training environment. 
Video recordings were reviewed to classify each gesture and determine its efficacy, then dissection patterns between groups were analyzed. 23 participants completed the task, with 9 novices, 8 intermediates with median caseload 60 (IQR 41\u201380), and 6 experts with median caseload 525 (IQR 413\u2013900). For gesture selection, we found increasing experience associated with increasing proportion of overall dissection gestures (p\u2009=\u20090.009) and decreasing proportion of retraction gestures (p\u2009=\u20090.009). For gesture efficacy, novices performed the greatest proportion of ineffective gestures (9.8%, p\u2009<\u20090.001), intermediates commit the greatest proportion of erroneous gestures (26.8%, p\u2009<\u20090.001), and the three groups performed similar proportions of overall effective gestures, though experts performed the greatest proportion of effective retraction gestures (85.6%, p\u2009<\u20090.001). Between groups of experience, we found significant differences in gesture selection and gesture efficacy. These relationships may provide insight into further improving surgical training.", "date": "2023-04", "date_type": "published", "publication": "Journal of Robotic Surgery", "volume": "17", "number": "2", "publisher": "Springer", "pagerange": "597-603", "id_number": "CaltechAUTHORS:20220930-482429300.5", "issn": "1863-2491", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220930-482429300.5", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1007/s11701-022-01458-x", "resource_type": "article", "pub_year": "2023", "author_list": "Inouye, Daniel A.; Ma, Runzhuo; et al." 
}, { "id": "https://authors.library.caltech.edu/records/2ybre-3g121", "eprint_id": 120697, "eprint_status": "archive", "datestamp": "2023-08-22 20:31:15", "lastmod": "2023-10-23 20:29:34", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Wen-Gege", "name": { "family": "Wen", "given": "Gege" }, "orcid": "0000-0003-1668-3777" }, { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" }, "orcid": "0000-0003-2081-9665" }, { "id": "Long-Qirui", "name": { "family": "Long", "given": "Qirui" }, "orcid": "0000-0002-6572-4021" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Benson-Sally-M", "name": { "family": "Benson", "given": "Sally M." }, "orcid": "0000-0002-3733-4296" } ] }, "title": "Real-time high-resolution CO\u2082 geological storage prediction using nested Fourier neural operators", "ispublished": "pub", "full_text_status": "public", "keywords": "Pollution; Nuclear Energy and Engineering; Renewable Energy, Sustainability and the Environment; Environmental Chemistry", "note": "\u00a9 The Royal Society of Chemistry 2023. \n\nThe authors gratefully acknowledge Yanhua Yuan from ExxonMobil for many helpful conversations and suggestions. G. W. and S. B. gratefully acknowledge the support by ExxonMobil through the Strategic Energy Alliance at Stanford University and the Stanford Center for Carbon Storage. Z. L. gratefully acknowledges the financial support from the Kortschak Scholars, PIMCO Fellows, and Amazon AI4Science Fellows programs. A. A. is supported in part by Bren endowed chair. \n\nAuthor contributions. G. W. conceptualization, methodology, software, data acquisition, data curation, formal analysis, investigation, validation, visualization, writing \u2013 original draft, writing \u2013 review & editing. Z. L. 
methodology, investigation, validation, writing \u2013 original draft, writing \u2013 review & editing. Q. L. data acquisition. K. A. methodology, software, investigation, validation, writing \u2013 review & editing. A. A. funding acquisition, supervision, writing \u2013 review & editing. S. B. conceptualization, formal analysis, funding acquisition, methodology, resources, supervision, writing \u2013 review & editing. \n\nData and code availability. The python code for the Nested FNO model architecture and the data set used in training will be available at GitHub repository (https://github.com/gegewen/nested-fno). \n\nWeb application. The trained Nested FNO model will be hosted in web application https://CCSNet.ai (https://ccsnet.ai) to provide real-time predictions upon the publication of this manuscript. Please also see this link for a demonstration of publicly accessible web application for our previous works. \n\nThere are no conflicts to declare.", "abstract": "Carbon capture and storage (CCS) plays an essential role in global decarbonization. Scaling up CCS deployment requires accurate and high-resolution modeling of the storage reservoir pressure buildup and the gaseous plume migration. However, such modeling is very challenging at scale due to the high computational costs of existing numerical methods. This challenge leads to significant uncertainties in evaluating storage opportunities, which can delay the pace of large-scale CCS deployment. We introduce Nested Fourier Neural Operator (FNO), a machine-learning framework for high-resolution dynamic 3D CO\u2082 storage modeling at a basin scale. Nested FNO produces forecasts at different refinement levels using a hierarchy of FNOs and speeds up flow prediction nearly 700\u2006000 times compared to existing methods. 
By learning the solution operator for the family of governing partial differential equations, Nested FNO creates a general-purpose numerical simulator alternative for CO\u2082 storage with diverse reservoir conditions, geological heterogeneity, and injection schemes. Our framework enables unprecedented real-time modeling and probabilistic simulations that can support the scale-up of global CCS deployment.", "date": "2023-04", "date_type": "published", "publication": "Energy and Environmental Science", "volume": "16", "number": "4", "publisher": "Royal Society of Chemistry", "pagerange": "1732-1741", "id_number": "CaltechAUTHORS:20230404-448520900.5", "issn": "1754-5692", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230404-448520900.5", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "ExxonMobil" }, { "agency": "Stanford University" }, { "agency": "Kortschak Scholars Program" }, { "agency": "PIMCO" }, { "agency": "Amazon AI4Science Fellowship" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" } ] }, "doi": "10.1039/d2ee04204e", "resource_type": "article", "pub_year": "2023", "author_list": "Wen, Gege; Li, Zongyi; et al." }, { "id": "https://authors.library.caltech.edu/records/w21s1-f9826", "eprint_id": 121271, "eprint_status": "archive", "datestamp": "2023-08-20 16:45:22", "lastmod": "2023-10-20 15:23:59", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kiyasseh-Dani", "name": { "family": "Kiyasseh", "given": "Dani" }, "orcid": "0000-0002-2898-1790" }, { "id": "Laca-Jasper-A", "name": { "family": "Laca", "given": "Jasper" } }, { "id": "Haque-Taseen-F", "name": { "family": "Haque", "given": "Taseen F." }, "orcid": "0000-0002-7165-6539" }, { "id": "Miles-Brian-J", "name": { "family": "Miles", "given": "Brian J." 
}, "orcid": "0000-0001-7927-9873" }, { "id": "Wagner-Christian", "name": { "family": "Wagner", "given": "Christian" } }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." }, "orcid": "0000-0002-0531-1436" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" } ] }, "title": "A multi-institutional study using artificial intelligence to provide reliable and fair feedback to surgeons", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 The Author(s) 2023. This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made. The images or other third party material in this article are included in the article's Creative Commons license, unless indicated otherwise in a credit line to the material. If material is not included in the article's Creative Commons license and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/. \n\nResearch reported in this publication was supported by the National Cancer Institute under Award No. R01CA251579-01A1. \n\nContributions. D.K. contributed to the conception of the study and the study design, developed the deep learning models, and wrote the manuscript. J.L. collected the data from the training environment. D.K., J.L., T.H., and M.O. provided annotations for the video samples. D.A.D. provided feedback on the manuscript. C.W. collected data from St. 
Antonius-Hospital and B.J.M. collected data from Houston Methodist Hospital, and provided feedback on the manuscript. A.J.H., and A.A. provided supervision and contributed to edits of the manuscript. \n\nData availability. As the data contain protected health information, the videos of live surgical procedures and the patients' corresponding demographic information from the University of Southern California, St. Antonius Hospital, and Houston Methodist Hospital are not publicly available. However, since the data from the training environment do not involve patients, those videos and annotations are available on Zenodo (https://zenodo.org/record/7221656#.Y-ZIfi_MI2y) upon reasonable request from the authors. Source data for Fig. 1 is in Supplementary Data 1. Source data for Fig. 3 is in Supplementary Data 2. Source data for Fig. 4 is in Supplementary Data 3 and 4. Source data for Fig. 5 is in Supplementary Data 5. \n\nCode availability. While SAIS, the underlying AI system, can be accessed at https://github.com/danikiyasseh/SAIS, the code for the existing study can be found at https://github.com/danikiyasseh/TWIX. \n\nCompeting interests. The authors declare the following competing interests: D.K. is a paid consultant of Flatiron Health and an employee of Vicarious Surgical. C.W. is a paid consultant of Intuitive Surgical. A.A. is an employee of Nvidia. A.J.H is a consultant of Intuitive Surgical. The remaining authors declare no competing interests.\n\nPublished - 43856_2023_Article_263.pdf
Supplemental Material - 43856_2023_263_MOESM1_ESM.csv
Supplemental Material - 43856_2023_263_MOESM2_ESM.csv
Supplemental Material - 43856_2023_263_MOESM3_ESM.xlsx
Supplemental Material - 43856_2023_263_MOESM4_ESM.xlsx
Supplemental Material - 43856_2023_263_MOESM5_ESM.xlsx
Supplemental Material - 43856_2023_263_MOESM6_ESM.pdf
Supplemental Material - 43856_2023_263_MOESM7_ESM.pdf
", "abstract": "Background. Surgeons who receive reliable feedback on their performance quickly master the skills necessary for surgery. Such performance-based feedback can be provided by a recently-developed artificial intelligence (AI) system that assesses a surgeon's skills based on a surgical video while simultaneously highlighting aspects of the video most pertinent to the assessment. However, it remains an open question whether these highlights, or explanations, are equally reliable for all surgeons. \n \nMethods. Here, we systematically quantify the reliability of AI-based explanations on surgical videos from three hospitals across two continents by comparing them to explanations generated by humans experts. To improve the reliability of AI-based explanations, we propose the strategy of training with explanations \u2013TWIX \u2013which uses human explanations as supervision to explicitly teach an AI system to highlight important video frames. \n \nResults. We show that while AI-based explanations often align with human explanations, they are not equally reliable for different sub-cohorts of surgeons (e.g., novices vs. experts), a phenomenon we refer to as an explanation bias. We also show that TWIX enhances the reliability of AI-based explanations, mitigates the explanation bias, and improves the performance of AI systems across hospitals. These findings extend to a training environment where medical students can be provided with feedback today. \n \nConclusions. Our study informs the impending implementation of AI-augmented surgical training and surgeon credentialing programs, and contributes to the safe and fair democratization of surgery.", "date": "2023-03-30", "date_type": "published", "publication": "Communications Medicine", "volume": "3", "publisher": "Nature Publishing Group", "pagerange": "Art. No. 
42", "id_number": "CaltechAUTHORS:20230502-987371300.6", "issn": "2730-664X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230502-987371300.6", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "R01CA251579-01A1" } ] }, "doi": "10.1038/s43856-023-00263-3", "pmcid": "PMC10063640", "primary_object": { "basename": "43856_2023_Article_263.pdf", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_Article_263.pdf" }, "related_objects": [ { "basename": "43856_2023_263_MOESM1_ESM.csv", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_263_MOESM1_ESM.csv" }, { "basename": "43856_2023_263_MOESM2_ESM.csv", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_263_MOESM2_ESM.csv" }, { "basename": "43856_2023_263_MOESM3_ESM.xlsx", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_263_MOESM3_ESM.xlsx" }, { "basename": "43856_2023_263_MOESM4_ESM.xlsx", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_263_MOESM4_ESM.xlsx" }, { "basename": "43856_2023_263_MOESM5_ESM.xlsx", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_263_MOESM5_ESM.xlsx" }, { "basename": "43856_2023_263_MOESM6_ESM.pdf", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_263_MOESM6_ESM.pdf" }, { "basename": "43856_2023_263_MOESM7_ESM.pdf", "url": "https://authors.library.caltech.edu/records/w21s1-f9826/files/43856_2023_263_MOESM7_ESM.pdf" } ], "resource_type": "article", "pub_year": "2023", "author_list": "Kiyasseh, Dani; Laca, Jasper; et al." 
}, { "id": "https://authors.library.caltech.edu/records/32nad-tmr69", "eprint_id": 117830, "eprint_status": "archive", "datestamp": "2023-08-22 19:06:49", "lastmod": "2023-10-24 22:40:39", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" }, { "id": "Bao-Richard", "name": { "family": "Bao", "given": "Richard" } }, { "id": "Sunmola-Idris-O", "name": { "family": "Sunmola", "given": "Idris O." } }, { "id": "Huang-De-An", "name": { "family": "Huang", "given": "De-An" }, "orcid": "0000-0002-6945-7768" }, { "id": "Nguyen-Jessica-H", "name": { "family": "Nguyen", "given": "Jessica H." }, "orcid": "0000-0003-0454-8463" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Capturing fine-grained details for video-based automation of suturing skills assessment", "ispublished": "pub", "full_text_status": "public", "keywords": "Health Informatics; Radiology, Nuclear Medicine and imaging; General Medicine; Surgery; Computer Graphics and Computer-Aided Design; Computer Science Applications; Computer Vision and Pattern Recognition; Biomedical Engineering", "note": "\u00a9 2023 Springer Nature. \n\nWe thank Daniel Sanford, Balint Der, Ryan Hakim, Runzhuo Ma, and Taseen Haque for data collection and grading of technical skill scores through video review. Mimic Technologies, Inc. provided access to the raw kinematic instrument data for each exercise. Center for Robotic Simulation & Education, Catherine & Joseph Aresty Department of Urology, USC Institute of Urology, University of Southern California, Los Angeles, California.\n\nResearch reported in this publication was supported in part by the National Cancer Institute under Award No. R01CA251579-01A1. 
\n\nEthics approval: All procedures performed in studies involving human participants were in accordance with the ethical standards of the institutional and/or national research committee and with the 1964 Helsinki Declaration and its later amendments or comparable ethical standards. Our study complied with protocols put forth by the University of Southern California's IRB. \n\nInformed consent was obtained from all individual participants included in the study. \n\nStatement and Declarations: Andrew J. Hung has financial disclosures with Intuitive Surgical, Inc.\n\nAccepted Version - nihms-1870690.pdf
", "abstract": "Objectives: Manually-collected suturing technical skill scores are strong predictors of continence recovery after robotic radical prostatectomy. Herein, we automate suturing technical skill scoring through computer vision (CV) methods as a scalable method to provide feedback.\n\nMethods: Twenty-two surgeons completed a suturing exercise three times on the Mimic\u2122 Flex VR simulator. Instrument kinematic data (XYZ coordinates of each instrument and pose) were captured at 30 Hz. After standardized training, three human raters manually video segmented suturing task into four sub-stitch phases (Needle handling, Needle targeting, Needle driving, Needle withdrawal) and labeled the corresponding technical skill domains (Needle positioning, Needle entry, Needle driving, and Needle withdrawal). The CV framework extracted RGB features and optical flow frames using a pre-trained AlexNet. Additional CV strategies including auxiliary supervision (using kinematic data during training only) and attention mechanisms were implemented to improve performance.\n\nResults: This study included data from 15 expert surgeons (median caseload 300 [IQR 165\u2013750]) and 7 training surgeons (0 [IQR 0\u20138]). In all, 226 virtual sutures were captured. Automated assessments for Needle positioning performed best with the simplest approach (1 s video; AUC 0.749). Remaining skill domains exhibited improvements with the implementation of auxiliary supervision and attention mechanisms when deployed separately (AUC 0.604\u20130.794). All techniques combined produced the best performance, particularly for Needle driving and Needle withdrawal (AUC 0.959 and 0.879, respectively).\n\nConclusions: This study demonstrated the best performance of automated suturing technical skills assessment to date using advanced CV techniques. 
Future work will determine if a \"human in the loop\" is necessary to verify surgeon evaluations.", "date": "2023-03", "date_type": "published", "publication": "International Journal of Computer Assisted Radiology and Surgery", "volume": "18", "number": "3", "publisher": "Springer", "pagerange": "545-552", "id_number": "CaltechAUTHORS:20221110-430801400.16", "issn": "1861-6429", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221110-430801400.16", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "R01CA251579-01A1" } ] }, "doi": "10.1007/s11548-022-02778-x", "pmcid": "PMC9975072", "primary_object": { "basename": "nihms-1870690.pdf", "url": "https://authors.library.caltech.edu/records/32nad-tmr69/files/nihms-1870690.pdf" }, "resource_type": "article", "pub_year": "2023", "author_list": "Hung, Andrew J.; Bao, Richard; et al." }, { "id": "https://authors.library.caltech.edu/records/rhktp-a6270", "eprint_id": 117475, "eprint_status": "archive", "datestamp": "2023-08-22 18:33:26", "lastmod": "2023-10-24 22:32:31", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Dommer-Abigail", "name": { "family": "Dommer", "given": "Abigail" }, "orcid": "0000-0003-4847-4136" }, { "id": "Casalino-Lorenzo", "name": { "family": "Casalino", "given": "Lorenzo" }, "orcid": "0000-0003-3581-1148" }, { "id": "Kearns-Fiona", "name": { "family": "Kearns", "given": "Fiona" }, "orcid": "0000-0002-5469-9035" }, { "name": { "family": "Rosenfeld", "given": "Mia" }, "orcid": "0000-0002-8961-8231" }, { "name": { "family": "Wauer", "given": "Nicholas" }, "orcid": "0000-0002-1230-9166" }, { "name": { "family": "Ahn", "given": "Surl-Hee" }, "orcid": "0000-0002-3422-805X" }, { "name": { "family": "Russo", "given": "John" }, "orcid": "0000-0002-2813-6554" }, { "name": { "family": "Oliveira", "given": "Sofia" }, "orcid": 
"0000-0001-8753-4950" }, { "name": { "family": "Morris", "given": "Clare" }, "orcid": "0000-0002-4314-5387" }, { "name": { "family": "Bogetti", "given": "Anthony" }, "orcid": "0000-0003-0610-2879" }, { "name": { "family": "Trifan", "given": "Anda" }, "orcid": "0000-0003-4808-9502" }, { "name": { "family": "Brace", "given": "Alexander" }, "orcid": "0000-0001-9873-9177" }, { "name": { "family": "Sztain", "given": "Terra" }, "orcid": "0000-0002-1327-8541" }, { "name": { "family": "Clyde", "given": "Austin" }, "orcid": "0000-0002-3697-7070" }, { "name": { "family": "Ma", "given": "Heng" }, "orcid": "0000-0002-7667-922X" }, { "name": { "family": "Chennubhotla", "given": "Chakra" }, "orcid": "0000-0002-0024-1627" }, { "name": { "family": "Lee", "given": "Hyungro" }, "orcid": "0000-0002-4221-7094" }, { "name": { "family": "Turilli", "given": "Matteo" }, "orcid": "0000-0003-0527-1435" }, { "name": { "family": "Khalid", "given": "Syma" }, "orcid": "0000-0002-3694-5044" }, { "name": { "family": "Tamayo-Mendoza", "given": "Teresa" } }, { "name": { "family": "Welborn", "given": "Matthew" }, "orcid": "0000-0001-8659-6535" }, { "name": { "family": "Christensen", "given": "Anders S." }, "orcid": "0000-0002-7253-6897" }, { "name": { "family": "Smith", "given": "Daniel G. A." }, "orcid": "0000-0001-8626-0900" }, { "id": "Qiao-Zhuoran", "name": { "family": "Qiao", "given": "Zhuoran" }, "orcid": "0000-0002-5704-7331" }, { "name": { "family": "Sirumalla", "given": "Sai K." 
} }, { "name": { "family": "O'Connor", "given": "Michael" } }, { "name": { "family": "Manby", "given": "Frederick" }, "orcid": "0000-0001-7611-714X" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "name": { "family": "Hardy", "given": "David" } }, { "name": { "family": "Phillips", "given": "James" }, "orcid": "0000-0002-2296-3591" }, { "name": { "family": "Stern", "given": "Abraham" } }, { "name": { "family": "Romero", "given": "Josh" } }, { "name": { "family": "Clark", "given": "David" } }, { "name": { "family": "Dorrell", "given": "Mitchell" } }, { "name": { "family": "Maiden", "given": "Tom" } }, { "name": { "family": "Huang", "given": "Lei" } }, { "name": { "family": "McCalpin", "given": "John" }, "orcid": "0000-0002-2535-1355" }, { "name": { "family": "Woods", "given": "Christopher" }, "orcid": "0000-0001-6563-9903" }, { "name": { "family": "Gray", "given": "Alan" } }, { "name": { "family": "Williams", "given": "Matt" }, "orcid": "0000-0003-2198-1058" }, { "name": { "family": "Barker", "given": "Bryan" } }, { "name": { "family": "Rajapaksha", "given": "Harinda" } }, { "name": { "family": "Pitts", "given": "Richard" }, "orcid": "0000-0002-2037-3360" }, { "name": { "family": "Gibbs", "given": "Tom" } }, { "name": { "family": "Stone", "given": "John" }, "orcid": "0000-0001-7215-762X" }, { "name": { "family": "Zuckerman", "given": "Daniel M." }, "orcid": "0000-0001-7662-2031" }, { "name": { "family": "Mulholland", "given": "Adrian J." 
}, "orcid": "0000-0003-1015-4567" }, { "id": "Miller-T-F-III", "name": { "family": "Miller", "given": "Thomas F., III" }, "orcid": "0000-0002-1882-5380" }, { "name": { "family": "Jha", "given": "Shantenu" }, "orcid": "0000-0002-5040-026X" }, { "name": { "family": "Ramanathan", "given": "Arvind" }, "orcid": "0000-0002-1622-5488" }, { "name": { "family": "Chong", "given": "Lillian" }, "orcid": "0000-0002-0590-483X" }, { "name": { "family": "Amaro", "given": "Rommie E." }, "orcid": "0000-0002-9275-9553" } ] }, "title": "#COVIDisAirborne: AI-enabled multiscale computational microscopy of delta SARS-CoV-2 in a respiratory aerosol", "ispublished": "pub", "full_text_status": "public", "keywords": "Hardware and Architecture; Theoretical Computer Science; Software", "note": "\u00a9 The Author(s) 2022. This article is distributed under the terms of the Creative Commons Attribution-NonCommercial 4.0 License (https://creativecommons.org/licenses/by-nc/4.0/) which permits non-commercial use, reproduction and distribution of the work without further permission provided the original work is attributed as specified on the SAGE and Open Access page (https://us.sagepub.com/en-us/nam/open-access-at-sage). \n\nWe thank Prof. Kim Prather for inspiring and informative discussions about aerosols and for her commitment to convey the airborne nature of SARS-CoV-2. We thank D. Veesler for sharing the Delta spike NTD coordinates in advance of publication. We thank B. Messer, D. Maxwell, and the Oak Ridge Leadership Computing Facility at Oak Ridge National Laboratory supported by the DOE under Contract DE-AC05-00OR22725. We thank the Texas Advanced Computing Center Frontera team, especially D. Stanzione and T. Cockerill, and for compute time made available through a Director's Discretionary Allocation (NSF OAC-1818253). We thank the Argonne Leadership Computing Facility supported by the DOE under DE-AC02-06CH11357. 
We thank the Pittsburgh Supercomputer Center for providing priority queues on Bridges-2 through the XSEDE allocation NSF TG-CHE060063. We thank N. Kern and J. Lee of the CHARMM-GUI support team for help converting topologies between NAMD and GROMACS. We thank J. Copperman, G. Simpson, D. Aristoff, and J. Leung for valuable discussions and support from NIH grant GM115805. NAMD and VMD are funded by NIH P41-GM104601. This work was supported by the NSF Center for Aerosol Impacts on Chemistry of the Environment (CAICE), National Science Foundation Center for Chemical Innovation (NSF CHE-1801971), as well as NIH GM132826, NSF RAPID MCB-2032054, an award from the RCSA Research Corp., a UC San Diego Moore's Cancer Center 2020 SARS-CoV-2 seed grant, to R.E.A. This work was also supported by Oracle Cloud credits and related resources provided by the Oracle for Research program. AJM and ASFO receive funding from the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation programme (PREDACTED Advanced Grant, Grant agreement No.: 101021207). \n\nThe author(s) disclosed receipt of the following financial support for the research, authorship, and/or publication of this article: This work was supported by National Science Foundation (CHE- 1801971); National Science Foundation (MCB- 2032054); National Science Foundation (OAC-1818253); National Science Foundation (TG-CHE060063); U.S. Department of Energy (DE-AC02-06CH11357); U.S. Department of Energy (DE-AC05- 00OR22725); National Institutes of Health (P41-GM104601); National Institutes of Health (R01-GM132826). \n\nThe author(s) declared no potential conflicts of interest with respect to the research, authorship, and/or publication of this article.\n\nPublished - 10943420221128233.pdf
", "abstract": "We seek to completely revise current models of airborne transmission of respiratory viruses by providing never-before-seen atomic-level views of the SARS-CoV-2 virus within a respiratory aerosol. Our work dramatically extends the capabilities of multiscale computational microscopy to address the significant gaps that exist in current experimental methods, which are limited in their ability to interrogate aerosols at the atomic/molecular level and thus obscure our understanding of airborne transmission. We demonstrate how our integrated data-driven platform provides a new way of exploring the composition, structure, and dynamics of aerosols and aerosolized viruses, while driving simulation method development along several important axes. We present a series of initial scientific discoveries for the SARS-CoV-2 Delta variant, noting that the full scientific impact of this work has yet to be realized.", "date": "2023-01", "date_type": "published", "publication": "International Journal of High Performance Computing Applications", "volume": "37", "number": "1", "publisher": "SAGE Publications", "pagerange": "28-44", "id_number": "CaltechAUTHORS:20221017-15547800.39", "issn": "1094-3420", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221017-15547800.39", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "CHE-1801971" }, { "agency": "NSF", "grant_number": "MCB-2032054" }, { "agency": "NSF", "grant_number": "OAC-1818253" }, { "agency": "NSF", "grant_number": "TG-CHE060063" }, { "agency": "Department of Energy (DOE)", "grant_number": "DE-AC02-06CH11357" }, { "agency": "Department of Energy (DOE)", "grant_number": "DE-AC05-00OR22725" }, { "agency": "NIH", "grant_number": "P41-GM104601" }, { "agency": "NIH", "grant_number": "R01-GM132826" }, { "agency": "European Research Council (ERC)", "grant_number": "101021207" } ] }, 
"local_group": { "items": [ { "id": "COVID-19" } ] }, "doi": "10.1177/10943420221128233", "pmcid": "PMC9527558", "primary_object": { "basename": "10943420221128233.pdf", "url": "https://authors.library.caltech.edu/records/rhktp-a6270/files/10943420221128233.pdf" }, "resource_type": "article", "pub_year": "2023", "author_list": "Dommer, Abigail; Casalino, Lorenzo; et al." }, { "id": "https://authors.library.caltech.edu/records/qw8kb-57104", "eprint_id": 119179, "eprint_status": "archive", "datestamp": "2023-08-22 18:31:32", "lastmod": "2023-10-24 23:50:38", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Ma-Runzhuo", "name": { "family": "Ma", "given": "Runzhuo" }, "orcid": "0000-0001-6381-2661" }, { "id": "Ramaswamy-Ashwin", "name": { "family": "Ramaswamy", "given": "Ashwin" }, "orcid": "0000-0002-8816-7838" }, { "id": "Xu-Jiashu", "name": { "family": "Xu", "given": "Jiashu" }, "orcid": "0000-0003-4093-2315" }, { "id": "Trinh-Loc", "name": { "family": "Trinh", "given": "Loc" } }, { "id": "Kiyasseh-Dani", "name": { "family": "Kiyasseh", "given": "Dani" }, "orcid": "0000-0002-2898-1790" }, { "id": "Chu-Timothy-N", "name": { "family": "Chu", "given": "Timothy N." } }, { "id": "Wong-Elyssa-Y", "name": { "family": "Wong", "given": "Elyssa Y." } }, { "id": "Lee-Ryan-S", "name": { "family": "Lee", "given": "Ryan S." } }, { "id": "Rodriguez-Ivan", "name": { "family": "Rodriguez", "given": "Ivan" } }, { "id": "DeMeo-Gina", "name": { "family": "DeMeo", "given": "Gina" } }, { "id": "Desai-Aditya", "name": { "family": "Desai", "given": "Aditya" } }, { "id": "Otiato-Maxwell-X", "name": { "family": "Otiato", "given": "Maxwell X." }, "orcid": "0000-0001-6979-6316" }, { "id": "Roberts-Sidney-I", "name": { "family": "Roberts", "given": "Sidney I." } }, { "id": "Nguyen-Jessica-H", "name": { "family": "Nguyen", "given": "Jessica H." 
}, "orcid": "0000-0003-0454-8463" }, { "id": "Laca-Jasper-A", "name": { "family": "Laca", "given": "Jasper" } }, { "id": "Liu-Yan", "name": { "family": "Liu", "given": "Yan" }, "orcid": "0000-0002-5837-4908" }, { "id": "Urbanova-Katarina", "name": { "family": "Urbanova", "given": "Katarina" } }, { "id": "Wagner-Christian", "name": { "family": "Wagner", "given": "Christian" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hu-Jim-C", "name": { "family": "Hu", "given": "Jim C." }, "orcid": "0000-0003-2562-8024" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" } ] }, "title": "Surgical gestures as a method to quantify surgical performance and predict patient outcomes", "ispublished": "pub", "full_text_status": "public", "keywords": "Health Information Management; Health Informatics; Computer Science Applications; Medicine (miscellaneous)", "note": "This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made. The images or other third party material in this article are included in the article's Creative Commons license, unless indicated otherwise in a credit line to the material. If material is not included in the article's Creative Commons license and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/. \n\nThis study was supported in part by the National Cancer Institute under Award No. R01CA273031. \n\nContributions. A.J.H. conceived of the study. A.J.H. 
and J.C.H. obtained the funding. A.J.H., R.M., J.L., J.H.N., and C.W. designed and provided oversight for the administration and implementation of the study. R.M., T.N.C., I.R., G.D., A.D., M.X.O., K.U., S.I.R., and C.W. collected the data and annotated the surgical videos. R.M., J.X., L.T., and D.K. performed the data analysis and visualization. A.A. and Y.L. provided data analysis guidance and supervision. R.M., A.R., and R.S.L. wrote the draft of the manuscript. \n\nData availability. The datasets generated during and/or analyzed during the current study are available from the corresponding author on reasonable request. \n\nCode availability. The code of this article can be found by: https://github.com/crseusc/NS-Gestures-and-EF-outcomes. \n\nCompeting interests. C.W. declares no competing non-financial interests but reports financial disclosures with Intuitive Surgical, Inc. A.A. declares no competing non-financial interests but is a paid employee of Nvidia. J.C.H. declares no competing non-financial interests but the following competing financial interests: salary support from the Frederick J. and Theresa Dow Wallace Fund of the New York and from Prostate Cancer Foundation Challenge Award. Also salary support from NIH R01 CA241758 and R01 CA259173, PCORI CER-2019C1-15682 and CER-2019C2-17372. A.J.H. declares no competing non-financial interests but reports financial disclosures with Intuitive Surgical, Inc. The remaining authors declare no competing interests.\n\nPublished - s41746-022-00738-y.pdf
", "abstract": "How well a surgery is performed impacts a patient's outcomes; however, objective quantification of performance remains an unsolved challenge. Deconstructing a procedure into discrete instrument-tissue \"gestures\" is a emerging way to understand surgery. To establish this paradigm in a procedure where performance is the most important factor for patient outcomes, we identify 34,323 individual gestures performed in 80 nerve-sparing robot-assisted radical prostatectomies from two international medical centers. Gestures are classified into nine distinct dissection gestures (e.g., hot cut) and four supporting gestures (e.g., retraction). Our primary outcome is to identify factors impacting a patient's 1-year erectile function (EF) recovery after radical prostatectomy. We find that less use of hot cut and more use of peel/push are statistically associated with better chance of 1-year EF recovery. Our results also show interactions between surgeon experience and gesture types\u2014similar gesture selection resulted in different EF recovery rates dependent on surgeon experience. To further validate this framework, two teams independently constructe distinct machine learning models using gesture sequences vs. traditional clinical features to predict 1-year EF. In both models, gesture sequences are able to better predict 1-year EF (Team 1: AUC 0.77, 95% CI 0.73\u20130.81; Team 2: AUC 0.68, 95% CI 0.66\u20130.70) than traditional clinical features (Team 1: AUC 0.69, 95% CI 0.65\u20130.73; Team 2: AUC 0.65, 95% CI 0.62\u20130.68). Our results suggest that gestures provide a granular method to objectively indicate surgical performance and outcomes. Application of this methodology to other surgeries may lead to discoveries on methods to improve surgery.", "date": "2022-12-22", "date_type": "published", "publication": "npj Digital Medicine", "volume": "5", "publisher": "Springer Science and Business Media LLC", "pagerange": "Art. No. 
187", "id_number": "CaltechAUTHORS:20230209-988069100.14", "issn": "2398-6352", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230209-988069100.14", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "R01CA273031" } ] }, "doi": "10.1038/s41746-022-00738-y", "pmcid": "PMC9780308", "primary_object": { "basename": "s41746-022-00738-y.pdf", "url": "https://authors.library.caltech.edu/records/qw8kb-57104/files/s41746-022-00738-y.pdf" }, "resource_type": "article", "pub_year": "2022", "author_list": "Ma, Runzhuo; Ramaswamy, Ashwin; et el." }, { "id": "https://authors.library.caltech.edu/records/vwezt-wv943", "eprint_id": 118207, "eprint_status": "archive", "datestamp": "2023-08-22 18:19:13", "lastmod": "2023-10-23 20:10:29", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Zhao-Jiawei", "name": { "family": "Zhao", "given": "Jiawei" }, "orcid": "0000-0002-5726-6040" }, { "id": "Dai-Steve", "name": { "family": "Dai", "given": "Steve" }, "orcid": "0000-0002-5045-1964" }, { "id": "Venkatesan-Rangharajan", "name": { "family": "Venkatesan", "given": "Rangharajan" } }, { "id": "Zimmer-Brian", "name": { "family": "Zimmer", "given": "Brian" }, "orcid": "0000-0001-9997-3141" }, { "id": "Ali-Mustafa", "name": { "family": "Ali", "given": "Mustafa" }, "orcid": "0000-0002-4452-6464" }, { "id": "Liu-Ming-Yu", "name": { "family": "Liu", "given": "Ming-Yu" }, "orcid": "0000-0002-2951-2398" }, { "id": "Khailany-Brucek", "name": { "family": "Khailany", "given": "Brucek" }, "orcid": "0000-0002-7584-3489" }, { "id": "Dally-William-J", "name": { "family": "Dally", "given": "William J." 
}, "orcid": "0000-0003-4632-2876" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "LNS-Madam: Low-Precision Training in Logarithmic Number System using Multiplicative Weight Update", "ispublished": "pub", "full_text_status": "public", "keywords": "Computational Theory and Mathematics; Hardware and Architecture; Theoretical Computer Science; Software", "abstract": "Representing deep neural networks (DNNs) in low-precision is a promising approach to enable efficient acceleration and memory reduction. Previous methods that train DNNs in low-precision typically keep a copy of weights in high-precision during the weight updates. Directly training with low-precision weights leads to accuracy degradation due to complex interactions between the low-precision number systems and the learning algorithms. To address this issue, we develop a co-designed low-precision training framework, termed LNS-Madam, in which we jointly design a logarithmic number system (LNS) and a multiplicative weight update algorithm (Madam). We prove that LNS-Madam results in low quantization error during weight updates, leading to stable performance even if the precision is limited. We further propose a hardware design of LNS-Madam that resolves practical challenges in implementing an efficient datapath for LNS computations. Our implementation effectively reduces energy overhead incurred by LNS-to-integer conversion and partial sum accumulation. Experimental results show that LNS-Madam achieves comparable accuracy to full-precision counterparts with only 8 bits on popular computer vision and natural language tasks. 
Compared to FP32 and FP8, LNS-Madam reduces the energy consumption by over 90% and 55%, respectively.", "date": "2022-12", "date_type": "published", "publication": "IEEE Transactions on Computers", "volume": "71", "number": "12", "publisher": "IEEE", "pagerange": "3179-3190", "id_number": "CaltechAUTHORS:20221202-906480600.2", "issn": "0018-9340", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221202-906480600.2", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1109/tc.2022.3202747", "resource_type": "article", "pub_year": "2022", "author_list": "Zhao, Jiawei; Dai, Steve; et el." }, { "id": "https://authors.library.caltech.edu/records/c9g79-b2898", "eprint_id": 118004, "eprint_status": "archive", "datestamp": "2023-08-22 18:18:49", "lastmod": "2023-10-24 22:45:13", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Laca-Jasper-A", "name": { "family": "Laca", "given": "Jasper A." } }, { "id": "Kocielnik-Rafal", "name": { "family": "Kocielnik", "given": "Rafal" }, "orcid": "0000-0001-5602-6056" }, { "id": "Nguyen-Jessica-H", "name": { "family": "Nguyen", "given": "Jessica H." }, "orcid": "0000-0003-0454-8463" }, { "id": "You-Jonathan", "name": { "family": "You", "given": "Jonathan" } }, { "id": "Tsang-Ryan", "name": { "family": "Tsang", "given": "Ryan" } }, { "id": "Wong-Elyssa-Y", "name": { "family": "Wong", "given": "Elyssa Y." } }, { "id": "Shtulman-Andrew", "name": { "family": "Shtulman", "given": "Andrew" }, "orcid": "0000-0002-4687-3099" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." 
}, "orcid": "0000-0002-7201-6736" } ] }, "title": "Using Real-time Feedback To Improve Surgical Performance on a Robotic Tissue Dissection Task", "ispublished": "pub", "full_text_status": "public", "keywords": "Urology", "note": "\u00a9 2022 The Author(s). Published by Elsevier B.V. on behalf of European Association of Urology Under a Creative Commons license. Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0) \n\nThis work was supported by the National Science Foundation under grant #2030859 to the Computing Research Association for the CIFellows Project. The sponsor played a role in analysis and interpretation of the data and review of the manuscript.\n\nPublished - main.pdf
", "abstract": "Background: There is no standard for the feedback that an attending surgeon provides to a training surgeon, which may lead to variable outcomes in teaching cases. \n\nObjective: To create and administer standardized feedback to medical students in an attempt to improve performance and learning. \n\nDesign, setting, and participants: A cohort of 45 medical students was recruited from a single medical school. Participants were randomly assigned to two groups. Both completed two rounds of a robotic surgical dissection task on a da Vinci Xi surgical system. The first round was the baseline assessment. In the second round, one group received feedback and the other served as the control (no feedback). \n\nOutcome measurements and statistical analysis: Video from each round was retrospectively reviewed by four blinded raters and given a total error tally (primary outcome) and a technical skills score (Global Evaluative Assessment of Robotic Surgery [GEARS]). Generalized linear models were used for statistical modeling. According to their initial performance, each participant was categorized as either an innate performer or an underperformer, depending on whether their error tally was above or below the median. \n\nResults and limitations: In round 2, the intervention group had a larger decrease in error rate than the control group, with a risk ratio (RR) of 1.51 (95% confidence interval [CI] 1.07\u20132.14; p = 0.02). The intervention group also had a greater increase in GEARS score in comparison to the control group, with a mean group difference of 2.15 (95% CI 0.81\u20133.49; p < 0.01). The interaction effect between innate performers versus underperformers and the intervention was statistically significant for the error rates, at F(1,38) = 5.16 (p = 0.03). 
Specifically, the intervention had a statistically significant effect on the error rate for underperformers (RR 2.23, 95% CI 1.37\u20133.62; p < 0.01) but not for innate performers (RR 1.03, 95% CI 0.63\u20131.68; p = 0.91). \n\nConclusions: Real-time feedback improved performance globally compared to the control. The benefit of real-time feedback was stronger for underperformers than for trainees with innate skill. \n\nPatient summary: We found that real-time feedback during a training task using a surgical robot improved the performance of trainees when the task was repeated. This feedback approach could help in training doctors in robotic surgery.", "date": "2022-12", "date_type": "published", "publication": "European Urology Open Science", "volume": "46", "publisher": "Elsevier", "pagerange": "15-21", "id_number": "CaltechAUTHORS:20221122-564647900.20", "issn": "2666-1683", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221122-564647900.20", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "CCF-2030859" } ] }, "doi": "10.1016/j.euros.2022.09.015", "pmcid": "PMC9732447", "primary_object": { "basename": "main.pdf", "url": "https://authors.library.caltech.edu/records/c9g79-b2898/files/main.pdf" }, "resource_type": "article", "pub_year": "2022", "author_list": "Laca, Jasper A.; Kocielnik, Rafal; et al." 
}, { "id": "https://authors.library.caltech.edu/records/dsq3y-8gb92", "eprint_id": 117343, "eprint_status": "archive", "datestamp": "2023-08-22 17:16:39", "lastmod": "2023-10-24 22:29:28", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Trifan-Anda", "name": { "family": "Trifan", "given": "Anda" }, "orcid": "0000-0003-4808-9502" }, { "name": { "family": "Gorgun", "given": "Defne" } }, { "name": { "family": "Salim", "given": "Michael" } }, { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" } }, { "name": { "family": "Brace", "given": "Alexander" } }, { "name": { "family": "Zvyagin", "given": "Maxim" } }, { "name": { "family": "Ma", "given": "Heng" } }, { "name": { "family": "Clyde", "given": "Austin" } }, { "name": { "family": "Clark", "given": "David" } }, { "name": { "family": "Hardy", "given": "David J." } }, { "name": { "family": "Burnley", "given": "Tom" } }, { "name": { "family": "Huang", "given": "Lei" } }, { "name": { "family": "McCalpin", "given": "John" } }, { "name": { "family": "Emani", "given": "Murali" } }, { "name": { "family": "Yoo", "given": "Hyenseung" } }, { "name": { "family": "Yin", "given": "Junqi" } }, { "name": { "family": "Tsaris", "given": "Aristeidis" } }, { "name": { "family": "Subbiah", "given": "Vishal" } }, { "name": { "family": "Raza", "given": "Tanveer" } }, { "name": { "family": "Liu", "given": "Jessica" } }, { "name": { "family": "Trebesch", "given": "Noah" } }, { "name": { "family": "Wells", "given": "Geoffrey" } }, { "name": { "family": "Mysore", "given": "Venkatesh" } }, { "name": { "family": "Gibbs", "given": "Thomas" } }, { "name": { "family": "Phillips", "given": "James" } }, { "name": { "family": "Chennubhotla", "given": "S. 
Chakra" } }, { "name": { "family": "Foster", "given": "Ian" }, "orcid": "0000-0003-2129-5269" }, { "name": { "family": "Stevens", "given": "Rick" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "name": { "family": "Vishwanath", "given": "Venkatram" } }, { "name": { "family": "Stone", "given": "John E." } }, { "name": { "family": "Tajkhorshid", "given": "Emad" } }, { "name": { "family": "Harris", "given": "Sarah A." } }, { "name": { "family": "Ramanathan", "given": "Arvind" }, "orcid": "0000-0002-1622-5488" } ] }, "title": "Intelligent resolution: Integrating Cryo-EM with AI-driven multi-resolution simulations to observe the severe acute respiratory syndrome coronavirus-2 replication-transcription machinery in action", "ispublished": "pub", "full_text_status": "public", "keywords": "Hardware and Architecture; Theoretical Computer Science; Software", "abstract": "The severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2) replication transcription complex (RTC) is a multi-domain protein responsible for replicating and transcribing the viral mRNA inside a human cell. Attacking RTC function with pharmaceutical compounds is a pathway to treating COVID-19. Conventional tools, e.g. cryo-electron microscopy and all-atom molecular dynamics (AAMD), do not provide sufficiently high resolution or timescale to capture important dynamics of this molecular machine. Consequently, we develop an innovative workflow that bridges the gap between these resolutions, using mesoscale fluctuating finite element analysis (FFEA) continuum simulations and a hierarchy of AI-methods that continually learn and infer features for maintaining consistency between AAMD and FFEA simulations. We leverage a multi-site distributed workflow manager to orchestrate AI, FFEA, and AAMD jobs, providing optimal resource utilization across HPC centers. 
Our study provides unprecedented access to study the SARS-CoV-2 RTC machinery, while providing general capability for AI-enabled multi-resolution simulations at scale.", "date": "2022-10-12", "date_type": "published", "publication": "International Journal of High Performance Computing Applications", "publisher": "SAGE Publications", "id_number": "CaltechAUTHORS:20221011-459145000.39", "issn": "1094-3420", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221011-459145000.39", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Department of Energy (DOE)" }, { "agency": "NIH" } ] }, "doi": "10.1177/10943420221113513", "resource_type": "article", "pub_year": "2022", "author_list": "Trifan, Anda; Gorgun, Defne; et el." }, { "id": "https://authors.library.caltech.edu/records/2hkzz-hy091", "eprint_id": 115593, "eprint_status": "archive", "datestamp": "2023-08-20 08:35:46", "lastmod": "2023-10-24 16:36:32", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Hoeller-David", "name": { "family": "Hoeller", "given": "David" }, "orcid": "0000-0001-8010-9011" }, { "id": "Rudin-Nikita", "name": { "family": "Rudin", "given": "Nikita" }, "orcid": "0000-0001-5893-0348" }, { "id": "Choy-Christopher", "name": { "family": "Choy", "given": "Christopher" }, "orcid": "0000-0002-6566-3193" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hutter-Marco", "name": { "family": "Hutter", "given": "Marco" }, "orcid": "0000-0001-9049-534X" } ] }, "title": "Neural Scene Representation for Locomotion on Structured Terrain", "ispublished": "pub", "full_text_status": "public", "keywords": "Representation learning, deep learning for visual perception", "note": "\u00a9 2022 IEEE. \n\nManuscript received 24 February 2022; accepted 6 June 2022. 
Date of publication 20 June 2022; date of current version 18 July 2022. \n\nThis letter was recommended for publication by Associate Editor D. Sadigh and Editor J. Kober upon evaluation of the reviewers' comments. This work was supported in part by NVIDIA, the Swiss National Science Foundation (SNSF) under Project 188596, in part by the National Centre of Competence in Research Robotics (NCCR Robotics), and in part by the European Union's Horizon 2020 Research and Innovation Program under Grant Agreement 780883. This work was also conducted as part of ANYmal Research, a community to advance legged robotics.\n\nAccepted Version - 2206.08077.pdf
", "abstract": "We propose a learning-based method to reconstruct the local terrain for locomotion with a mobile robot traversing urban environments. Using a stream of depth measurements from the onboard cameras and the robot's trajectory, the algorithm estimates the topography in the robot's vicinity. The raw measurements from these cameras are noisy and only provide partial and occluded observations that in many cases do not show the terrain the robot stands on. Therefore, we propose a 3D reconstruction model that faithfully reconstructs the scene, despite the noisy measurements and large amounts of missing data coming from the blind spots of the camera arrangement. The model consists of a 4D fully convolutional network on point clouds that learns the geometric priors to complete the scene from the context and an auto-regressive feedback to leverage spatio-temporal consistency and use evidence from the past. The network can be solely trained with synthetic data, and due to extensive augmentation, it is robust in the real world, as shown in the validation on a quadrupedal robot, ANYmal, traversing challenging settings. 
We run the pipeline on the robot's onboard low-power computer using an efficient sparse tensor implementation and show that the proposed method outperforms classical map representations.", "date": "2022-10", "date_type": "published", "publication": "IEEE Robotics and Automation Letters", "volume": "7", "number": "4", "publisher": "IEEE", "pagerange": "8667-8674", "id_number": "CaltechAUTHORS:20220714-224603901", "issn": "2377-3766", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220714-224603901", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NVIDIA Corporation" }, { "agency": "Swiss National Science Foundation (SNSF)", "grant_number": "188596" }, { "agency": "National Centre of Competence in Research Robotics" }, { "agency": "European Research Council (ERC)", "grant_number": "780883" } ] }, "doi": "10.1109/LRA.2022.3184779", "primary_object": { "basename": "2206.08077.pdf", "url": "https://authors.library.caltech.edu/records/2hkzz-hy091/files/2206.08077.pdf" }, "resource_type": "article", "pub_year": "2022", "author_list": "Hoeller, David; Rudin, Nikita; et el." }, { "id": "https://authors.library.caltech.edu/records/p7bmp-vrd31", "eprint_id": 112837, "eprint_status": "archive", "datestamp": "2023-08-20 08:29:16", "lastmod": "2023-10-23 22:48:25", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Pangal-Dhiraj-J", "name": { "family": "Pangal", "given": "Dhiraj J." 
}, "orcid": "0000-0001-7391-9825" }, { "id": "Kugener-Guillaume", "name": { "family": "Kugener", "given": "Guillaume" }, "orcid": "0000-0002-4697-2847" }, { "id": "Cardinal-Tyler", "name": { "family": "Cardinal", "given": "Tyler" }, "orcid": "0000-0001-8277-6942" }, { "id": "Lechtholz-Zey-Elizabeth", "name": { "family": "Lechtholz-Zey", "given": "Elizabeth" } }, { "id": "Collet-Casey", "name": { "family": "Collet", "given": "Casey" } }, { "id": "Lasky-Sasha", "name": { "family": "Lasky", "given": "Sasha" } }, { "id": "Sundaram-Shivani", "name": { "family": "Sundaram", "given": "Shivani" }, "orcid": "0000-0003-2863-9204" }, { "id": "Zhu-Yichao", "name": { "family": "Zhu", "given": "Yichao" } }, { "id": "Roshannai-Arman", "name": { "family": "Roshannai", "given": "Arman" } }, { "id": "Chan-Justin", "name": { "family": "Chan", "given": "Justin" } }, { "id": "Sinha-Aditya", "name": { "family": "Sinha", "given": "Aditya" } }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Zada-Gabriel", "name": { "family": "Zada", "given": "Gabriel" }, "orcid": "0000-0001-5821-902X" }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." }, "orcid": "0000-0002-0531-1436" } ] }, "title": "Use of surgical video\u2013based automated performance metrics to predict blood loss and success of simulated vascular injury control in neurosurgery: a pilot study", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 American Association of Neurological Surgeons. \n\nOnline Publication Date: 31 Dec 2021. \n\nAuthor Contributions\nConception and design: Pangal, Kugener, Zhu, Zada, Donoho. Acquisition of data: Pangal, Kugener, Lechtholz-Zey, Collet, Lasky, Sundaram, Chan, Zada, Donoho. 
Analysis and interpretation of data: Pangal, Kugener, Lechtholz-Zey, Collet, Lasky, Sundaram, Zhu, Roshannai, Chan, Donoho. Drafting the article: Pangal, Kugener, Donoho. Critically revising the article: Pangal, Kugener, Cardinal, Roshannai, Chan, Sinha, Hung, Anandkumar, Zada, Donoho. Reviewed submitted version of manuscript: Pangal, Kugener, Cardinal, Sinha, Hung, Anandkumar, Zada, Donoho. Approved the final version of the manuscript on behalf of all authors: Pangal. Statistical analysis: Pangal, Kugener, Cardinal, Donoho. Administrative/technical/material support: Zada, Donoho. Study supervision: Zada, Donoho. \n\nDisclosures. Dr. Hung is a consultant for Johnson and Johnson, Mimic Technologies, and Quantgene.", "abstract": "Objective: Experts can assess surgeon skill using surgical video, but a limited number of expert surgeons are available. Automated performance metrics (APMs) are a promising alternative but have not been created from operative videos in neurosurgery to date. The authors aimed to evaluate whether video-based APMs can predict task success and blood loss during endonasal endoscopic surgery in a validated cadaveric simulator of vascular injury of the internal carotid artery. \n\nMethods: Videos of cadaveric simulation trials by 73 neurosurgeons and otorhinolaryngologists were analyzed and manually annotated with bounding boxes to identify the surgical instruments in the frame. APMs in five domains were defined\u2014instrument usage, time-to-phase, instrument disappearance, instrument movement, and instrument interactions\u2014on the basis of expert analysis and task-specific surgical progressions. Bounding-box data of instrument position were then used to generate APMs for each trial. Multivariate linear regression was used to test for the associations between APMs and blood loss and task success (hemorrhage control in less than 5 minutes). The APMs of 93 successful trials were compared with the APMs of 49 unsuccessful trials. 
\n\nResults: In total, 29,151 frames of surgical video were annotated. Successful simulation trials had superior APMs in each domain, including proportionately more time spent with the key instruments in view (p 2 value of 0.87 (p < 0.001). \n\nConclusions: Video-based APMs were superior predictors of simulation trial success and blood loss than surgeon characteristics such as case volume and attending status. Surgeon educators can use APMs to assess competency, quantify performance, and provide actionable, structured feedback in order to improve patient outcomes. Validation of APMs provides a benchmark for further development of fully automated video assessment pipelines that utilize machine learning and computer vision.", "date": "2022-09", "date_type": "published", "publication": "Journal of Neurosurgery", "volume": "137", "number": "3", "publisher": "American Association of Neurological Surgeons", "pagerange": "840-849", "id_number": "CaltechAUTHORS:20220112-7446100", "issn": "0022-3085", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220112-7446100", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.3171/2021.10.jns211064", "resource_type": "article", "pub_year": "2022", "author_list": "Pangal, Dhiraj J.; Kugener, Guillaume; et el." }, { "id": "https://authors.library.caltech.edu/records/5dfyx-1yq85", "eprint_id": 116626, "eprint_status": "archive", "datestamp": "2023-08-22 17:32:12", "lastmod": "2023-10-24 21:08:23", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Markarian-Nicholas", "name": { "family": "Markarian", "given": "Nicholas" } }, { "id": "Kugener-Guillaume", "name": { "family": "Kugener", "given": "Guillaume" }, "orcid": "0000-0002-4697-2847" }, { "id": "Pangal-Dhiraj-J", "name": { "family": "Pangal", "given": "Dhiraj J." 
}, "orcid": "0000-0001-7391-9825" }, { "id": "Unadkat-Vyom", "name": { "family": "Unadkat", "given": "Vyom" } }, { "id": "Sinha-Aditya", "name": { "family": "Sinha", "given": "Aditya" } }, { "id": "Zhu-Yichao", "name": { "family": "Zhu", "given": "Yichao" } }, { "id": "Roshannai-Arman", "name": { "family": "Roshannai", "given": "Arman" } }, { "id": "Chan-Justin", "name": { "family": "Chan", "given": "Justin" } }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" }, { "id": "Wrobel-Bozena-B", "name": { "family": "Wrobel", "given": "Bozena B." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Zada-Gabriel", "name": { "family": "Zada", "given": "Gabriel" }, "orcid": "0000-0001-5821-902X" }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." }, "orcid": "0000-0002-0531-1436" } ] }, "title": "Validation of Machine Learning-Based Automated Surgical Instrument Annotation Using Publicly Available Intraoperative Video", "ispublished": "pub", "full_text_status": "public", "keywords": "Neurology (clinical); Surgery", "abstract": "BACKGROUND: Intraoperative tool movement data have been demonstrated to be clinically useful in quantifying surgical performance. However, collecting this information from intraoperative video requires laborious hand annotation. The ability to automatically annotate tools in surgical video would advance surgical data science by eliminating a time-intensive step in research. \n\nOBJECTIVE: To identify whether machine learning (ML) can automatically identify surgical instruments contained within neurosurgical video. \n\nMETHODS: A ML model which automatically identifies surgical instruments in frame was developed and trained on multiple publicly available surgical video data sets with instrument location annotations. 
A total of 39\u2009693 frames from 4 data sets were used (endoscopic endonasal surgery [EEA] [30\u2009015 frames], cataract surgery [4670], laparoscopic cholecystectomy [2532], and microscope-assisted brain/spine tumor removal [2476]). A second model trained only on EEA video was also developed. Intraoperative EEA videos from YouTube were used for test data (3 videos, 1239 frames). \n\nRESULTS: The YouTube data set contained 2169 total instruments. Mean average precision (mAP) for instrument detection on the YouTube data set was 0.74. The mAP for each individual video was 0.65, 0.74, and 0.89. The second model trained only on EEA video also had an overall mAP of 0.74 (0.62, 0.84, and 0.88 for individual videos). Development costs were $130 for manual video annotation and under $100 for computation. \n\nCONCLUSION: Surgical instruments contained within endoscopic endonasal intraoperative video can be detected using a fully automated ML model. The addition of disparate surgical data sets did not improve model performance, although these data sets may improve generalizability of the model in other use cases.", "date": "2022-09", "date_type": "published", "publication": "Operative Neurosurgery", "volume": "23", "number": "3", "publisher": "Wolters Kluwer", "pagerange": "235-240", "id_number": "CaltechAUTHORS:20220908-194215690", "issn": "2332-4252", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220908-194215690", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1227/ons.0000000000000274", "resource_type": "article", "pub_year": "2022", "author_list": "Markarian, Nicholas; Kugener, Guillaume; et al." 
}, { "id": "https://authors.library.caltech.edu/records/hpyps-c1q28", "eprint_id": 116869, "eprint_status": "archive", "datestamp": "2023-08-22 17:10:44", "lastmod": "2023-10-23 20:08:17", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Patti-Taylor-L", "name": { "family": "Patti", "given": "Taylor L." }, "orcid": "0000-0002-4242-6072" }, { "id": "Kossaifi-Jean", "name": { "family": "Kossaifi", "given": "Jean" }, "orcid": "0000-0002-4445-3429" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Yelin-Susanne-F", "name": { "family": "Yelin", "given": "Susanne F." } } ] }, "title": "Variational quantum optimization with multibasis encodings", "ispublished": "pub", "full_text_status": "public", "keywords": "General Physics and Astronomy", "note": "This work was done during T.L.P.'s internship at NVIDIA. At CalTech, A.A. is supported in part by the Bren endowed chair, and Microsoft, Google, Adobe faculty fellowships. S.F.Y. thanks the AFOSR and the NSF for funding. The authors would like to thank Brucek Khailany, Johnnie Gray, Garnet Chan, Andreas Hehn, and Adam Jedrych for conversations.", "abstract": "Despite extensive research efforts, few quantum algorithms for classical optimization demonstrate a realizable quantum advantage. The utility of many quantum algorithms is limited by high requisite circuit depth and nonconvex optimization landscapes. We tackle these challenges by introducing a variational quantum algorithm that benefits from two innovations: multibasis graph encodings using single-qubit expectation values and nonlinear activation functions. Our technique results in increased observed optimization performance and a factor-of-two reduction in requisite qubits. 
While the classical simulation of many qubits with traditional quantum formalism is impossible due to its exponential scaling, we mitigate this limitation with exact circuit representations using factorized tensor rings. In particular, the shallow circuits permitted by our technique, combined with efficient factorized tensor-based simulation, enable us to successfully optimize the MaxCut of the 512-vertex DIMACS library graphs on a single GPU. By improving the performance of quantum optimization algorithms while requiring fewer quantum resources and utilizing shallower, more error-resistant circuits, we offer tangible progress for variational quantum optimization.", "date": "2022-08", "date_type": "published", "publication": "Physical Review Research", "volume": "4", "number": "3", "publisher": "American Physical Society", "pagerange": "Art. No. 4.033142", "id_number": "CaltechAUTHORS:20220909-232706000", "issn": "2643-1564", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220909-232706000", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Air Force Office of Scientific Research (AFOSR)" }, { "agency": "NSF" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" } ] }, "doi": "10.1103/physrevresearch.4.033142", "resource_type": "article", "pub_year": "2022", "author_list": "Patti, Taylor L.; Kossaifi, Jean; et el." 
}, { "id": "https://authors.library.caltech.edu/records/kemxx-q5m20", "eprint_id": 110646, "eprint_status": "archive", "datestamp": "2023-08-22 16:58:50", "lastmod": "2023-10-23 19:47:01", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Qiao-Zhuoran", "name": { "family": "Qiao", "given": "Zhuoran" }, "orcid": "0000-0002-5704-7331" }, { "id": "Christensen-Anders-S", "name": { "family": "Christensen", "given": "Anders S." }, "orcid": "0000-0002-7253-6897" }, { "id": "Welborn-Matthew-G", "name": { "family": "Welborn", "given": "Matthew" }, "orcid": "0000-0001-8659-6535" }, { "id": "Manby-Frederick-R", "name": { "family": "Manby", "given": "Frederick R." }, "orcid": "0000-0001-7611-714X" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Miller-T-F-III", "name": { "family": "Miller", "given": "Thomas F., III" }, "orcid": "0000-0002-1882-5380" } ] }, "title": "Informing geometric deep learning with electronic interactions to accelerate quantum chemistry", "ispublished": "pub", "full_text_status": "public", "keywords": "quantum chemistry; machine learning; equivariance", "note": "\u00a9 2022 the Author(s). Published by PNAS. This article is distributed under Creative Commons Attribution-NonCommercial-NoDerivatives License 4.0 (CC BY-NC-ND). \n\nEdited by Klavs Jensen, Massachusetts Institute of Technology, Cambridge, MA; received April 1, 2022; accepted June 6, 2022. Published July 28, 2022. \n\nZ.Q. acknowledges graduate research funding from Caltech\nand partial support from the Amazon\u2013Caltech AI4Science fellowship. A.A. and T.F.M. acknowledge partial support from the Caltech DeLogi fund, and A.A. acknowledges support from a Caltech Bren professorship. Z.Q. acknowledges Bo\nLi, Vignesh Bhethanabotla, Dani Kiyasseh, Hongkai Zheng, Sahin Lale, and Rafal Kocielnik for proofreading and helpful comments on the manuscript. 
\n\nAuthor contributions: Z.Q., F.R.M., A.A., and T.F.M. designed research; Z.Q. performed research; A.S.C. and M.W. contributed new reagents/analytic tools; Z.Q. and A.S.C. analyzed data; F.R.M. and A.A. contributed to the theoretical results; and Z.Q., A.A., and T.F.M. wrote the paper. \n\nCompeting interest statement: A patent application related to this work has been filed. A.S.C., M.W., F.R.M., and T.F.M. are employees of Entos, Inc. or its affiliates. The software used for computing input features and gradients is proprietary to Entos, Inc. \n\nData Availability: Source data for results described in the text and SI Appendix, the training dataset, code, and evaluation examples have been deposited in\nZenodo (https://zenodo.org/record/6568518#.YrtTKHbMK38) (99). \n\nThis article is a PNAS Direct Submission.\n\nPublished - pnas.2205221119.pdf
Submitted - 2105.14655.pdf
Supplemental Material - pnas.2205221119.sapp.pdf
", "abstract": "Predicting electronic energies, densities, and related chemical properties can facilitate the discovery of novel catalysts, medicines, and battery materials. However, existing machine learning techniques are challenged by the scarcity of training data when exploring unknown chemical spaces. We overcome this barrier by systematically incorporating knowledge of molecular electronic structure into deep learning. By developing a physics-inspired equivariant neural network, we introduce a method to learn molecular representations based on the electronic interactions among atomic orbitals. Our method, OrbNet-Equi, leverages efficient tight-binding simulations and learned mappings to recover high-fidelity physical quantities. OrbNet-Equi accurately models a wide spectrum of target properties while being several orders of magnitude faster than density functional theory. Despite only using training samples collected from readily available small-molecule libraries, OrbNet-Equi outperforms traditional semiempirical and machine learning\u2013based methods on comprehensive downstream benchmarks that encompass diverse main-group chemical processes. Our method also describes interactions in challenging charge-transfer complexes and open-shell systems. We anticipate that the strategy presented here will help to expand opportunities for studies in chemistry and materials science, where the acquisition of experimental or reference training data is costly.", "date": "2022-07-28", "date_type": "published", "publication": "Proceedings of the National Academy of Sciences", "volume": "119", "number": "31", "publisher": "National Academy of Science", "pagerange": "Art. No. 
e2205221119", "id_number": "CaltechAUTHORS:20210831-203900979", "issn": "0027-8424", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210831-203900979", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Amazon AI4Science Fellowship" }, { "agency": "Caltech DeLogi Fund" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" } ] }, "doi": "10.1073/pnas.2205221119", "pmcid": "PMC9351474", "primary_object": { "basename": "2105.14655.pdf", "url": "https://authors.library.caltech.edu/records/kemxx-q5m20/files/2105.14655.pdf" }, "related_objects": [ { "basename": "pnas.2205221119.pdf", "url": "https://authors.library.caltech.edu/records/kemxx-q5m20/files/pnas.2205221119.pdf" }, { "basename": "pnas.2205221119.sapp.pdf", "url": "https://authors.library.caltech.edu/records/kemxx-q5m20/files/pnas.2205221119.sapp.pdf" } ], "resource_type": "article", "pub_year": "2022", "author_list": "Qiao, Zhuoran; Christensen, Anders S.; et el." }, { "id": "https://authors.library.caltech.edu/records/g79fh-yqz70", "eprint_id": 115574, "eprint_status": "archive", "datestamp": "2023-08-20 08:02:27", "lastmod": "2023-10-24 16:35:44", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Xu-Pan", "name": { "family": "Xu", "given": "Pan" } }, { "id": "Zheng-Hongkai", "name": { "family": "Zheng", "given": "Hongkai" } }, { "id": "Mazumdar-Eric", "name": { "family": "Mazumdar", "given": "Eric V." }, "orcid": "0000-0002-1815-269X" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Langevin Monte Carlo for Contextual Bandits", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2022 by the author(s). 
\n\nThe authors would like to thank the anonymous reviewers for their invaluable comments. PX is supported by PIMCO Postdoctoral Fellowship. AA is partially supported by Bren Named Chair Professorship at Caltech.\n\nPublished - xu22p.pdf
Accepted Version - 2206.11254.pdf
", "abstract": "We study the efficiency of Thompson sampling for contextual bandits. Existing Thompson sampling-based algorithms need to construct a Laplace approximation (i.e., a Gaussian distribution) of the posterior distribution, which is inefficient to sample in high dimensional applications for general covariance matrices. Moreover, the Gaussian approximation may not be a good surrogate for the posterior distribution for general reward generating functions. We propose an efficient posterior sampling algorithm, viz., Langevin Monte Carlo Thompson Sampling (LMC-TS), that uses Markov Chain Monte Carlo (MCMC) methods to directly sample from the posterior distribution in contextual bandits. Our method is computationally efficient since it only needs to perform noisy gradient descent updates without constructing the Laplace approximation of the posterior distribution. We prove that the proposed algorithm achieves the same sublinear regret bound as the best Thompson sampling algorithms for a special case of contextual bandits, viz., linear contextual bandits. 
We conduct experiments on both synthetic data and real-world datasets on different contextual bandit models, which demonstrates that directly sampling from the posterior is both computationally efficient and competitive in performance.", "date": "2022-06-22", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "162", "publisher": "ML Research Press", "pagerange": "24830-24850", "id_number": "CaltechAUTHORS:20220714-212437915", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220714-212437915", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "PIMCO" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" } ] }, "doi": "10.48550/arXiv.2206.11254", "primary_object": { "basename": "2206.11254.pdf", "url": "https://authors.library.caltech.edu/records/g79fh-yqz70/files/2206.11254.pdf" }, "related_objects": [ { "basename": "xu22p.pdf", "url": "https://authors.library.caltech.edu/records/g79fh-yqz70/files/xu22p.pdf" } ], "resource_type": "article", "pub_year": "2022", "author_list": "Xu, Pan; Zheng, Hongkai; et al." 
}, { "id": "https://authors.library.caltech.edu/records/87tww-zn973", "eprint_id": 115576, "eprint_status": "archive", "datestamp": "2023-08-20 08:00:07", "lastmod": "2023-10-24 16:35:50", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kargin-Taylan", "name": { "family": "Kargin", "given": "Taylan" }, "orcid": "0000-0001-6744-654X" }, { "id": "Lale-Sahin", "name": { "family": "Lale", "given": "Sahin" }, "orcid": "0000-0002-7191-346X" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hassibi-B", "name": { "family": "Hassibi", "given": "Babak" }, "orcid": "0000-0002-1375-5838" } ] }, "title": "Thompson Sampling Achieves \u00d5(\u221aT) Regret in Linear Quadratic Control", "ispublished": "pub", "full_text_status": "public", "keywords": "Thompson sampling, adaptive control, linear quadratic control, regret", "note": "\u00a9 2022 T. Kargin, S. Lale, K. Azizzadenesheli, A. Anandkumar & B. Hassibi.\n\nPublished - kargin22a.pdf
", "abstract": "Thompson Sampling (TS) is an efficient method for decision-making under uncertainty, where an action is sampled from a carefully prescribed distribution which is updated based on the observed data. In this work, we study the problem of adaptive control of stabilizable linear-quadratic regulators (LQRs) using TS, where the system dynamics are unknown. Previous works have established that \u00d5(\u221aT) frequentist regret is optimal for the adaptive control of LQRs. However, the existing methods either work only in restrictive settings, require a priori known stabilizing controllers, or utilize computationally intractable approaches. We propose an efficient TS algorithm for the adaptive control of LQRs, TS-based Adaptive Control, TSAC, that attains \u00d5(\u221aT)regret, even for multidimensional systems, thereby solving the open problem posed in Abeille and Lazaric (2018). TSAC does not require a priori known stabilizing controller and achieves fast stabilization of the underlying system by effectively exploring the environment in the early stages. Our result hinges on developing a novel lower bound on the probability that the TS provides an optimistic sample. By carefully prescribing an early exploration strategy and a policy update rule, we show that TS achieves order-optimal regret in adaptive control of multidimensional stabilizable LQRs. 
We empirically demonstrate the performance and the efficiency of TSAC in several adaptive control tasks.", "date": "2022-06-17", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "178", "publisher": "ML Research Press", "pagerange": "3235-3284", "id_number": "CaltechAUTHORS:20220714-212445251", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220714-212445251", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2206.08520", "primary_object": { "basename": "kargin22a.pdf", "url": "https://authors.library.caltech.edu/records/87tww-zn973/files/kargin22a.pdf" }, "resource_type": "article", "pub_year": "2022", "author_list": "Kargin, Taylan; Lale, Sahin; et el." }, { "id": "https://authors.library.caltech.edu/records/gt5nn-4yp94", "eprint_id": 114284, "eprint_status": "archive", "datestamp": "2023-08-22 15:51:07", "lastmod": "2023-10-23 23:28:29", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kugener-Guillaume", "name": { "family": "Kugener", "given": "Guillaume" }, "orcid": "0000-0002-4697-2847" }, { "id": "Zhu-Yichao", "name": { "family": "Zhu", "given": "Yichao" } }, { "id": "Pangal-Dhiraj-J", "name": { "family": "Pangal", "given": "Dhiraj J." }, "orcid": "0000-0001-7391-9825" }, { "id": "Sinha-Aditya", "name": { "family": "Sinha", "given": "Aditya" } }, { "id": "Markarian-Nicholas", "name": { "family": "Markarian", "given": "Nicholas" } }, { "id": "Roshannai-Arman", "name": { "family": "Roshannai", "given": "Arman" } }, { "id": "Chan-Justin", "name": { "family": "Chan", "given": "Justin" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." 
}, "orcid": "0000-0002-7201-6736" }, { "id": "Wrobel-Bozena-B", "name": { "family": "Wrobel", "given": "Bozena B." } }, { "id": "Zada-Gabriel", "name": { "family": "Zada", "given": "Gabriel" }, "orcid": "0000-0001-5821-902X" }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." }, "orcid": "0000-0002-0531-1436" } ] }, "title": "Deep Neural Networks Can Accurately Detect Blood Loss and Hemorrhage Control Task Success From Video", "ispublished": "pub", "full_text_status": "public", "keywords": "Artificial intelligence, Complication management, Hemorrhage control, Machine learning, Video; Neurology (clinical); Surgery", "note": "\u00a9 2022 Congress of Neurological Surgeons.", "abstract": "Background: Deep neural networks (DNNs) have not been proven to detect blood loss (BL) or predict surgeon performance from video. \n\nObjective: To train a DNN using video from cadaveric training exercises of surgeons controlling simulated internal carotid hemorrhage to predict clinically relevant outcomes. \n\nMethods: Video was input as a series of images; deep learning networks were developed, which predicted BL and task success from images alone (automated model) and images plus human-labeled instrument annotations (semiautomated model). These models were compared against 2 reference models, which used average BL across all trials as its prediction (control 1) and a linear regression with time to hemostasis (a metric with known association with BL) as input (control 2). The root-mean-square error (RMSE) and correlation coefficients were used to compare the models; lower RMSE indicates superior performance. \n\nResults: One hundred forty-three trials were used (123 for training and 20 for testing). Deep learning models outperformed controls (control 1: RMSE 489 mL, control 2: RMSE 431 mL, R2 = 0.35) at BL prediction. The automated model predicted BL with an RMSE of 358 mL (R2 = 0.4) and correctly classified outcome in 85% of trials. 
The RMSE and classification performance of the semiautomated model improved to 260 mL and 90%, respectively. \n\nConclusion: BL and task outcome classification are important components of an automated assessment of surgical performance. DNNs can predict BL and outcome of hemorrhage control from video alone; their performance is improved with surgical instrument presence data. The generalizability of DNNs trained on hemorrhage control tasks should be investigated.", "date": "2022-06", "date_type": "published", "publication": "Neurosurgery", "volume": "90", "number": "6", "publisher": "Lippincott, Williams & Wilkins", "pagerange": "823-829", "id_number": "CaltechAUTHORS:20220413-607067100", "issn": "0148-396X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220413-607067100", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1227/neu.0000000000001906", "resource_type": "article", "pub_year": "2022", "author_list": "Kugener, Guillaume; Zhu, Yichao; et el." }, { "id": "https://authors.library.caltech.edu/records/dkz22-hkm86", "eprint_id": 113084, "eprint_status": "archive", "datestamp": "2023-08-22 15:38:31", "lastmod": "2023-10-23 19:51:06", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Pangal-Dhiraj-J", "name": { "family": "Pangal", "given": "Dhiraj J." }, "orcid": "0000-0001-7391-9825" }, { "id": "Kugener-Guillaume", "name": { "family": "Kugener", "given": "Guillaume" }, "orcid": "0000-0002-4697-2847" }, { "id": "Zhu-Yichao", "name": { "family": "Zhu", "given": "Yichao" } }, { "id": "Sinha-Aditya", "name": { "family": "Sinha", "given": "Aditya" } }, { "id": "Unadkat-Vyom", "name": { "family": "Unadkat", "given": "Vyom" } }, { "id": "Cote-David-J", "name": { "family": "Cote", "given": "David J." 
} }, { "id": "Strickland-Ben-A", "name": { "family": "Strickland", "given": "Ben" }, "orcid": "0000-0002-4620-9542" }, { "id": "Rutkowski-Martin", "name": { "family": "Rutkowski", "given": "Martin" }, "orcid": "0000-0002-5188-3419" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew" }, "orcid": "0000-0002-7201-6736" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Han-X-Y", "name": { "family": "Han", "given": "X. Y." } }, { "id": "Papyan-Vardan", "name": { "family": "Papyan", "given": "Vardan" }, "orcid": "0000-0002-5028-2144" }, { "id": "Wrobel-Bozena-B", "name": { "family": "Wrobel", "given": "Bozena" } }, { "id": "Zada-Gabriel", "name": { "family": "Zada", "given": "Gabriel" }, "orcid": "0000-0001-5821-902X" }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." }, "orcid": "0000-0002-0531-1436" } ] }, "title": "Expert surgeons and deep learning models can predict the outcome of surgical hemorrhage from 1 min of video", "ispublished": "pub", "full_text_status": "public", "keywords": "Machine learning; Outcomes research", "note": "\u00a9 The Author(s) 2022. This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons licence, and indicate if changes were made. The images or other third party material in this article are included in the article's Creative Commons licence, unless indicated otherwise in a credit line to the material. If material is not included in the article's Creative Commons licence and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. 
To view a copy of this licence, visit http://creativecommons.org/licenses/by/4.0/. \n\nReceived 22 December 2021; Accepted 18 April 2022; Published 17 May 2022. \n\nData availability: The datasets generated during and/or analyzed during the current study are available in the figshare repository, link: https://doi.org/10.6084/m9.figshare.15132468.v1. \n\nContributions: Study design: D.J.P., G.K., A.S., G.Z., D.A.D. Data acquisition: D.J.P., G.K., B.S., M.R., G.Z., D.A.D. Model development: D.J.P., G.K., A.S., V.U., X.H., V.P., D.A.D. Statistical analysis: D.J.P., G.K., D.A.D. Writing\u2014original draft: D.J.P., G.K., D.A.D. Writing\u2014revisions: All authors. Final approval: All authors. Study supervision: G.Z., D.A.D. \n\nThe authors declare no competing interests.\n\nPublished - s41598-022-11549-2.pdf
Submitted - 2022.01.22.22269640v1.full.pdf
Supplemental Material - 41598_2022_11549_MOESM1_ESM.docx
Supplemental Material - 41598_2022_11549_MOESM2_ESM.pdf
", "abstract": "Major vascular injury resulting in uncontrolled bleeding is a catastrophic and often fatal complication of minimally invasive surgery. At the outset of these events, surgeons do not know how much blood will be lost or whether they will successfully control the hemorrhage (achieve hemostasis). We evaluate the ability of a deep learning neural network (DNN) to predict hemostasis control ability using the first minute of surgical video and compare model performance with human experts viewing the same video. The publicly available SOCAL dataset contains 147 videos of attending and resident surgeons managing hemorrhage in a validated, high-fidelity cadaveric simulator. Videos are labeled with outcome and blood loss (mL). The first minute of 20 videos was shown to four, blinded, fellowship trained skull-base neurosurgery instructors, and to SOCALNet (a DNN trained on SOCAL videos). SOCALNet architecture included a convolutional network (ResNet) identifying spatial features and a recurrent network identifying temporal features (LSTM). Experts independently assessed surgeon skill, predicted outcome and blood loss (mL). Outcome and blood loss predictions were compared with SOCALNet. Expert inter-rater reliability was 0.95. Experts correctly predicted 14/20 trials (Sensitivity: 82%, Specificity: 55%, Positive Predictive Value (PPV): 69%, Negative Predictive Value (NPV): 71%). SOCALNet correctly predicted 17/20 trials (Sensitivity 100%, Specificity 66%, PPV 79%, NPV 100%) and correctly identified all successful attempts. Expert predictions of the highest and lowest skill surgeons and expert predictions reported with maximum confidence were more accurate. Experts systematically underestimated blood loss (mean error \u2212 131 mL, RMSE 350 mL, R2 0.70) and fewer than half of expert predictions identified blood loss\u2009>\u2009500 mL (47.5%, 19/40). 
SOCALNet had superior performance (mean error \u2212 57 mL, RMSE 295 mL, R\u00b2 0.74) and detected most episodes of blood loss\u2009>\u2009500 mL (80%, 8/10). In validation experiments, SOCALNet evaluation of a critical on-screen surgical maneuver and high/low-skill composite videos were concordant with expert evaluation. Using only the first minute of video, experts and SOCALNet can predict outcome and blood loss during surgical hemorrhage. Experts systematically underestimated blood loss, and SOCALNet had no false negatives. DNNs can provide accurate, meaningful assessments of surgical video. We call for the creation of datasets of surgical adverse events for quality improvement research.", "date": "2022-05-17", "date_type": "published", "publication": "Scientific Reports", "volume": "12", "publisher": "Nature Publishing Group", "pagerange": "Art. No. 8137", "id_number": "CaltechAUTHORS:20220124-214564000", "issn": "2045-2322", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220124-214564000", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1038/s41598-022-11549-2", "pmcid": "PMC9114003", "primary_object": { "basename": "2022.01.22.22269640v1.full.pdf", "url": "https://authors.library.caltech.edu/records/dkz22-hkm86/files/2022.01.22.22269640v1.full.pdf" }, "related_objects": [ { "basename": "41598_2022_11549_MOESM1_ESM.docx", "url": "https://authors.library.caltech.edu/records/dkz22-hkm86/files/41598_2022_11549_MOESM1_ESM.docx" }, { "basename": "41598_2022_11549_MOESM2_ESM.pdf", "url": "https://authors.library.caltech.edu/records/dkz22-hkm86/files/41598_2022_11549_MOESM2_ESM.pdf" }, { "basename": "s41598-022-11549-2.pdf", "url": "https://authors.library.caltech.edu/records/dkz22-hkm86/files/s41598-022-11549-2.pdf" } ], "resource_type": "article", "pub_year": "2022", "author_list": "Pangal, Dhiraj J.; Kugener, Guillaume; et el." 
}, { "id": "https://authors.library.caltech.edu/records/dq8ck-xrt79", "eprint_id": 115623, "eprint_status": "archive", "datestamp": "2023-08-20 07:43:04", "lastmod": "2023-10-24 16:37:34", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Nie-Weili", "name": { "family": "Nie", "given": "Weili" } }, { "id": "Guo-Brandon", "name": { "family": "Guo", "given": "Brandon" } }, { "id": "Huang-Yujia", "name": { "family": "Huang", "given": "Yujia" }, "orcid": "0000-0001-7667-8342" }, { "id": "Xiao-Chaowei", "name": { "family": "Xiao", "given": "Chaowei" }, "orcid": "0000-0002-7043-4926" }, { "id": "Vahdat-Arash", "name": { "family": "Vahdat", "given": "Arash" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Diffusion Models for Adversarial Purification", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2022 by the author(s). \n\nWe would like to thank the AIALGO team at NVIDIA and Anima Anandkumar's research group at Caltech for reading the paper and providing fruitful suggestions. We also thank the anonymous reviewers for helpful comments.\n\nPublished - nie22a.pdf
Submitted - 2205.07460.pdf
", "abstract": "Adversarial purification refers to a class of defense methods that remove adversarial perturbations using a generative model. These methods do not make assumptions on the form of attack and the classification model, and thus can defend pre-existing classifiers against unseen threats. However, their performance currently falls behind adversarial training methods. In this work, we propose DiffPure that uses diffusion models for adversarial purification: Given an adversarial example, we first diffuse it with a small amount of noise following a forward diffusion process, and then recover the clean image through a reverse generative process. To evaluate our method against strong adaptive attacks in an efficient and scalable way, we propose to use the adjoint method to compute full gradients of the reverse generative process. Extensive experiments on three image datasets including CIFAR-10, ImageNet and CelebA-HQ with three classifier architectures including ResNet, WideResNet and ViT demonstrate that our method achieves the state-of-the-art results, outperforming current adversarial training and adversarial purification methods, often by a large margin. 
Project page:\nhttps://diffpure.github.io.", "date": "2022-05-16", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "162", "publisher": "ML Research Press", "pagerange": "16805-16827", "id_number": "CaltechAUTHORS:20220715-174841781", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220715-174841781", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2205.07460", "primary_object": { "basename": "2205.07460.pdf", "url": "https://authors.library.caltech.edu/records/dq8ck-xrt79/files/2205.07460.pdf" }, "related_objects": [ { "basename": "nie22a.pdf", "url": "https://authors.library.caltech.edu/records/dq8ck-xrt79/files/nie22a.pdf" } ], "resource_type": "article", "pub_year": "2022", "author_list": "Nie, Weili; Guo, Brandon; et el." }, { "id": "https://authors.library.caltech.edu/records/q3grb-3vz72", "eprint_id": 114603, "eprint_status": "archive", "datestamp": "2023-08-22 15:30:09", "lastmod": "2023-10-24 15:03:32", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "O'Connell-Michael", "name": { "family": "O'Connell", "given": "Michael" }, "orcid": "0000-0001-6681-8823" }, { "id": "Shi-Guanya", "name": { "family": "Shi", "given": "Guanya" }, "orcid": "0000-0002-9075-3705" }, { "id": "Shi-Xichen", "name": { "family": "Shi", "given": "Xichen" }, "orcid": "0000-0002-5366-9256" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Yue-Yisong", "name": { "family": "Yue", "given": "Yisong" }, "orcid": "0000-0001-9127-1989" }, { "id": "Chung-Soon-Jo", "name": { "family": "Chung", "given": "Soon-Jo" }, "orcid": "0000-0002-6657-3907" } ] }, "title": "Neural-Fly enables rapid learning 
for agile flight in strong winds", "ispublished": "pub", "full_text_status": "public", "keywords": "Artificial Intelligence; Control and Optimization; Computer Science Applications; Mechanical Engineering", "note": "\u00a9 2022 The Authors, some rights reserved; exclusive licensee\nAmerican Association for the Advancement of Science. No claim to original U.S. Government Works. \n\nSubmitted 11 October 2021; Accepted 12 April 2022; Published 4 May 2022. \n\nA.A. is also affiliated with NVIDIA Corporation, and Y.Y. is also with associated Argo AI. K.A. is currently affiliated with Purdue University. We thank J. Burdick and J.-J. E. Slotine for their helpful discussions. We thank M. Anderson for help with configuring the quadrotor platform, and M. Anderson and P. Spieler for help with hardware troubleshooting. We also thank N. Badillo and L. Pabon Madrid for help in experiments. \n\nThis research was developed with funding from the Defense Advanced Research Projects Agency (DARPA). This research was also conducted in part with funding from Raytheon Technologies. The views, opinions, and/or findings expressed are those of the authors and should not be interpreted as representing the official views or policies of the Department of Defense or the U.S. Government. The experiments reported in this article were conducted at Caltech's Center for Autonomous Systems and Technologies (CAST). \n\nAuthor contributions: S.-J.C. and Y.Y. directed the research activities. G.S. and M.O. designed and implemented the metalearning algorithm under the guidance of Y.Y., K.A., A.A., and S.-J.C., while the last-layer adaptation idea was started with a discussion by G.S., M.O., X.S., and S.-J.C. M.O. and G.S. designed and implemented the adaptive control algorithm with inputs from S.-J.C. and X.S. M.O. and G.S. performed experiments and evaluated the results. M.O. conducted the theoretical analysis of the meta-learning based adaptive controller with input from S.-J.C., G.S., and X.S. G.S. 
analyzed the learning algorithm with feedback from Y.Y., K.A., A.A., and S.-J.C. G.S. and M.O. created all the figures and videos with input from the other authors. All authors prepared the manuscript. \n\nThe authors declare that they have no competing interests. \n\nData and materials availability: All data needed to evaluate the conclusions in the article are present in the article or in the Supplementary Materials. We have provided the machine learning model training code, training data, and experimental data at github.com/aerorobotics/neural-fly.\n\nAccepted Version - 2205.06908.pdf
Supplemental Material - scirobotics.abm6597_sm.pdf
", "abstract": "Executing safe and precise flight maneuvers in dynamic high-speed winds is important for the ongoing commoditization of uninhabited aerial vehicles (UAVs). However, because the relationship between various wind conditions and its effect on aircraft maneuverability is not well understood, it is challenging to design effective robot controllers using traditional control design methods. We present Neural-Fly, a learning-based approach that allows rapid online adaptation by incorporating pretrained representations through deep learning. Neural-Fly builds on two key observations that aerodynamics in different wind conditions share a common representation and that the wind-specific part lies in a low-dimensional space. To that end, Neural-Fly uses a proposed learning algorithm, domain adversarially invariant meta-learning (DAIML), to learn the shared representation, only using 12 minutes of flight data. With the learned representation as a basis, Neural-Fly then uses a composite adaptation law to update a set of linear coefficients for mixing the basis elements. When evaluated under challenging wind conditions generated with the Caltech Real Weather Wind Tunnel, with wind speeds up to 43.6 kilometers/hour (12.1 meters/second), Neural-Fly achieves precise flight control with substantially smaller tracking error than stateof-the-art nonlinear and adaptive controllers. In addition to strong empirical performance, the exponential stability of Neural-Fly results in robustness guarantees. Last, our control design extrapolates to unseen wind conditions, is shown to be effective for outdoor flights with only onboard sensors, and can transfer across drones with minimal performance degradation.", "date": "2022-05-04", "date_type": "published", "publication": "Science Robotics", "volume": "7", "number": "66", "publisher": "American Association for the Advancement of Science", "pagerange": "Art. No. 
eabm6597", "id_number": "CaltechAUTHORS:20220505-792409800", "issn": "2470-9476", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220505-792409800", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Defense Advanced Research Projects Agency (DARPA)" }, { "agency": "Raytheon Company" } ] }, "local_group": { "items": [ { "id": "Center-for-Autonomous-Systems-and-Technologies-(CAST)" }, { "id": "GALCIT" } ] }, "doi": "10.1126/scirobotics.abm6597", "primary_object": { "basename": "2205.06908.pdf", "url": "https://authors.library.caltech.edu/records/q3grb-3vz72/files/2205.06908.pdf" }, "related_objects": [ { "basename": "scirobotics.abm6597_sm.pdf", "url": "https://authors.library.caltech.edu/records/q3grb-3vz72/files/scirobotics.abm6597_sm.pdf" } ], "resource_type": "article", "pub_year": "2022", "author_list": "O'Connell, Michael; Shi, Guanya; et el." }, { "id": "https://authors.library.caltech.edu/records/96rss-qa524", "eprint_id": 116901, "eprint_status": "archive", "datestamp": "2023-08-22 15:28:20", "lastmod": "2023-10-24 21:10:45", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Roberts-Sidney-I", "name": { "family": "Roberts", "given": "Sidney I." } }, { "id": "Cen-Steven-Y", "name": { "family": "Cen", "given": "Steven Y." }, "orcid": "0000-0002-7859-8909" }, { "id": "Nguyen-Jessica-H", "name": { "family": "Nguyen", "given": "Jessica H." }, "orcid": "0000-0003-0454-8463" }, { "id": "Perez-Laura-C", "name": { "family": "Perez", "given": "Laura C." } }, { "id": "Medina-Luis-G", "name": { "family": "Medina", "given": "Luis G." 
} }, { "id": "Ma-Runzhuo", "name": { "family": "Ma", "given": "Runzhuo" }, "orcid": "0000-0001-6381-2661" }, { "id": "Marshall-Sandra", "name": { "family": "Marshall", "given": "Sandra" } }, { "id": "Kocielnik-Rafal", "name": { "family": "Kocielnik", "given": "Rafal" }, "orcid": "0000-0001-5602-6056" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" } ] }, "title": "The Relationship Between Technical Skills, Cognitive Workload, and Errors During Robotic Surgical Exercises", "ispublished": "pub", "full_text_status": "public", "keywords": "Urology", "note": "\u00a9 2022, Mary Ann Liebert, Inc. \n\nResearch reported in this publication was supported in part by the National Institute of Biomedical Imaging and Bioengineering of the National Institutes of Health under Award No. K23EB026493, and by the National Cancer Institute under Award No. 1 R01CA251579-01A1. \n\nAuthors' Contributions. S.I.R. was in charge of project development, data collection and preparation, and article writing and editing. S.Y.C. performed data analysis and article writing and editing. J.H.N. performed article editing and project management. L.C.P. performed data collection and preparation and article editing. L.G.M. performed data collection and article editing. R.M. performed data collection, data analysis, and article editing. S.M. performed data analysis and article editing. R.K. performed data analysis and article editing. A.A. performed data analysis and article editing. A.J.H. was in charge of project development, data management, and article writing and editing. \n\nIRB Approval and Human and Animal Rights. Our study complied with protocols was approved by the University of Southern California's IRB. 
All procedures performed in studies involving human participants were in accordance with the ethical standards of the institutional and/or national research committee and with the 1964 Helsinki Declaration and its later amendments or comparable ethical standards. This article does not contain any studies with animals performed by any of the authors. \n\nInformed consent was obtained from all individuals included in the study. \n\nAuthor Disclosure Statement. A.J.H. is a consultant for Mimic, Quantagene, and Johnson & Johnson. The study was not funded by any of these companies. Other authors have no conflict of interest.\n\nPublished - end.2021.0790.pdf
", "abstract": "Purpose: We attempt to understand the relationship between surgeon technical skills, cognitive workload, and errors during a simulated robotic dissection task.\n\nMaterials and Methods: Participant surgeons performed a robotic surgery dissection exercise. Participants were grouped based on surgical experience. Technical skills were evaluated utilizing the validated Global Evaluative Assessment of Robotic Skills (GEARS) assessment tool. The dissection task was evaluated for errors during active dissection or passive retraction maneuvers. We quantified cognitive workload of surgeon participants as an index of cognitive activity (ICA), derived from task-evoked pupillary response metrics; ICA ranged 0 to 1, with 1 representing maximum ICA. Generalized estimating equation (GEE) was used for all modelings to establish relationships between surgeon technical skills, cognitive workload, and errors.\n\nResults: We found a strong association between technical skills as measured by multiple GEARS domains (depth perception, force sensitivity, and robotic control) and passive errors, with higher GEARS scores associated with a lower relative risk of errors (all p\u2009<\u20090.01). For novice surgeons, as average GEARS scores increased, the average estimated ICA decreased. In contrast, as average GEARS increased for expert surgeons, the average estimated ICA increased. When exhibiting optimal technical skill (maximal GEARS scores), novices and experts reached a similar range of ICA scores (ICA: 0.47 and 0.42, respectively).\n\nConclusions: This study found that there is an optimal cognitive workload level for surgeons of all experience levels during our robotic surgical exercise. Select technical skill domains were strong predictors of errors. 
Future research will explore whether an ideal cognitive workload range truly optimizes surgical training and reduces surgical errors.", "date": "2022-05", "date_type": "published", "publication": "Journal of Endourology", "volume": "36", "number": "5", "publisher": "Mary Ann Liebert Inc", "pagerange": "712-720", "id_number": "CaltechAUTHORS:20220912-920381000", "issn": "0892-7790", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220912-920381000", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "K23EB026493" }, { "agency": "NIH", "grant_number": "1 R01CA251579-01A1" } ] }, "doi": "10.1089/end.2021.0790", "pmcid": "PMC9145254", "primary_object": { "basename": "end.2021.0790.pdf", "url": "https://authors.library.caltech.edu/records/96rss-qa524/files/end.2021.0790.pdf" }, "resource_type": "article", "pub_year": "2022", "author_list": "Roberts, Sidney I.; Cen, Steven Y.; et el." }, { "id": "https://authors.library.caltech.edu/records/0m99v-t3788", "eprint_id": 114256, "eprint_status": "archive", "datestamp": "2023-08-22 15:21:48", "lastmod": "2023-10-23 19:50:53", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Wen-Gege", "name": { "family": "Wen", "given": "Gege" }, "orcid": "0000-0003-1668-3777" }, { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" }, "orcid": "0000-0003-2081-9665" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Benson-Sally-M", "name": { "family": "Benson", "given": "Sally M." 
}, "orcid": "0000-0002-3733-4296" } ] }, "title": "U-FNO\u2014An enhanced Fourier neural operator-based deep-learning model for multiphase flow", "ispublished": "pub", "full_text_status": "public", "keywords": "Multiphase flow; Fourier neural operator; Convolutional neural network; Carbon capture and storage; Deep learning; Water Science and Technology", "note": "\u00a9 2022 Elsevier. \n\nReceived 30 August 2021, Revised 7 February 2022, Accepted 25 March 2022, Available online 5 April 2022. \n\nG. Wen and S. M. Benson gratefully acknowledges the supported by ExxonMobil through the Strategic Energy Alliance at Stanford University and the Stanford Center for Carbon Storage . Z. Li gratefully acknowledges the financial support from the Kortschak Scholars Program. A. Anandkumar is supported in part by Bren endowed chair, LwLL grants, Beyond Limits, Raytheon, Microsoft, Google, Adobe faculty fellowships, and DE Logi grant. The authors would like to acknowledge the reviewers and editors for the constructive comments. \n\nCode and data availability: The python code for U-FNO model architecture and the data set used in training is available at https://github.com/gegewen/ufno. Web application https://ccsnet.ai hosts the trained U-FNO models to provide real time predictions. \n\nCRediT authorship contribution statement: Gege Wen: Conceptualization, Methodology, Software, Data curation, Formal analysis, Investigation, Validation, Visualization, Writing \u2013 original draft, Writing \u2013 review & editing. Zongyi Li: Conceptualization, Methodology, Software, Investigation, Validation, Writing \u2013 review & editing. Kamyar Azizzadenesheli: Methodology, Software, Investigation, Validation, Writing \u2013 review & editing. Anima Anandkumar: Funding acquisition, Supervision, Writing \u2013 review & editing. Sally M. Benson: Conceptualization, Formal analysis, Funding acquisition, Methodology, Resources, Supervision, Writing \u2013 review & editing. 
\n\nThe authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.\n\nSubmitted - 2109.03697.pdf
", "abstract": "Numerical simulation of multiphase flow in porous media is essential for many geoscience applications. Machine learning models trained with numerical simulation data can provide a faster alternative to traditional simulators. Here we present U-FNO, a novel neural network architecture for solving multiphase flow problems with superior accuracy, speed, and data efficiency. U-FNO is designed based on the newly proposed Fourier neural operator (FNO), which has shown excellent performance in single-phase flows. We extend the FNO-based architecture to a highly complex CO\u2082-water multiphase problem with wide ranges of permeability and porosity heterogeneity, anisotropy, reservoir conditions, injection configurations, flow rates, and multiphase flow properties. The U-FNO architecture is more accurate in gas saturation and pressure buildup predictions than the original FNO and a state-of-the-art convolutional neural network (CNN) benchmark. Meanwhile, it has superior data utilization efficiency, requiring only a third of the training data to achieve the equivalent accuracy as CNN. U-FNO provides superior performance in highly heterogeneous geological formations and critically important applications such as gas saturation and pressure buildup \"fronts\" determination. The trained model can serve as a general-purpose alternative to routine numerical simulations of 2D-radial CO\u2082 injection problems with significant speed-ups than traditional simulators.", "date": "2022-05", "date_type": "published", "publication": "Advances in Water Resources", "volume": "163", "publisher": "Elsevier", "pagerange": "Art. No. 
104180", "id_number": "CaltechAUTHORS:20220412-15492000", "issn": "0309-1708", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220412-15492000", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "ExxonMobil Research and Engineering Company" }, { "agency": "Kortschak Scholars Program" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Learning with Less Labels (LwLL)" }, { "agency": "Beyond Limits" }, { "agency": "Raytheon Company" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" }, { "agency": "Caltech De Logi Fund" } ] }, "doi": "10.1016/j.advwatres.2022.104180", "primary_object": { "basename": "2109.03697.pdf", "url": "https://authors.library.caltech.edu/records/0m99v-t3788/files/2109.03697.pdf" }, "resource_type": "article", "pub_year": "2022", "author_list": "Wen, Gege; Li, Zongyi; et el." }, { "id": "https://authors.library.caltech.edu/records/xk16q-c4k63", "eprint_id": 115585, "eprint_status": "archive", "datestamp": "2023-08-20 07:33:51", "lastmod": "2023-10-24 16:36:07", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Zhou-Daquan", "name": { "family": "Zhou", "given": "Daquan" } }, { "id": "Yu-Zhiding", "name": { "family": "Yu", "given": "Zhiding" } }, { "id": "Xie-Enze", "name": { "family": "Xie", "given": "Enze" } }, { "id": "Xiao-Chaowei", "name": { "family": "Xiao", "given": "Chaowei" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Feng-Jiashi", "name": { "family": "Feng", "given": "Jiashi" } }, { "id": "Alvarez-Jose-M", "name": { "family": "Alvarez", "given": "Jose M." 
} } ] }, "title": "Understanding The Robustness in Vision Transformers", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2022 by the author(s).\n\nPublished - zhou22m.pdf
Submitted - 2204.12451.pdf
", "abstract": "Recent studies show that Vision Transformers(ViTs) exhibit strong robustness against various corruptions. Although this property is partly attributed to the self-attention mechanism, there is still a lack of systematic understanding. In this paper, we examine the role of self-attention in learning robust representations. Our study is motivated by the intriguing properties of the emerging visual grouping in Vision Transformers, which indicates that self-attention may promote robustness through improved mid-level representations. We further propose a family of fully attentional networks (FANs) that strengthen this capability by incorporating an attentional channel processing design. We validate the design comprehensively on various hierarchical backbones. Our model achieves a state of-the-art 87.1% accuracy and 35.8% mCE on ImageNet-1k and ImageNet-C with 76.8M parameters. We also demonstrate state-of-the-art accuracy and robustness in two downstream tasks: semantic segmentation and object detection. Code will be available at this https URL.", "date": "2022-04-26", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "162", "publisher": "ML Research Press", "pagerange": "27378-27394", "id_number": "CaltechAUTHORS:20220714-212518736", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220714-212518736", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2204.12451", "primary_object": { "basename": "2204.12451.pdf", "url": "https://authors.library.caltech.edu/records/xk16q-c4k63/files/2204.12451.pdf" }, "related_objects": [ { "basename": "zhou22m.pdf", "url": "https://authors.library.caltech.edu/records/xk16q-c4k63/files/zhou22m.pdf" } ], "resource_type": "article", "pub_year": "2022", "author_list": "Zhou, Daquan; Yu, Zhiding; et el." 
}, { "id": "https://authors.library.caltech.edu/records/09mnq-t5j04", "eprint_id": 114119, "eprint_status": "archive", "datestamp": "2023-08-22 14:22:17", "lastmod": "2023-10-23 23:22:26", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kugener-Guillaume", "name": { "family": "Kugener", "given": "Guillaume" }, "orcid": "0000-0002-4697-2847" }, { "id": "Pangal-Dhiraj-J", "name": { "family": "Pangal", "given": "Dhiraj J." }, "orcid": "0000-0001-7391-9825" }, { "id": "Cardinal-Tyler", "name": { "family": "Cardinal", "given": "Tyler" } }, { "id": "Collet-Casey", "name": { "family": "Collet", "given": "Casey" } }, { "id": "Lechtholz-Zey-Elizabeth", "name": { "family": "Lechtholz-Zey", "given": "Elizabeth" } }, { "id": "Lasky-Sasha", "name": { "family": "Lasky", "given": "Sasha" } }, { "id": "Sundaram-Shivani", "name": { "family": "Sundaram", "given": "Shivani" }, "orcid": "0000-0003-2863-9204" }, { "id": "Markarian-Nicholas", "name": { "family": "Markarian", "given": "Nicholas" } }, { "id": "Zhu-Yichao", "name": { "family": "Zhu", "given": "Yichao" } }, { "id": "Roshannai-Arman", "name": { "family": "Roshannai", "given": "Arman" } }, { "id": "Sinha-Aditya", "name": { "family": "Sinha", "given": "Aditya" } }, { "id": "Han-X-Y", "name": { "family": "Han", "given": "X. Y." } }, { "id": "Papyan-Vardan", "name": { "family": "Papyan", "given": "Vardan" }, "orcid": "0000-0002-5028-2144" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Wrobel-Bozena-B", "name": { "family": "Wrobel", "given": "Bozena" } }, { "id": "Zada-Gabriel", "name": { "family": "Zada", "given": "Gabriel" }, "orcid": "0000-0001-5821-902X" }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." 
}, "orcid": "0000-0002-0531-1436" } ] }, "title": "Utility of the Simulated Outcomes Following Carotid Artery Laceration Video Data Set for Machine Learning Applications", "ispublished": "pub", "full_text_status": "public", "keywords": "General Medicine", "note": "\u00a9 2022 Kugener G et al. JAMA Network Open. This is an open access article distributed under the terms of the CC-BY License. \n\nAccepted for Publication: January 31, 2022. Published: March 21, 2022. \n\nAuthor Contributions: Dr Donoho had full access to all the data in the study and takes responsibility for the integrity of the data and the accuracy of the data analysis. \n\nConcept and design: Kugener, Pangal, Roshannai, Papyan, Hung, Anandkumar, Wrobel, Zada, Donoho. \n\nAcquisition, analysis, or interpretation of data: Kugener, Pangal, Cardinal, Collet, Lechtholz-Zey, Lasky, Sundaram, Markarian, Zhu, Roshannai, Sinha, Han, Wrobel, Donoho. \n\nDrafting of the manuscript: Kugener, Pangal, Lechtholz-Zey, Sundaram, Roshannai, Donoho. \n\nCritical revision of the manuscript for important intellectual content: Kugener, Pangal, Cardinal, Collet, Lasky, Markarian, Zhu, Sinha, Han, Papyan, Hung, Anandkumar, Wrobel, Zada, Donoho. \n\nStatistical analysis: Kugener, Pangal, Collet, Lechtholz-Zey, Zhu, Roshannai, Anandkumar, Donoho. \n\nObtained funding: Zada. \n\nAdministrative, technical, or material support: Kugener, Pangal, Lasky, Sundaram, Markarian, Zhu, Sinha, Han, Wrobel, Zada, Donoho. \n\nSupervision: Kugener, Pangal, Papyan, Hung, Wrobel, Zada, Donoho. \n\nConflict of Interest Disclosures: None reported.\n\nPublished - kugener_2022_oi_220124_1646862767.52045.pdf
Supplemental Material - zoi220124supp1_prod_1646862767.53045.pdf
", "abstract": "Importance. Surgical data scientists lack video data sets that depict adverse events, which may affect model generalizability and introduce bias. Hemorrhage may be particularly challenging for computer vision\u2013based models because blood obscures the scene. \n\nObjective. To assess the utility of the Simulated Outcomes Following Carotid Artery Laceration (SOCAL)\u2014a publicly available surgical video data set of hemorrhage complication management with instrument annotations and task outcomes\u2014to provide benchmarks for surgical data science techniques, including computer vision instrument detection, instrument use metrics and outcome associations, and validation of a SOCAL-trained neural network using real operative video. \n\nDesign, Setting, and Participants. For this quailty improvement study, a total of 75 surgeons with 1 to 30 years' experience (mean, 7 years) were filmed from January 1, 2017, to December 31, 2020, managing catastrophic surgical hemorrhage in a high-fidelity cadaveric training exercise at nationwide training courses. Videos were annotated from January 1 to June 30, 2021. \n\nInterventions. Surgeons received expert coaching between 2 trials. \n\nMain Outcomes and Measures. Hemostasis within 5 minutes (task success, dichotomous), time to hemostasis (in seconds), and blood loss (in milliliters) were recorded. Deep neural networks (DNNs) were trained to detect surgical instruments in view. Model performance was measured using mean average precision (mAP), sensitivity, and positive predictive value. \n\nResults. SOCAL contains 31\u202f443 frames with 65\u202f071 surgical instrument annotations from 147 trials with associated surgeon demographic characteristics, time to hemostasis, and recorded blood loss for each trial. Computer vision\u2013based instrument detection methods using DNNs trained on SOCAL achieved a mAP of 0.67 overall and 0.91 for the most common surgical instrument (suction). 
Hemorrhage control challenges standard object detectors: detection of some surgical instruments remained poor (mAP,\u20090.25). On real intraoperative video, the model achieved a sensitivity of 0.77 and a positive predictive value of 0.96. Instrument use metrics derived from the SOCAL video were significantly associated with performance (blood loss). \n\nConclusions and Relevance. Hemorrhage control is a high-stakes adverse event that poses unique challenges for video analysis, but no data sets of hemorrhage control exist. The use of SOCAL, the first data set to depict hemorrhage control, allows the benchmarking of data science applications, including object detection, performance metric development, and identification of metrics associated with outcomes. In the future, SOCAL may be used to build and validate surgical data science models.", "date": "2022-03", "date_type": "published", "publication": "JAMA Network Open", "volume": "5", "number": "3", "publisher": "American Medical Association", "pagerange": "Art. No. e223177", "id_number": "CaltechAUTHORS:20220329-772928599", "issn": "2574-3805", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220329-772928599", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1001/jamanetworkopen.2022.3177", "pmcid": "PMC8938712", "primary_object": { "basename": "kugener_2022_oi_220124_1646862767.52045.pdf", "url": "https://authors.library.caltech.edu/records/09mnq-t5j04/files/kugener_2022_oi_220124_1646862767.52045.pdf" }, "related_objects": [ { "basename": "zoi220124supp1_prod_1646862767.53045.pdf", "url": "https://authors.library.caltech.edu/records/09mnq-t5j04/files/zoi220124supp1_prod_1646862767.53045.pdf" } ], "resource_type": "article", "pub_year": "2022", "author_list": "Kugener, Guillaume; Pangal, Dhiraj J.; et el." 
}, { "id": "https://authors.library.caltech.edu/records/j2js9-txr69", "eprint_id": 108205, "eprint_status": "archive", "datestamp": "2023-08-22 12:59:03", "lastmod": "2023-10-23 16:32:08", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Liu-Burigede", "name": { "family": "Liu", "given": "Burigede" }, "orcid": "0000-0002-6518-3368" }, { "id": "Kovachki-Nikola-B", "name": { "family": "Kovachki", "given": "Nikola" }, "orcid": "0000-0002-3650-2972" }, { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" } }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." }, "orcid": "0000-0001-9091-7266" }, { "id": "Bhattacharya-K", "name": { "family": "Bhattacharya", "given": "Kaushik" }, "orcid": "0000-0003-2908-5469" } ] }, "title": "A learning-based multiscale method and its application to inelastic impact problems", "ispublished": "pub", "full_text_status": "public", "keywords": "Multiscale modeling; Machine learning; Crystal plasticity", "note": "\u00a9 2021 Elsevier Ltd. \n\nReceived 11 June 2021, Revised 30 September 2021, Accepted 6 October 2021, Available online 22 October 2021. \n\nWe are grateful to Dennis Kochmann for discussion and for providing us with the 2DFFT and the 3D Taylor code to generate the data. This research was sponsored by the Army Research Laboratory, United States and was accomplished under Cooperative Agreement Number W911NF-12-2-0022. The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the Army Research Laboratory or the U.S. Government. The U.S. 
Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation herein. ZL is supported by the Kortschak Scholars Program. AA is supported in part by Bren endowed chair and De Logi grant. AMS is also partially supported by NSF, United States grant DMS-1818977. \n\nData availability: The data and scripts needed to evaluate the conclusions of this paper are available in the GitHub repository \"Learning based multiscale\" (https://github.com/Burigede/Learning_based_multiscale.git). \n\nCRediT authorship contribution statement: Burigede Liu: Conceived the work, Developed the framework, Lead in implementing the framework, Obtaining the numerical results, Discussions during the course of this work and in interpreting the results, Lead in drafting the manuscript, Finalizing. Nikola Kovachki: Conceived the work, Developed the framework, Discussions during the course of this work and in interpreting the results, Finalizing. Zongyi Li: Discussions during the course of this work and in interpreting the results, Finalizing. Kamyar Azizzadenesheli: Discussions during the course of this work and in interpreting the results, Finalizing. Anima Anandkumar: Discussions during the course of this work and in interpreting the results, Finalizing. Andrew M. Stuart: Conceived the work, Developed the framework, Discussions during the course of this work and in interpreting the results, Finalizing. Kaushik Bhattacharya: Conceived the work, Developed the framework, Discussions during the course of this work and in interpreting the results, Lead in drafting the manuscript, Finalizing. \n\nThe authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.\n\nSubmitted - 2102.07256.pdf
", "abstract": "The macroscopic properties of materials that we observe and exploit in engineering application result from complex interactions between physics at multiple length and time scales: electronic, atomistic, defects, domains etc. Multiscale modeling seeks to understand these interactions by exploiting the inherent hierarchy where the behavior at a coarser scale regulates and averages the behavior at a finer scale. This requires the repeated solution of computationally expensive finer-scale models, and often a priori knowledge of those aspects of the finer-scale behavior that affect the coarser scale (order parameters, state variables, descriptors, etc.). We address this challenge in a two-scale setting where we learn the fine-scale behavior from off-line calculations and then use the learnt behavior directly in coarse scale calculations. The approach builds on the recent success of deep neural networks by combining their approximation power in high dimensions with ideas from model reduction. It results in a neural network approximation that has high fidelity, is computationally inexpensive, is independent of the need for a priori knowledge, and can be used directly in the coarse scale calculations. We demonstrate the approach on problems involving the impact of magnesium, a promising light-weight structural and protective material.", "date": "2022-01", "date_type": "published", "publication": "Journal of the Mechanics and Physics of Solids", "volume": "158", "publisher": "Elsevier", "pagerange": "Art. No. 
104668", "id_number": "CaltechAUTHORS:20210225-132721680", "issn": "0022-5096", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210225-132721680", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Army Research Laboratory", "grant_number": "W911NF-12-2-0022" }, { "agency": "Kortschak Scholars Program" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Caltech De Logi Fund" }, { "agency": "NSF", "grant_number": "DMS-1818977" } ] }, "doi": "10.1016/j.jmps.2021.104668", "primary_object": { "basename": "2102.07256.pdf", "url": "https://authors.library.caltech.edu/records/j2js9-txr69/files/2102.07256.pdf" }, "resource_type": "article", "pub_year": "2022", "author_list": "Liu, Burigede; Kovachki, Nikola; et al." }, { "id": "https://authors.library.caltech.edu/records/cwhg8-t6c31", "eprint_id": 110655, "eprint_status": "archive", "datestamp": "2023-08-20 06:03:01", "lastmod": "2023-10-23 15:32:18", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Christensen-Anders-S", "name": { "family": "Christensen", "given": "Anders S." }, "orcid": "0000-0002-7253-6897" }, { "id": "Sirumalla-Sai-Krishna", "name": { "family": "Sirumalla", "given": "Sai Krishna" }, "orcid": "0000-0002-1875-2062" }, { "id": "Qiao-Zhuoran", "name": { "family": "Qiao", "given": "Zhuoran" }, "orcid": "0000-0002-5704-7331" }, { "id": "O'Connor-Michael-B", "name": { "family": "O'Connor", "given": "Michael B." } }, { "id": "Smith-Daniel-G-A", "name": { "family": "Smith", "given": "Daniel G. A." }, "orcid": "0000-0001-8626-0900" }, { "id": "Ding-Feizhi", "name": { "family": "Ding", "given": "Feizhi" } }, { "id": "Bygrave-Peter-J", "name": { "family": "Bygrave", "given": "Peter J." 
}, "orcid": "0000-0002-5505-5637" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Welborn-Matthew-G", "name": { "family": "Welborn", "given": "Matthew" }, "orcid": "0000-0001-8659-6535" }, { "id": "Manby-Frederick-R", "name": { "family": "Manby", "given": "Frederick R." }, "orcid": "0000-0001-7611-714X" }, { "id": "Miller-T-F-III", "name": { "family": "Miller", "given": "Thomas F., III" }, "orcid": "0000-0002-1882-5380" } ] }, "title": "OrbNet Denali: A machine learning potential for biological and organic chemistry with semi-empirical cost and DFT accuracy", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 Author(s). Published under an exclusive license by AIP Publishing. \n\nSubmitted: 1 July 2021; Accepted: 26 October 2021; Published Online: 23 November 2021. \n\nZ.Q. acknowledges graduate research funding from Caltech and partial support from the Amazon\u2013Caltech AI4Science fellowship. T.F.M. and A.A. acknowledge partial support from the Caltech DeLogi fund, and A.A. acknowledges support from a Caltech Bren professorship. The authors acknowledge NVIDIA, including Abe Stern, Thorsten Kurth, Josh Romero, and Tom Gibbs, for helpful discussions regarding GPU implementations of graph neural networks. Computational resources were provided by the National Energy Research Scientific Computing Center (NERSC), a DOE Office of Science User Facility supported by the DOE Office of Science, under Contract No. DE-AC02-05CH11231. This research used resources of the Oak Ridge Leadership Computing Facility at the Oak Ridge National Laboratory, which is supported by the Office of Science of the U.S. Department of Energy under Contract No. DE-AC05-00OR22725. \n\nConflict of Interest: Nine of the authors (A.S.C., S.K.S., M.B.O., D.G.A.S., F.D., P.J.B., M.W., F.R.M., and T.F.M.) are employees of Entos, Inc., or its affiliates. \n\nAuthor Contributions: A.S.C. and S.K.S. 
contributed equally to this work. \n\nData Availability: The 2.3 \u00d7 10\u2076 geometries and energy labels in the OrbNet Denali training set are openly available in FigShare at https://doi.org/10.6084/m9.figshare.14883867.\n\nPublished - 5.0061990.pdf
Submitted - 2107.00299.pdf
Supplemental Material - si.pdf
", "abstract": "We present OrbNet Denali, a machine learning model for an electronic structure that is designed as a drop-in replacement for ground-state density functional theory (DFT) energy calculations. The model is a message-passing graph neural network that uses symmetry-adapted atomic orbital features from a low-cost quantum calculation to predict the energy of a molecule. OrbNet Denali is trained on a vast dataset of 2.3 \u00d7 10\u2076 DFT calculations on molecules and geometries. This dataset covers the most common elements in biochemistry and organic chemistry (H, Li, B, C, N, O, F, Na, Mg, Si, P, S, Cl, K, Ca, Br, and I) and charged molecules. OrbNet Denali is demonstrated on several well-established benchmark datasets, and we find that it provides accuracy that is on par with modern DFT methods while offering a speedup of up to three orders of magnitude. For the GMTKN55 benchmark set, OrbNet Denali achieves WTMAD-1 and WTMAD-2 scores of 7.19 and 9.84, on par with modern DFT functionals. For several GMTKN55 subsets, which contain chemical problems that are not present in the training set, OrbNet Denali produces a mean absolute error comparable to those of DFT methods. For the Hutchison conformer benchmark set, OrbNet Denali has a median correlation coefficient of R\u00b2 = 0.90 compared to the reference DLPNO-CCSD(T) calculation and R\u00b2 = 0.97 compared to the method used to generate the training data (\u03c9B97X-D3/def2-TZVP), exceeding the performance of any other method with a similar cost. Similarly, the model reaches chemical accuracy for non-covalent interactions in the S66x10 dataset. 
For torsional profiles, OrbNet Denali reproduces the torsion profiles of \u03c9B97X-D3/def2-TZVP with an average mean absolute error of 0.12 kcal/mol for the potential energy surfaces of the diverse fragments in the TorsionNet500 dataset.", "date": "2021-11-28", "date_type": "published", "publication": "Journal of Chemical Physics", "volume": "155", "number": "20", "publisher": "American Institute of Physics", "pagerange": "Art. No. 204103", "id_number": "CaltechAUTHORS:20210831-203931813", "issn": "0021-9606", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210831-203931813", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Amazon Web Services" }, { "agency": "Caltech De Logi Fund" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "NVIDIA Corporation" }, { "agency": "Department of Energy (DOE)", "grant_number": "DE-AC02-05CH11231" }, { "agency": "Department of Energy (DOE)", "grant_number": "DE-AC05-00OR22725" } ] }, "doi": "10.1063/5.0061990", "primary_object": { "basename": "2107.00299.pdf", "url": "https://authors.library.caltech.edu/records/cwhg8-t6c31/files/2107.00299.pdf" }, "related_objects": [ { "basename": "5.0061990.pdf", "url": "https://authors.library.caltech.edu/records/cwhg8-t6c31/files/5.0061990.pdf" }, { "basename": "si.pdf", "url": "https://authors.library.caltech.edu/records/cwhg8-t6c31/files/si.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Christensen, Anders S.; Sirumalla, Sai Krishna; et al." 
}, { "id": "https://authors.library.caltech.edu/records/emvcf-kqt83", "eprint_id": 115604, "eprint_status": "archive", "datestamp": "2023-08-20 05:58:39", "lastmod": "2023-10-24 16:37:01", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Lee-Youngwoon", "name": { "family": "Lee", "given": "Youngwoon" } }, { "id": "Lim-Joseph-J", "name": { "family": "Lim", "given": "Joseph J." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Zhu-Yuke", "name": { "family": "Zhu", "given": "Yuke" }, "orcid": "0000-0002-9198-2227" } ] }, "title": "Adversarial Skill Chaining for Long-Horizon Robot Manipulation via Terminal State Regularization", "ispublished": "pub", "full_text_status": "public", "keywords": "Long-Horizon Manipulation, Skill Chaining, Reinforcement Learning", "note": "This work was initiated when Youngwoon Lee worked at NVIDIA Research as an intern. This research is also supported by the Annenberg Fellowship from USC and the Google Cloud Research Credits program with the award GCP19980904. We would like to thank Byron Boots for initial discussion, Jim Fan, De-An Huang, Christopher B. Choy, and NVIDIA AI Algorithms team for their insightful feedback, and the USC CLVR lab members for constructive feedback.\n\nPublished - lee22a.pdf
Accepted Version - 2111.07999.pdf
Supplemental Material - lee22a-supp.zip
", "abstract": "Skill chaining is a promising approach for synthesizing complex behaviors by sequentially combining previously learned skills. Yet, a naive composition of skills fails when a policy encounters a starting state never seen during its training. For successful skill chaining, prior approaches attempt to widen the policy's starting state distribution. However, these approaches require larger state distributions to be covered as more policies are sequenced, and thus are limited to short skill sequences. In this paper, we propose to chain multiple policies without excessively large initial state distributions by regularizing the terminal state distributions in an adversarial learning framework. We evaluate our approach on two complex long-horizon manipulation tasks of furniture assembly. Our results have shown that our method establishes the first model-free reinforcement learning algorithm to solve these tasks; whereas prior skill chaining approaches fail. The code and videos are available at https://clvrai.com/skill-chaining.", "date": "2021-11-15", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "164", "publisher": "ML Research Press", "pagerange": "406-416", "id_number": "CaltechAUTHORS:20220714-224643553", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220714-224643553", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "University of Southern California" }, { "agency": "Google Cloud", "grant_number": "GCP19980904" } ] }, "doi": "10.48550/arXiv.2111.07999", "primary_object": { "basename": "2111.07999.pdf", "url": "https://authors.library.caltech.edu/records/emvcf-kqt83/files/2111.07999.pdf" }, "related_objects": [ { "basename": "lee22a-supp.zip", "url": "https://authors.library.caltech.edu/records/emvcf-kqt83/files/lee22a-supp.zip" }, { "basename": "lee22a.pdf", 
"url": "https://authors.library.caltech.edu/records/emvcf-kqt83/files/lee22a.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Lee, Youngwoon; Lim, Joseph J.; et al." }, { "id": "https://authors.library.caltech.edu/records/qxbfh-mre98", "eprint_id": 111293, "eprint_status": "archive", "datestamp": "2023-08-20 05:08:17", "lastmod": "2023-10-23 20:30:10", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" }, { "id": "Liu-Yan", "name": { "family": "Liu", "given": "Yan" }, "orcid": "0000-0002-5837-4908" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Deep Learning to Automate Technical Skills Assessment in Robotic Surgery", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 American Medical Association.", "abstract": "Surgeon performance affects patient outcomes. To improve patient outcomes, we must identify poor surgical performance. However, surgeons may not always associate a specific surgical act with its consequential outcome unless the error is egregious and the outcome is immediate. Today, there is little formal structure for surgeons to receive specific technical skills feedback after formal training. 
Current hurdles for surgeons to obtain and maintain hospital privileges to perform an operative procedure include peer proctoring and evaluation, which are arguably insufficient when juxtaposed to the potentially devastating outcomes that can occur if surgical errors arise.", "date": "2021-09-15", "date_type": "published", "publication": "JAMA Surgery", "volume": "156", "number": "11", "publisher": "American Medical Association", "pagerange": "1059-1060", "id_number": "CaltechAUTHORS:20211008-183538597", "issn": "2168-6254", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20211008-183538597", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1001/jamasurg.2021.3651", "resource_type": "article", "pub_year": "2021", "author_list": "Hung, Andrew J.; Liu, Yan; et al." }, { "id": "https://authors.library.caltech.edu/records/03nhs-3kv35", "eprint_id": 110808, "eprint_status": "archive", "datestamp": "2023-08-22 10:45:12", "lastmod": "2023-10-23 19:53:00", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Chan-Justin", "name": { "family": "Chan", "given": "Justin" } }, { "id": "Pangal-Dhiraj-J", "name": { "family": "Pangal", "given": "Dhiraj J." 
}, "orcid": "0000-0001-7391-9825" }, { "id": "Cardinal-Tyler", "name": { "family": "Cardinal", "given": "Tyler" }, "orcid": "0000-0001-8277-6942" }, { "id": "Kugener-Guillaume", "name": { "family": "Kugener", "given": "Guillaume" }, "orcid": "0000-0002-4697-2847" }, { "id": "Zhu-Yichao", "name": { "family": "Zhu", "given": "Yichao" } }, { "id": "Roshannai-Arman", "name": { "family": "Roshannai", "given": "Arman" } }, { "id": "Markarian-Nicholas", "name": { "family": "Markarian", "given": "Nicholas" } }, { "id": "Sinha-Aditya", "name": { "family": "Sinha", "given": "Aditya" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" }, { "id": "Zada-Gabriel", "name": { "family": "Zada", "given": "Gabriel" }, "orcid": "0000-0001-5821-902X" }, { "id": "Donoho-Daniel-A", "name": { "family": "Donoho", "given": "Daniel A." }, "orcid": "0000-0002-0531-1436" } ] }, "title": "A systematic review of virtual reality for the assessment of technical skills in neurosurgery", "ispublished": "pub", "full_text_status": "public", "keywords": "virtual reality; augmented reality; technical assessment", "note": "\u00a9 AANS 2021. \n\nSubmitted March 31, 2021. Accepted May 19, 2021. \n\nAuthor Contributions: Conception and design: Chan, Pangal. Acquisition of data: Chan, Pangal. Analysis and interpretation of data: Chan, Pangal. Drafting the article: Chan, Pangal, Cardinal. Critically revising the article: all authors. Reviewed submitted version of manuscript: all authors. Approved the final version of the manuscript on behalf of all authors: Zada. \n\nAdministrative/technical/material support: Zada, Chan, Pangal, Cardinal, Donoho. Study supervision: Zada, Donoho. \n\nDisclosures: Dr. Hung is a consultant for Johnson & Johnson, Quantgene, and Mimic Technologies.\n\nSupplemental Material - SupplementalTables1-6_FOCUS21-210.pdf
", "abstract": "Objective: Virtual reality (VR) and augmented reality (AR) systems are increasingly available to neurosurgeons. These systems may provide opportunities for technical rehearsal and assessments of surgeon performance. The assessment of neurosurgeon skill in VR and AR environments and the validity of VR and AR feedback has not been systematically reviewed. \n\nMethods: A systematic review following the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines was conducted through MEDLINE and PubMed. Studies published in English between January 1990 and February 2021 describing the use of VR or AR to quantify surgical technical performance of neurosurgeons without the use of human raters were included. The types and categories of automated performance metrics (APMs) from each of these studies were recorded. \n\nResults: Thirty-three VR studies were included in the review; no AR studies met inclusion criteria. VR APMs were categorized as either distance to target, force, kinematics, time, blood loss, or volume of resection. Distance and time were the most well-studied APM domains, although all domains were effective at differentiating surgeon experience levels. Distance was successfully used to track improvements with practice. Examining volume of resection demonstrated that attending surgeons removed less simulated tumor but preserved more normal tissue than trainees. More recently, APMs have been used in machine learning algorithms to predict level of training with a high degree of accuracy. Key limitations to enhanced-reality systems include limited AR usage for automated surgical assessment and lack of external and longitudinal validation of VR systems. \n\nConclusions: VR has been used to assess surgeon performance across a wide spectrum of domains. The VR environment can be used to quantify surgeon performance, assess surgeon proficiency, and track training progression. 
AR systems have not yet been used to provide metrics for surgeon performance assessment despite potential for intraoperative integration. VR-based APMs may be especially useful for metrics that are difficult to assess intraoperatively, including blood loss and extent of resection.", "date": "2021-08", "date_type": "published", "publication": "Neurosurgical Focus", "volume": "51", "number": "2", "publisher": "American Association of Neurological Surgeons", "pagerange": "Art. No. E15", "id_number": "CaltechAUTHORS:20210910-182725636", "issn": "1092-0684", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210910-182725636", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.3171/2021.5.focus21210", "primary_object": { "basename": "SupplementalTables1-6_FOCUS21-210.pdf", "url": "https://authors.library.caltech.edu/records/03nhs-3kv35/files/SupplementalTables1-6_FOCUS21-210.pdf" }, "resource_type": "article", "pub_year": "2021", "author_list": "Chan, Justin; Pangal, Dhiraj J.; et al." 
}, { "id": "https://authors.library.caltech.edu/records/na91v-x7h45", "eprint_id": 110645, "eprint_status": "archive", "datestamp": "2023-08-20 04:02:06", "lastmod": "2023-10-23 19:46:58", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Liu-Bo", "name": { "family": "Liu", "given": "Bo" } }, { "id": "Liu-Qiang", "name": { "family": "Liu", "given": "Qiang" } }, { "id": "Stone-Peter", "name": { "family": "Stone", "given": "Peter" } }, { "id": "Garg-Animesh", "name": { "family": "Garg", "given": "Animesh" }, "orcid": "0000-0003-0482-4296" }, { "id": "Zhu-Yuke", "name": { "family": "Zhu", "given": "Yuke" }, "orcid": "0000-0002-9198-2227" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } } ] }, "title": "Coach-Player Multi-agent Reinforcement Learning for Dynamic Team Composition", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 The authors.\n\nPublished - liu21m.pdf
Accepted Version - 2105.08692.pdf
Supplemental Material - liu21m-supp.pdf
", "abstract": "In real-world multi-agent systems, agents with different capabilities may join or leave without altering the team's overarching goals. Coordinating teams with such dynamic composition is challenging: the optimal team strategy varies with the composition. We propose COPA, a coach-player framework to tackle this problem. We assume the coach has a global view of the environment and coordinates the players, who only have partial views, by distributing individual strategies. Specifically, we 1) adopt the attention mechanism for both the coach and the players; 2) propose a variational objective to regularize learning; and 3) design an adaptive communication method to let the coach decide when to communicate with the players. We validate our methods on a resource collection task, a rescue game, and the StarCraft micromanagement tasks. We demonstrate zero-shot generalization to new team compositions. Our method achieves comparable or better performance than the setting where all players have a full view of the environment. 
Moreover, we see that the performance remains high even when the coach communicates as little as 13% of the time using the adaptive communication strategy.", "date": "2021-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "139", "publisher": "ML Research Press", "pagerange": "6860-6870", "id_number": "CaltechAUTHORS:20210831-203857558", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210831-203857558", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2105.08692", "primary_object": { "basename": "2105.08692.pdf", "url": "https://authors.library.caltech.edu/records/na91v-x7h45/files/2105.08692.pdf" }, "related_objects": [ { "basename": "liu21m-supp.pdf", "url": "https://authors.library.caltech.edu/records/na91v-x7h45/files/liu21m-supp.pdf" }, { "basename": "liu21m.pdf", "url": "https://authors.library.caltech.edu/records/na91v-x7h45/files/liu21m.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Liu, Bo; Liu, Qiang; et al." }, { "id": "https://authors.library.caltech.edu/records/qydjs-btm85", "eprint_id": 109038, "eprint_status": "archive", "datestamp": "2023-08-20 03:57:15", "lastmod": "2023-10-23 17:31:38", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Chang-Nadine", "name": { "family": "Chang", "given": "Nadine" } }, { "id": "Yu-Zhiding", "name": { "family": "Yu", "given": "Zhiding" } }, { "id": "Wang-Yu-Xiong", "name": { "family": "Wang", "given": "Yu-Xiong" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Fidler-Sanja", "name": { "family": "Fidler", "given": "Sanja" } }, { "id": "Alvarez-Jose-M", "name": { "family": "Alvarez", "given": "Jose M." } } ] }, "title": "Image-Level or Object-Level? 
A Tale of Two Resampling Strategies for Long-Tailed Detection", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 by the author(s). \n\nWe would like to sincerely thank Achal Dave, Kenneth Marino, Senthil Purushwalkam and other NVIDIA colleagues for the discussion and constructive suggestions.\n\nPublished - chang21c.pdf
Submitted - 2104.05702.pdf
", "abstract": "Training on datasets with long-tailed distributions has been challenging for major recognition tasks such as classification and detection. To deal with this challenge, image resampling is typically introduced as a simple but effective approach. However, we observe that long-tailed detection differs from classification since multiple classes may be present in one image. As a result, image resampling alone is not enough to yield a sufficiently balanced distribution at the object-level. We address object-level resampling by introducing an object-centric sampling strategy based on a dynamic, episodic memory bank. Our proposed strategy has two benefits: 1) convenient object-level resampling without significant extra computation, and 2) implicit feature-level augmentation from model updates. We show that image-level and object-level resamplings are both important, and thus unify them with a joint resampling strategy. Our method achieves state-of-the-art performance on the rare categories of LVIS, with 1.89% and 3.13% relative improvements over Forest R-CNN on detection and instance segmentation.", "date": "2021-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "139", "publisher": "ML Research Press", "pagerange": "1463-1472", "id_number": "CaltechAUTHORS:20210510-134322482", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210510-134322482", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2104.05702", "primary_object": { "basename": "2104.05702.pdf", "url": "https://authors.library.caltech.edu/records/qydjs-btm85/files/2104.05702.pdf" }, "related_objects": [ { "basename": "chang21c.pdf", "url": "https://authors.library.caltech.edu/records/qydjs-btm85/files/chang21c.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Chang, Nadine; Yu, Zhiding; et al." 
}, { "id": "https://authors.library.caltech.edu/records/jb40p-2w034", "eprint_id": 110651, "eprint_status": "archive", "datestamp": "2023-08-20 04:02:22", "lastmod": "2023-10-23 19:47:12", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Fan-Linxi-Jim", "name": { "family": "Fan", "given": "Linxi" }, "orcid": "0000-0001-7393-3125" }, { "id": "Wang-Guanzhi", "name": { "family": "Wang", "given": "Guanzhi" } }, { "id": "Huang-De-An", "name": { "family": "Huang", "given": "De-An" }, "orcid": "0000-0002-6945-7768" }, { "id": "Yu-Zhiding", "name": { "family": "Yu", "given": "Zhiding" }, "orcid": "0000-0003-1776-996X" }, { "id": "Fei-Fei-Li", "name": { "family": "Fei-Fei", "given": "Li" }, "orcid": "0000-0002-7481-0810" }, { "id": "Zhu-Yuke", "name": { "family": "Zhu", "given": "Yuke" }, "orcid": "0000-0002-9198-2227" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "SECANT: Self-Expert Cloning for Zero-Shot Generalization of Visual Policies", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 by the author(s). \n\nWe are extremely grateful to Chris Choy, Jean Kossaifi, Shikun Liu, Zhiyuan \"Jerry\" Lin, Josiah Wong, Huaizu Jiang, Guanya Shi, Jacob Austin, Ismail Elezi, Ajay Mandlekar, Fei Xia, Agrim Gupta, Shyamal Buch, and many other colleagues for their helpful feedback and insightful discussions.\n\nPublished - fan21c.pdf
Accepted Version - 2106.09678.pdf
", "abstract": "Generalization has been a long-standing challenge for reinforcement learning (RL). Visual RL, in particular, can be easily distracted by irrelevant factors in high-dimensional observation space. In this work, we consider robust policy learning which targets zero-shot generalization to unseen visual environments with large distributional shift. We propose SECANT, a novel self-expert cloning technique that leverages image augmentation in two stages to *decouple* robust representation learning from policy optimization. Specifically, an expert policy is first trained by RL from scratch with weak augmentations. A student network then learns to mimic the expert policy by supervised learning with strong augmentations, making its representation more robust against visual variations compared to the expert. Extensive experiments demonstrate that SECANT significantly advances the state of the art in zero-shot generalization across 4 challenging domains. Our average reward improvements over prior SOTAs are: DeepMind Control (+26.5%), robotic manipulation (+337.8%), vision-based autonomous driving (+47.7%), and indoor object navigation (+15.8%). 
Code release and video are available at https://linxifan.github.io/secant-site/.", "date": "2021-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "139", "publisher": "ML Research Press", "pagerange": "3088-3099", "id_number": "CaltechAUTHORS:20210831-203918113", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210831-203918113", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2106.09678", "primary_object": { "basename": "2106.09678.pdf", "url": "https://authors.library.caltech.edu/records/jb40p-2w034/files/2106.09678.pdf" }, "related_objects": [ { "basename": "fan21c.pdf", "url": "https://authors.library.caltech.edu/records/jb40p-2w034/files/fan21c.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Fan, Linxi; Wang, Guanzhi; et al." }, { "id": "https://authors.library.caltech.edu/records/6a12y-8yq46", "eprint_id": 110647, "eprint_status": "archive", "datestamp": "2023-08-20 04:02:14", "lastmod": "2023-10-23 19:47:04", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Mahajan-Anuj", "name": { "family": "Mahajan", "given": "Anuj" } }, { "id": "Samvelyan-Mikayel", "name": { "family": "Samvelyan", "given": "Mikayel" } }, { "id": "Mao-Lei", "name": { "family": "Mao", "given": "Lei" } }, { "id": "Makoviychuk-Viktor", "name": { "family": "Makoviychuk", "given": "Viktor" } }, { "id": "Garg-Animesh", "name": { "family": "Garg", "given": "Animesh" }, "orcid": "0000-0003-0482-4296" }, { "id": "Kossaifi-Jean", "name": { "family": "Kossaifi", "given": "Jean" }, "orcid": "0000-0002-4445-3429" }, { "id": "Whiteson-Shimon", "name": { "family": "Whiteson", "given": "Shimon" } }, { "id": "Zhu-Yuke", "name": { "family": "Zhu", "given": "Yuke" }, "orcid": "0000-0002-9198-2227" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" 
} } ] }, "title": "Tesseract: Tensorised Actors for Multi-Agent Reinforcement Learning", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 The authors. \n\nAM is funded by the J.P. Morgan A.I. fellowship. Part of this work was done during AM's internship at NVIDIA. This project has received funding from the European Research Council under the European Union's Horizon 2020 research and innovation programme (grant agreement number 637713). The experiments were made possible by generous equipment grant from NVIDIA.\n\nPublished - mahajan21a.pdf
Accepted Version - 2106.00136.pdf
Supplemental Material - mahajan21a-supp.pdf
", "abstract": "Reinforcement Learning in large action spaces is a challenging problem. This is especially true for cooperative multi-agent reinforcement learning (MARL), which often requires tractable learning while respecting various constraints like communication budget and information about other agents. In this work, we focus on the fundamental hurdle affecting both value-based and policy-gradient approaches: an exponential blowup of the action space with the number of agents. For value-based methods, it poses challenges in accurately representing the optimal value function for value-based methods, thus inducing suboptimality. For policy gradient methods, it renders the critic ineffective and exacerbates the problem of the lagging critic. We show that from a learning theory perspective, both problems can be addressed by accurately representing the associated action-value function with a low-complexity hypothesis class. This requires accurately modelling the agent interactions in a sample efficient way. To this end, we propose a novel tensorised formulation of the Bellman equation. This gives rise to our method Tesseract, which utilises the view of Q-function seen as a tensor where the modes correspond to action spaces of different agents. Algorithms derived from Tesseract decompose the Q-tensor across the agents and utilise low-rank tensor approximations to model the agent interactions relevant to the task. We provide PAC analysis for Tesseract based algorithms and highlight their relevance to the class of rich observation MDPs. 
Empirical results in different domains confirm the gains in sample efficiency using Tesseract as supported by the theory.", "date": "2021-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "139", "publisher": "ML Research Press", "pagerange": "7301-7312", "id_number": "CaltechAUTHORS:20210831-203904421", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210831-203904421", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "J.P. Morgan A.I. fellowship" }, { "agency": "European Research Council (ERC)", "grant_number": "637713" }, { "agency": "NVIDIA Corporation" } ] }, "doi": "10.48550/arXiv.2106.00136", "primary_object": { "basename": "2106.00136.pdf", "url": "https://authors.library.caltech.edu/records/6a12y-8yq46/files/2106.00136.pdf" }, "related_objects": [ { "basename": "mahajan21a-supp.pdf", "url": "https://authors.library.caltech.edu/records/6a12y-8yq46/files/mahajan21a-supp.pdf" }, { "basename": "mahajan21a.pdf", "url": "https://authors.library.caltech.edu/records/6a12y-8yq46/files/mahajan21a.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Mahajan, Anuj; Samvelyan, Mikayel; et al." 
}, { "id": "https://authors.library.caltech.edu/records/rszc7-7g943", "eprint_id": 110023, "eprint_status": "archive", "datestamp": "2023-08-20 03:32:16", "lastmod": "2023-10-23 18:15:30", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Lale-Sahin", "name": { "family": "Lale", "given": "Sahin" }, "orcid": "0000-0002-7191-346X" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Hassibi-B", "name": { "family": "Hassibi", "given": "Babak" }, "orcid": "0000-0002-1375-5838" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Finite-time System Identification and Adaptive Control in Autoregressive Exogenous Systems", "ispublished": "pub", "full_text_status": "public", "keywords": "ARX systems, system identification, adaptive control, regret", "note": "\u00a9 2021 S. Lale, K. Azizzadenesheli, B. Hassibi & A. Anandkumar.\n\nPublished - lale21b.pdf
", "abstract": "Autoregressive exogenous (ARX) systems are the general class of input-output dynamical system used for modeling stochastic linear dynamical system (LDS) including partially observable LDS such as LQG systems. In this work, we study the problem of system identification and adaptive control of unknown ARX systems. We provide finite-time learning guarantees for the ARX systems under both open-loop and closed-loop data collection. Using these guarantees, we design adaptive control algorithms for unknown ARX systems with arbitrary strongly convex or non-strongly convex quadratic regulating costs. Under strongly convex cost functions, we design an adaptive control algorithm based on online gradient descent to design and update the controllers that are constructed via a convex controller reparametrization. We show that our algorithm has \u00d5(\u221aT) regret via explore and commit approach and if the model estimates are updated in epochs using closed-loop data collection, it attains the optimal regret of polylog(T) after T time-steps of interaction. For the case of non-strongly convex quadratic cost functions, we propose an adaptive control algorithm that deploys the optimism in the face of uncertainty principle to design the controller. 
In this setting, we show that the explore and commit approach has a regret upper bound of \u00d5(\u221aT^(2/3)), and the adaptive control with continuous model estimate updates attains \u00d5(\u221aT) regret after T time-steps.", "date": "2021-06", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "144", "publisher": "PMLR", "pagerange": "967-979", "id_number": "CaltechAUTHORS:20210727-162630002", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210727-162630002", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "primary_object": { "basename": "lale21b.pdf", "url": "https://authors.library.caltech.edu/records/rszc7-7g943/files/lale21b.pdf" }, "resource_type": "article", "pub_year": "2021", "author_list": "Lale, Sahin; Azizzadenesheli, Kamyar; et al." }, { "id": "https://authors.library.caltech.edu/records/0nxef-bvv19", "eprint_id": 110027, "eprint_status": "archive", "datestamp": "2023-08-20 03:32:21", "lastmod": "2023-10-23 18:15:38", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Yu-Jing", "name": { "family": "Yu", "given": "Jing" } }, { "id": "Gehring-Clement", "name": { "family": "Gehring", "given": "Clement" } }, { "id": "Sch\u00e4fer-Florian", "name": { "family": "Sch\u00e4fer", "given": "Florian" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } } ] }, "title": "Robust Reinforcement Learning: A Constrained Game-theoretic Approach", "ispublished": "pub", "full_text_status": "public", "keywords": "robust reinforcement learning, zero-sum game, adversarial training, competitive optimization, policy gradient", "note": "\u00a9 2021 J. Yu, C. Gehring, F. Sch\u00e4fer & A. Anandkumar. \n\nWe thank the anonymous referees for their valuable feedback. 
CG gratefully acknowledges support from NSF grant 1723381; from AFOSR grant FA9550-17-1-0165; from ONR grant N00014-18-1-2847 and from the MIT-IBM Watson Lab. FS gratefully acknowledges support by the Air Force Office of Scientific Research under award number FA9550-18-1-0271 (Games for Computation and Learning) and the Ronald and Maxine Linde Institute of Economic and Management Sciences at Caltech. AA is supported in part by the Bren endowed chair, Microsoft, Google, Facebook and Adobe faculty fellowships.\n\nPublished - yu21a.pdf
", "abstract": "Deep reinforcement learning (RL) methods provide state-of-art performance in complex control tasks. However, it has been widely recognized that RL methods often fail to generalize due to unaccounted uncertainties. In this work, we propose a game theoretic framework for robust reinforcement learning that comprises many previous works as special cases. We formulate robust RL as a constrained minimax game between the RL agent and an environmental agent which represents uncertainties such as model parameter variations and adversarial disturbances. To solve the competitive optimization problems arising in our framework, we propose to use competitive mirror descent (CMD). This method accounts for the interactive nature of the game at each iteration while using Bregman divergences to adapt to the global structure of the constraint set. We demonstrate an RRL policy gradient algorithm that leverages Lagrangian duality and CMD. We empirically show that our algorithm is stable for large step sizes, resulting in faster convergence on linear quadratic games.", "date": "2021-06", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "144", "publisher": "PMLR", "pagerange": "1242-1254", "id_number": "CaltechAUTHORS:20210727-172214672", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210727-172214672", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "IIS-1723381" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-17-1-0165" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-18-1-2847" }, { "agency": "Massachusetts Institute of Technology (MIT)" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-18-1-0271" }, { "agency": "Linde Institute of Economic and Management 
Science" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Microsoft" }, { "agency": "Google" }, { "agency": "Facebook" }, { "agency": "Adobe" } ] }, "primary_object": { "basename": "yu21a.pdf", "url": "https://authors.library.caltech.edu/records/0nxef-bvv19/files/yu21a.pdf" }, "resource_type": "article", "pub_year": "2021", "author_list": "Yu, Jing; Gehring, Clement; et al." }, { "id": "https://authors.library.caltech.edu/records/12k1k-p2p46", "eprint_id": 108207, "eprint_status": "archive", "datestamp": "2023-08-20 03:25:44", "lastmod": "2023-10-23 16:32:17", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Lale-Sahin", "name": { "family": "Lale", "given": "Sahin" }, "orcid": "0000-0002-7191-346X" }, { "id": "Teke-Oguzhan", "name": { "family": "Teke", "given": "Oguzhan" }, "orcid": "0000-0002-1131-5206" }, { "id": "Hassibi-B", "name": { "family": "Hassibi", "given": "Babak" }, "orcid": "0000-0002-1375-5838" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Stability and Identification of Random Asynchronous Linear Time-Invariant Systems", "ispublished": "pub", "full_text_status": "public", "keywords": "Random asynchronous systems, linear systems, stability, system identification", "note": "\u00a9 2021 S. Lale, O. Teke, B. Hassibi & A. Anandkumar.\n\nPublished - lale21a.pdf
Submitted - 2012.04160.pdf
", "abstract": "In many computational tasks and dynamical systems, asynchrony and randomization are naturally present and have been considered as ways to increase the speed and reduce the cost of computation while compromising the accuracy and convergence rate. In this work, we show the additional benefits of randomization and asynchrony on the stability of linear dynamical systems. We introduce a natural model for random asynchronous linear time-invariant (LTI) systems which generalizes the standard (synchronous) LTI systems. In this model, each state variable is updated randomly and asynchronously with some probability according to the underlying system dynamics. We examine how the mean-square stability of random asynchronous LTI systems vary with respect to randomization and asynchrony. Surprisingly, we show that the stability of random asynchronous LTI systems does not imply or is not implied by the stability of the synchronous variant of the system and an unstable synchronous system can be stabilized via randomization and/or asynchrony. We further study a special case of the introduced model, namely randomized LTI systems, where each state element is updated randomly with some fixed but unknown probability. We consider the problem of system identification of unknown randomized LTI systems using the precise characterization of mean-square stability via extended Lyapunov equation. For unknown randomized LTI systems, we propose a systematic identification method to recover the underlying dynamics. Given a single input/output trajectory, our method estimates the model parameters that govern the system dynamics, the update probability of state variables, and the noise covariance using the correlation matrices of collected data and the extended Lyapunov equation. 
Finally, we empirically demonstrate that the proposed method consistently recovers the underlying system dynamics with optimal rate.", "date": "2021-06", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "144", "publisher": "PMLR", "pagerange": "651-663", "id_number": "CaltechAUTHORS:20210225-132728423", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210225-132728423", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2012.04160", "primary_object": { "basename": "2012.04160.pdf", "url": "https://authors.library.caltech.edu/records/12k1k-p2p46/files/2012.04160.pdf" }, "related_objects": [ { "basename": "lale21a.pdf", "url": "https://authors.library.caltech.edu/records/12k1k-p2p46/files/lale21a.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Lale, Sahin; Teke, Oguzhan; et al." }, { "id": "https://authors.library.caltech.edu/records/bz7g6-w7j65", "eprint_id": 109025, "eprint_status": "archive", "datestamp": "2023-08-20 03:26:56", "lastmod": "2023-10-23 17:31:06", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Qu-Guannan", "name": { "family": "Qu", "given": "Guannan" }, "orcid": "0000-0002-5466-3550" }, { "id": "Shi-Yuanyuan", "name": { "family": "Shi", "given": "Yuanyuan" }, "orcid": "0000-0002-6182-7664" }, { "id": "Lale-Sahin", "name": { "family": "Lale", "given": "Sahin" }, "orcid": "0000-0002-7191-346X" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Wierman-A", "name": { "family": "Wierman", "given": "Adam" }, "orcid": "0000-0002-5923-0199" } ] }, "title": "Stable Online Control of Linear Time-Varying Systems", "ispublished": "pub", "full_text_status": "public", "keywords": "Time-varying systems, online linear quadratic control, stability guarantee", "note": 
"\u00a9 2021 G. Qu, Y. Shi, S. Lale, A. Anandkumar & A. Wierman.\n\nPublished - qu21a.pdf
Submitted - 2104.14134.pdf
", "abstract": "Linear time-varying (LTV) systems are widely used for modeling real-world dynamical systems due to their generality and simplicity. Providing stability guarantees for LTV systems is one of the central problems in control theory. However, existing approaches that guarantee stability typically lead to significantly sub-optimal cumulative control cost in online settings where only current or short-term system information is available. In this work, we propose an efficient online control algorithm, COvariance Constrained Online Linear Quadratic (COCO-LQ) control, that guarantees input-to-state stability for a large class of LTV systems while also minimizing the control cost. The proposed method incorporates a state covariance constraint into the semi-definite programming (SDP) formulation of the LQ optimal controller. We empirically demonstrate the performance of COCO-LQ in both synthetic experiments and a power system frequency control example.", "date": "2021-06", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "144", "publisher": "PMLR", "pagerange": "742-753", "id_number": "CaltechAUTHORS:20210510-092451106", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210510-092451106", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.2104.14134", "primary_object": { "basename": "2104.14134.pdf", "url": "https://authors.library.caltech.edu/records/bz7g6-w7j65/files/2104.14134.pdf" }, "related_objects": [ { "basename": "qu21a.pdf", "url": "https://authors.library.caltech.edu/records/bz7g6-w7j65/files/qu21a.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Qu, Guannan; Shi, Yuanyuan; et al." 
}, { "id": "https://authors.library.caltech.edu/records/kasz1-0dp07", "eprint_id": 105591, "eprint_status": "archive", "datestamp": "2023-08-20 02:54:36", "lastmod": "2023-10-20 22:09:03", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Luongo-Francisco-J", "name": { "family": "Luongo", "given": "Francisco" } }, { "id": "Hakim-Ryan", "name": { "family": "Hakim", "given": "Ryan" } }, { "id": "Nguyen-Jessica-H", "name": { "family": "Nguyen", "given": "Jessica H." }, "orcid": "0000-0003-0454-8463" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Hung-Andrew-J", "name": { "family": "Hung", "given": "Andrew J." }, "orcid": "0000-0002-7201-6736" } ] }, "title": "Deep learning-based computer vision to recognize and classify suturing gestures in robot-assisted surgery", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2020 Elsevier Inc. \n\nAccepted 6 August 2020, Available online 26 September 2020. \n\nWe would like to acknowledge Jian Chen, Shubham Bhatia, Kartik Aron, and Vijay Damerla for procedure segmentation and suturing gesture labeling. \n\nConflict of interest/Disclosure: Andrew J. Hung has financial disclosures with Quantgene, Inc (consultant), Mimic Technologies, Inc (consultant), and Johnson & Johnson (consultant). \n\nThis study is supported in part by the National Institute of Biomedical Imaging and Bioengineering of the National Institutes of Health under award number K23EB026493.\n\nAccepted Version - nihms-1625729.pdf
Submitted - 2008.11833.pdf
", "abstract": "Background: Our previous work classified a taxonomy of needle driving gestures during a vesicourethral anastomosis of robotic radical prostatectomy in association with tissue tears and patient outcomes. Herein, we train deep learning-based computer vision to automate the identification and classification of suturing gestures for needle driving attempts. \n\nMethods: Two independent raters manually annotated live suturing video clips to label timepoints and gestures. Identification (2,395 videos) and classification (511 videos) datasets were compiled to train computer vision models to produce 2- and 5-class label predictions, respectively. Networks were trained on inputs of raw red/blue/green pixels as well as optical flow for each frame. We explore the effect of different recurrent models (long short-term memory versus convolutional long short-term memory). All models were trained on 80/20 train/test splits. \n\nResults: We observe that all models are able to reliably predict either the presence of a gesture (identification, area under the curve: 0.88) as well as the type of gesture (classification, area under the curve: 0.87) at significantly above chance levels. For both gesture identification and classification datasets, we observed no effect of recurrent classification model choice on performance. \n\nConclusion: Our results demonstrate computer vision's ability to recognize features that not only can identify the action of suturing but also distinguish between different classifications of suturing gestures. 
This demonstrates the potential to utilize deep learning computer vision toward future automation of surgical skill assessment.", "date": "2021-05", "date_type": "published", "publication": "Surgery", "volume": "169", "number": "5", "publisher": "Elsevier", "pagerange": "1240-1244", "id_number": "CaltechAUTHORS:20200928-140721280", "issn": "0039-6060", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200928-140721280", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "K23EB026493" } ] }, "doi": "10.1016/j.surg.2020.08.016", "pmcid": "PMC7994208", "primary_object": { "basename": "2008.11833.pdf", "url": "https://authors.library.caltech.edu/records/kasz1-0dp07/files/2008.11833.pdf" }, "related_objects": [ { "basename": "nihms-1625729.pdf", "url": "https://authors.library.caltech.edu/records/kasz1-0dp07/files/nihms-1625729.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Luongo, Francisco; Hakim, Ryan; et al." }, { "id": "https://authors.library.caltech.edu/records/gaad6-9qt19", "eprint_id": 109489, "eprint_status": "archive", "datestamp": "2023-08-20 03:02:57", "lastmod": "2023-10-23 18:00:38", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Panagakis-Yannis", "name": { "family": "Panagakis", "given": "Yannis" }, "orcid": "0000-0003-0153-5210" }, { "id": "Kossaifi-Jean", "name": { "family": "Kossaifi", "given": "Jean" }, "orcid": "0000-0002-4445-3429" }, { "id": "Chrysos-Grigorios-G", "name": { "family": "Chrysos", "given": "Grigorios G." }, "orcid": "0000-0002-0650-1856" }, { "id": "Oldfield-James", "name": { "family": "Oldfield", "given": "James" }, "orcid": "0000-0002-7000-5179" }, { "id": "Nicolaou-Mihalis-A", "name": { "family": "Nicolaou", "given": "Mihalis A." 
}, "orcid": "0000-0001-9175-477X" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } }, { "id": "Zafeiriou-Stefanos", "name": { "family": "Zafeiriou", "given": "Stefanos" }, "orcid": "0000-0002-5222-1740" } ] }, "title": "Tensor Methods in Computer Vision and Deep Learning", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 IEEE. \n\nManuscript received August 8, 2020; revised December 23, 2020 and March 10, 2021; accepted April 12, 2021. Date of current version April 30, 2021. \n\nThe work of Stefanos Zafeiriou was supported in part by the Engineering and Physical Sciences Research Council (EPSRC) Fellowship DEFORM: Large Scale Shape Analysis of Deformable Models of Humans under Grant EP/S010203/1.\n\nAccepted Version - 2107.03436.pdf
", "abstract": "Tensors, or multidimensional arrays, are data structures that can naturally represent visual data of multiple dimensions. Inherently able to efficiently capture structured, latent semantic spaces and high-order interactions, tensors have a long history of applications in a wide span of computer vision problems. With the advent of the deep learning paradigm shift in computer vision, tensors have become even more fundamental. Indeed, essential ingredients in modern deep learning architectures, such as convolutions and attention mechanisms, can readily be considered as tensor mappings. In effect, tensor methods are increasingly finding significant applications in deep learning, including the design of memory and compute efficient network architectures, improving robustness to random noise and adversarial attacks, and aiding the theoretical understanding of deep networks. This article provides an in-depth and practical review of tensors and tensor methods in the context of representation learning and deep learning, with a particular focus on visual data analysis and computer vision applications. Concretely, besides fundamental work in tensor-based visual data analysis methods, we focus on recent developments that have brought on a gradual increase in tensor methods, especially in deep learning architectures and their implications in computer vision applications. 
To further enable the newcomer to grasp such concepts quickly, we provide companion Python notebooks, covering key aspects of this article and implementing them, step-by-step with TensorLy.", "date": "2021-05", "date_type": "published", "publication": "Proceedings of the IEEE", "volume": "109", "number": "5", "publisher": "IEEE", "pagerange": "863-890", "id_number": "CaltechAUTHORS:20210611-152119929", "issn": "0018-9219", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210611-152119929", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Engineering and Physical Sciences Research Council (EPSRC)", "grant_number": "EP/S010203/1" } ] }, "doi": "10.1109/jproc.2021.3074329", "primary_object": { "basename": "2107.03436.pdf", "url": "https://authors.library.caltech.edu/records/gaad6-9qt19/files/2107.03436.pdf" }, "resource_type": "article", "pub_year": "2021", "author_list": "Panagakis, Yannis; Kossaifi, Jean; et al." }, { "id": "https://authors.library.caltech.edu/records/p1h5b-5rx70", "eprint_id": 108161, "eprint_status": "archive", "datestamp": "2023-08-20 02:39:36", "lastmod": "2023-10-20 23:07:59", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kashinath-Karthik", "name": { "family": "Kashinath", "given": "K." }, "orcid": "0000-0002-9311-5215" }, { "id": "Mustafa-M", "name": { "family": "Mustafa", "given": "M." } }, { "id": "Albert-A", "name": { "family": "Albert", "given": "A." } }, { "id": "Wu-J-L", "name": { "family": "Wu", "given": "J-L." } }, { "id": "Jiang-C", "name": { "family": "Jiang", "given": "C." } }, { "id": "Esmaeilzadeh-Soheil", "name": { "family": "Esmaeilzadeh", "given": "S." }, "orcid": "0000-0001-6122-9122" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "K." }, "orcid": "0000-0001-8507-1868" }, { "id": "Wang-R", "name": { "family": "Wang", "given": "R." 
} }, { "id": "Chattopadhyay-A", "name": { "family": "Chattopadhyay", "given": "A." } }, { "id": "Singh-A", "name": { "family": "Singh", "given": "A." } }, { "id": "Manepalli-A", "name": { "family": "Manepalli", "given": "A." } }, { "id": "Chirila-Dragos", "name": { "family": "Chirila", "given": "D." }, "orcid": "0000-0002-6394-4688" }, { "id": "Yu-R", "name": { "family": "Yu", "given": "R." } }, { "id": "Walters-R", "name": { "family": "Walters", "given": "R." } }, { "id": "White-Brian", "name": { "family": "White", "given": "B." }, "orcid": "0000-0002-3739-9604" }, { "id": "Xiao-H", "name": { "family": "Xiao", "given": "H." } }, { "id": "Tchelepi-Hamdi-A", "name": { "family": "Tchelepi", "given": "H. A." }, "orcid": "0000-0002-3084-6635" }, { "id": "Marcus-P", "name": { "family": "Marcus", "given": "P." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "A." } }, { "id": "Hassanzadeh-Pedram", "name": { "family": "Hassanzadeh", "given": "P." }, "orcid": "0000-0001-9425-8085" } ] }, "title": "Physics-informed machine learning: case studies for weather and climate modelling", "ispublished": "pub", "full_text_status": "restricted", "keywords": "neural networks, physical constraints, turbulent flows, physics-informed machine learning, weather and climate modeling", "note": "\u00a9 2021 The Author(s). Published by the Royal Society. \n\nManuscript accepted 24/11/2020; Published online 15/02/2021;\nPublished in print 05/04/2021. \n\nThis article is part of the theme issue 'Machine learning for weather and climate modelling'. 
\n\nData accessibility: Data, code and supporting materials are publicly available via the following links: https://github.com/jinlong83/statistical-constrained-GANS; https://github.com/maxjiang93/space_time_pde; https://github.com/Rose-STL-Lab/Turbulent-Flow-Net; https://github.com/Rui1521/Equivariant-Neural-Nets; https://github.com/ashesh6810/Deep-Spatial-Transformers; https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=GLOBAL_ANALYSIS_FORECAST_PHY_001_024; https://portal.edirepository.org/nis/mapbrowse?packageid=edi.200.6; https://doi.org/10.6073/pasta/8f19c5d19d816857e55077ba20570265; https://prism.oregonstate.edu/; https://github.com/arkadaw9/PGA_LSTM; https://lter.limnology.wisc.edu/data; https://gitlab.com/mspritch/spcam3.0-neural-net; https://doi.org/10.5281/zenodo.2559313. \n\nAuthors' contributions: K.K. conceived the idea and designed the structure of the manuscript, wrote the manuscript, and responded to reviewer comments. K.K., M.M., and A.A. led the majority of the research reviewed as case studies in this article. The rest of the authors contributed to the research reviewed as case studies or provided feedback on sections of the manuscript. K.K. dedicates this work to A.A., a colleague and dear friend, who unfortunately was killed in a hit-and-run road accident while he was biking, during the course of preparation of this manuscript. \n\nWe declare we have no competing interests. \n\nNo funding has been received for this article.", "abstract": "Machine learning (ML) provides novel and powerful ways of accurately and efficiently recognizing complex patterns, emulating nonlinear dynamics, and predicting the spatio-temporal evolution of weather and climate processes. Off-the-shelf ML models, however, do not necessarily obey the fundamental governing laws of physical systems, nor do they generalize well to scenarios on which they have not been trained. 
We survey systematic approaches to incorporating physics and domain knowledge into ML models and distill these approaches into broad categories. Through 10 case studies, we show how these approaches have been used successfully for emulating, downscaling, and forecasting weather and climate processes. The accomplishments of these studies include greater physical consistency, reduced training time, improved data efficiency, and better generalization. Finally, we synthesize the lessons learned and identify scientific, diagnostic, computational, and resource challenges for developing truly robust and reliable physics-informed ML models for weather and climate processes.", "date": "2021-04-05", "date_type": "published", "publication": "Philosophical Transactions A: Mathematical, Physical and Engineering Sciences", "volume": "379", "number": "2194", "publisher": "Royal Society of London", "pagerange": "Art. No. 20200093", "id_number": "CaltechAUTHORS:20210223-154127043", "issn": "1364-503X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210223-154127043", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1098/rsta.2020.0093", "resource_type": "article", "pub_year": "2021", "author_list": "Kashinath, K.; Mustafa, M.; et al." 
}, { "id": "https://authors.library.caltech.edu/records/vat05-w9c33", "eprint_id": 106577, "eprint_status": "archive", "datestamp": "2023-08-20 02:28:47", "lastmod": "2023-10-20 23:36:10", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Zhao-Eric", "name": { "family": "Zhao", "given": "Eric" }, "orcid": "0000-0002-9595-0150" }, { "id": "Liu-Anqi", "name": { "family": "Liu", "given": "Anqi" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Yue-Yisong", "name": { "family": "Yue", "given": "Yisong" }, "orcid": "0000-0001-9127-1989" } ] }, "title": "Active Learning under Label Shift", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2021 by the author(s). \n\nAnqi Liu is supported by the PIMCO Postdoctoral Fellowship. Prof. Anandkumar is supported by Bren endowed Chair, faculty awards from Microsoft, Google, and Adobe, Beyond Limits, and LwLL grants. This work is also supported by funding from Raytheon and NASA TRISH.\n\nPublished - zhao21b.pdf
Submitted - 2007.08479.pdf
Supplemental Material - zhao21b-supp.pdf
", "abstract": "We address the problem of active learning under label shift: when the class proportions of source and target domains differ. We introduce a \"medial distribution\" to incorporate a tradeoff between importance weighting and class-balanced sampling and propose their combined usage in active learning. Our method is known as Mediated Active Learning under Label Shift (MALLS). It balances the bias from class-balanced sampling and the variance from importance weighting. We prove sample complexity and generalization guarantees for MALLS which show active learning reduces asymptotic sample complexity even under arbitrary label shift. We empirically demonstrate MALLS scales to high-dimensional datasets and can reduce the sample complexity of active learning by 60% in deep active learning tasks.", "date": "2021-04", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "130", "publisher": "PMLR", "pagerange": "3412-3420", "id_number": "CaltechAUTHORS:20201110-074357009", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20201110-074357009", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "PIMCO Postdoctoral Fellowship" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" }, { "agency": "Learning with Less Labels (LwLL)" }, { "agency": "Raytheon Company" }, { "agency": "NASA" } ] }, "doi": "10.48550/arXiv.2007.08479", "primary_object": { "basename": "2007.08479.pdf", "url": "https://authors.library.caltech.edu/records/vat05-w9c33/files/2007.08479.pdf" }, "related_objects": [ { "basename": "zhao21b-supp.pdf", "url": "https://authors.library.caltech.edu/records/vat05-w9c33/files/zhao21b-supp.pdf" }, { "basename": "zhao21b.pdf", "url": 
"https://authors.library.caltech.edu/records/vat05-w9c33/files/zhao21b.pdf" } ], "resource_type": "article", "pub_year": "2021", "author_list": "Zhao, Eric; Liu, Anqi; et al." }, { "id": "https://authors.library.caltech.edu/records/m40dq-4s262", "eprint_id": 103472, "eprint_status": "archive", "datestamp": "2023-08-20 02:27:33", "lastmod": "2023-10-20 16:23:53", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Nakka-Yashwanth-K", "name": { "family": "Nakka", "given": "Yashwanth Kumar" }, "orcid": "0000-0001-7897-3644" }, { "id": "Liu-Anqi", "name": { "family": "Liu", "given": "Anqi" } }, { "id": "Shi-Guanya", "name": { "family": "Shi", "given": "Guanya" }, "orcid": "0000-0002-9075-3705" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } }, { "id": "Yue-Yisong", "name": { "family": "Yue", "given": "Yisong" }, "orcid": "0000-0001-9127-1989" }, { "id": "Chung-Soon-Jo", "name": { "family": "Chung", "given": "Soon-Jo" }, "orcid": "0000-0002-6657-3907" } ] }, "title": "Chance-Constrained Trajectory Optimization for Safe Exploration and Learning of Nonlinear Systems", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2020 IEEE. \n\nManuscript received May 8, 2020; accepted October 1, 2020. Date of publication December 10, 2020; date of current version December 28, 2020. \n\nThis letter was recommended for publication by Associate Editor L. Tapia and Editor N. Amato upon evaluation of the reviewers' comments. This work was supported by the Jet Propulsion Laboratory, Caltech and the Raytheon Company. The work of Anqi Liu was supported by a PIMCO Postdoctoral Fellowship. \n\nWe acknowledge the contribution of Irene S. Crowell in\nimplementing Info-SNOC.\n\nSubmitted - 2005.04374.pdf
", "abstract": "Learning-based control algorithms require data collection with abundant supervision for training. Safe exploration algorithms ensure the safety of this data collection process even when only partial knowledge is available. We present a new approach for optimal motion planning with safe exploration that integrates chance-constrained stochastic optimal control with dynamics learning and feedback control. We derive an iterative convex optimization algorithm that solves an Information-cost Stochastic Nonlinear Optimal Control problem (Info-SNOC). The optimization objective encodes control cost for performance and exploration cost for learning, and the safety is incorporated as distributionally robust chance constraints. The dynamics are predicted from a robust regression model that is learned from data. The Info-SNOC algorithm is used to compute a sub-optimal pool of safe motion plans that aid in exploration for learning unknown residual dynamics under safety constraints. A stable feedback controller is used to execute the motion plan and collect data for model learning. We prove the safety of rollout from our exploration method and reduction in uncertainty over epochs, thereby guaranteeing the consistency of our learning method. We validate the effectiveness of Info-SNOC by designing and implementing a pool of safe trajectories for a planar robot. 
We demonstrate that our approach has higher success rate in ensuring safety when compared to a deterministic trajectory optimization approach.", "date": "2021-04", "date_type": "published", "publication": "IEEE Robotics and Automation Letters", "volume": "6", "number": "2", "publisher": "IEEE", "pagerange": "389-396", "id_number": "CaltechAUTHORS:20200526-150616242", "issn": "2377-3766", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200526-150616242", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "JPL/Caltech" }, { "agency": "Raytheon Company" }, { "agency": "PIMCO" } ] }, "local_group": { "items": [ { "id": "GALCIT" }, { "id": "Center-for-Autonomous-Systems-and-Technologies-(CAST)" } ] }, "doi": "10.1109/LRA.2020.3044033", "primary_object": { "basename": "2005.04374.pdf", "url": "https://authors.library.caltech.edu/records/m40dq-4s262/files/2005.04374.pdf" }, "resource_type": "article", "pub_year": "2021", "author_list": "Nakka, Yashwanth Kumar; Liu, Anqi; et al." }, { "id": "https://authors.library.caltech.edu/records/355ty-9zz43", "eprint_id": 102676, "eprint_status": "archive", "datestamp": "2023-08-19 23:33:17", "lastmod": "2023-10-20 00:24:03", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Chu-Linda-C", "name": { "family": "Chu", "given": "Linda C." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Shin-Hoo-Chang", "name": { "family": "Shin", "given": "Hoo Chang" } }, { "id": "Fishman-Elliot-K", "name": { "family": "Fishman", "given": "Elliot K." } } ] }, "title": "The Potential Dangers of Artificial Intelligence for Radiology and Radiologists", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2020 American College of Radiology. \n\nAvailable online 17 April 2020. 
\n\nThe authors thank senior science editor Edmund Weisberg, MS, MBE, for his editorial assistance. The authors state that they have no conflict of interest related to the material discussed in this article.\n\nPublished - Chu_2020p1309.pdf
", "abstract": "With the advent of artificial intelligence (AI) across many fields and subspecialties, there are considerable expectations for transformative impact. However, there are also concerns regarding the potential abuse of AI. Many scientists have been worried about the dangers of AI leading to \"biased\" conclusions, in part because of the enthusiasm of the inventor or overenthusiasm among the general public. Here, though, we consider some scenarios in which people may intend to cause potential errors within data sets of analyzed information, resulting in incorrect conclusions and leading to potential problems with patient care and outcomes.", "date": "2020-10", "date_type": "published", "publication": "Journal of the American College of Radiology", "volume": "17", "number": "10", "publisher": "Elsevier", "pagerange": "1309-1311", "id_number": "CaltechAUTHORS:20200420-154710784", "issn": "1546-1440", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200420-154710784", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1016/j.jacr.2020.04.010", "pmcid": "PMC7164850", "primary_object": { "basename": "Chu_2020p1309.pdf", "url": "https://authors.library.caltech.edu/records/355ty-9zz43/files/Chu_2020p1309.pdf" }, "resource_type": "article", "pub_year": "2020", "author_list": "Chu, Linda C.; Anandkumar, Animashree; et al." 
}, { "id": "https://authors.library.caltech.edu/records/4m70t-56j02", "eprint_id": 104991, "eprint_status": "archive", "datestamp": "2023-08-19 23:30:13", "lastmod": "2023-10-20 21:02:06", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Qiao-Zhuoran", "name": { "family": "Qiao", "given": "Zhuoran" }, "orcid": "0000-0002-5704-7331" }, { "id": "Welborn-Matthew-G", "name": { "family": "Welborn", "given": "Matthew" }, "orcid": "0000-0001-8659-6535" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Manby-Frederick-R", "name": { "family": "Manby", "given": "Frederick R." }, "orcid": "0000-0001-7611-714X" }, { "id": "Miller-T-F-III", "name": { "family": "Miller", "given": "Thomas F., III" }, "orcid": "0000-0002-1882-5380" } ] }, "title": "OrbNet: Deep learning for quantum chemistry using symmetry-adapted atomic-orbital features", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2020 Published under license by AIP Publishing. \n\nSubmitted: 16 July 2020; Accepted: 7 September 2020; Published Online: 25 September 2020. \n\nThe authors thank Lixue Sherry Cheng for providing geometries for the DrugBank-T dataset and Anders Christensen for helpful comments on the manuscript. Z.Q. acknowledges the graduate research funding from Caltech. T.F.M. and A.A. acknowledge partial support from the Caltech DeLogi fund, and A.A. acknowledges support from a Caltech Bren professorship.\n\nPublished - 5.0021955.pdf
Submitted - 2007.08026.pdf
Supplemental Material - drugbank-t_geometries.zip
Supplemental Material - splits.zip
", "abstract": "We introduce a machine learning method in which energy solutions from the Schr\u00f6dinger equation are predicted using symmetry adapted atomic orbital features and a graph neural-network architecture. OrbNet is shown to outperform existing methods in terms of learning efficiency and transferability for the prediction of density functional theory results while employing low-cost features that are obtained from semi-empirical electronic structure calculations. For applications to datasets of drug-like molecules, including QM7b-T, QM9, GDB-13-T, DrugBank, and the conformer benchmark dataset of Folmsbee and Hutchison [Int. J. Quantum Chem. (published online) (2020)], OrbNet predicts energies within chemical accuracy of density functional theory at a computational cost that is 1000-fold or more reduced.", "date": "2020-09-28", "date_type": "published", "publication": "Journal of Chemical Physics", "volume": "153", "number": "12", "publisher": "American Institute of Physics", "pagerange": "Art. No. 
124111", "id_number": "CaltechAUTHORS:20200818-095759329", "issn": "0021-9606", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200818-095759329", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Caltech De Logi Fund" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" } ] }, "doi": "10.1063/5.0021955", "primary_object": { "basename": "2007.08026.pdf", "url": "https://authors.library.caltech.edu/records/4m70t-56j02/files/2007.08026.pdf" }, "related_objects": [ { "basename": "5.0021955.pdf", "url": "https://authors.library.caltech.edu/records/4m70t-56j02/files/5.0021955.pdf" }, { "basename": "drugbank-t_geometries.zip", "url": "https://authors.library.caltech.edu/records/4m70t-56j02/files/drugbank-t_geometries.zip" }, { "basename": "splits.zip", "url": "https://authors.library.caltech.edu/records/4m70t-56j02/files/splits.zip" } ], "resource_type": "article", "pub_year": "2020", "author_list": "Qiao, Zhuoran; Welborn, Matthew; et al." }, { "id": "https://authors.library.caltech.edu/records/3gm5w-asa82", "eprint_id": 106483, "eprint_status": "archive", "datestamp": "2023-08-19 22:45:12", "lastmod": "2023-10-20 23:32:06", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Ren-Hongyu", "name": { "family": "Ren", "given": "Hongyu" } }, { "id": "Zhu-Yuke", "name": { "family": "Zhu", "given": "Yuke" }, "orcid": "0000-0002-9198-2227" }, { "id": "Leskovec-J", "name": { "family": "Leskovec", "given": "Jure" }, "orcid": "0000-0002-5411-923X" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } }, { "id": "Garg-Animesh", "name": { "family": "Garg", "given": "Animesh" }, "orcid": "0000-0003-0482-4296" } ] }, "title": "OCEAN: Online Task Inference for Compositional Tasks with Context Adaptation", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 The authors and PMLR 2020. 
\nA.G. is a CIFAR AI chair and also acknowledges Vector Institute for computing support. J. L. is a Chan Zuckerberg Biohub investigator. We gratefully acknowledge the support of DARPA under Nos. FA865018C7880 (ASED), N660011924033 (MCS); ARO under Nos. W911NF-16-1-0342 (MURI), W911NF-16-1-0171 (DURIP); NSF under Nos. OAC-1835598 (CINES), OAC-1934578 (HDR), CCF-1918940 (Expeditions), IIS-2030477 (RAPID); Stanford Data Science Initiative, Wu Tsai Neurosciences Institute, Chan Zuckerberg Biohub, Amazon, Boeing, Chase, Docomo, Hitachi, Huawei, JD.com, NVIDIA, Dell. The U.S. Government is authorized to reproduce and distribute reprints for Governmental purposes notwithstanding any copyright notation thereon. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views, policies, or endorsements, either expressed or implied, of DARPA, NIH, ARO, or the U.S. Government.\n\nPublished - ren20a.pdf
Accepted Version - 2008.07087.pdf
Supplemental Material - ren20a-supp.pdf
", "abstract": "Real-world tasks often exhibit a compositional structure that contains a sequence of simpler sub-tasks. For instance, opening a door requires reaching, grasping, rotating, and pulling the door knob. Such compositional tasks require an agent to reason about the sub-task at hand while orchestrating global behavior accordingly. This can be cast as an online task inference problem, where the current task identity, represented by a context variable, is estimated from the agent's past experiences with probabilistic inference. Previous approaches have employed simple latent distributions, e.g., Gaussian, to model a single context for the entire task. However, this formulation lacks the expressiveness to capture the composition and transition of the sub-tasks. We propose a variational inference framework OCEAN to perform online task inference for compositional tasks. OCEAN models global and local context variables in a joint latent space, where the global variables represent a mixture of sub-tasks required for the task, while the local variables capture the transitions between the sub-tasks. Our framework supports flexible latent distributions based on prior knowledge of the task structure and can be trained in an unsupervised manner. 
Experimental results show that OCEAN provides more effective task inference with sequential context adaptation and thus leads to a performance boost on complex, multi-stage tasks.", "date": "2020-08", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "124", "publisher": "PMLR", "pagerange": "1378-1387", "id_number": "CaltechAUTHORS:20201106-120151731", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20201106-120151731", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Canadian Institute for Advanced Research (CIFAR)" }, { "agency": "Chan-Zuckerberg Biohub" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)", "grant_number": "FA865018C7880" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)", "grant_number": "N660011924033" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-16-1-0342" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-16-1-0171" }, { "agency": "NSF", "grant_number": "OAC-1835598" }, { "agency": "NSF", "grant_number": "OAC-1934578" }, { "agency": "NSF", "grant_number": "CCF-1918940" }, { "agency": "NSF", "grant_number": "IIS-2030477" }, { "agency": "Stanford University" }, { "agency": "Wu Tsai Neurosciences Institute" }, { "agency": "Amazon" }, { "agency": "Boeing Corporation" }, { "agency": "Chase Manhattan Bank" }, { "agency": "Docomo" }, { "agency": "Hitachi" }, { "agency": "Huawei" }, { "agency": "JD.com" }, { "agency": "NVIDIA Corporation" }, { "agency": "Dell Inc." 
} ] }, "doi": "10.48550/arXiv.2008.07087", "primary_object": { "basename": "ren20a-supp.pdf", "url": "https://authors.library.caltech.edu/records/3gm5w-asa82/files/ren20a-supp.pdf" }, "related_objects": [ { "basename": "ren20a.pdf", "url": "https://authors.library.caltech.edu/records/3gm5w-asa82/files/ren20a.pdf" }, { "basename": "2008.07087.pdf", "url": "https://authors.library.caltech.edu/records/3gm5w-asa82/files/2008.07087.pdf" } ], "resource_type": "article", "pub_year": "2020", "author_list": "Ren, Hongyu; Zhu, Yuke; et al." }, { "id": "https://authors.library.caltech.edu/records/rztks-hw818", "eprint_id": 94168, "eprint_status": "archive", "datestamp": "2023-08-19 22:28:55", "lastmod": "2023-10-20 17:44:56", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kossaifi-J", "name": { "family": "Kossaifi", "given": "Jean" } }, { "id": "Lipton-Z-C", "name": { "family": "Lipton", "given": "Zachary C." } }, { "id": "Kolbeinsson-A", "name": { "family": "Kolbeinsson", "given": "Arinbj\u00f6rn" } }, { "id": "Khanna-A", "name": { "family": "Khanna", "given": "Aran" } }, { "id": "Furlanello-T", "name": { "family": "Furlanello", "given": "Tommaso" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Tensor Regression Networks", "ispublished": "pub", "full_text_status": "public", "keywords": "Machine Learning, Tensor Methods, Tensor Regression Networks, Low-Rank Regression, Tensor Regression Layers, Deep Learning, Tensor Contraction", "note": "\u00a9 2020 Jean Kossaifi, Zachary C. Lipton, Arinbj\u00f6rn Kolbeinsson, Aran Khanna, Tommaso Furlanello and Anima Anandkumar. License: CC-BY 4.0, see https://creativecommons.org/licenses/by/4.0/. Attribution requirements are provided at http://jmlr.org/papers/v21/18-503.html. \n\nSubmitted 7/18; Published 7/20. \n\nThis research has been conducted using the UK Biobank Resource under Application Number 18545. 
The authors would like to thank the editor and anonymous reviewers for the constructive feedback which helped improve this manuscript.\n\nPublished - 18-503.pdf
Submitted - 1707.08308.pdf
", "abstract": "Convolutional neural networks typically consist of many convolutional layers followed by one or more fully connected layers. While convolutional layers map between high-order activation tensors, the fully connected layers operate on flattened activation vectors. Despite empirical success, this approach has notable drawbacks. Flattening followed by fully connected layers discards multilinear structure in the activations and requires many parameters. We address these problems by incorporating tensor algebraic operations that preserve multilinear structure at every layer. First, we introduce Tensor Contraction Layers (TCLs) that reduce the dimensionality of their input while preserving their multilinear structure using tensor contraction. Next, we introduce Tensor Regression Layers (TRLs), which express outputs through a low-rank multilinear mapping from a high-order activation tensor to an output tensor of arbitrary order. We learn the contraction and regression factors end-to-end, and produce accurate nets with fewer parameters. Additionally, our layers regularize networks by imposing low-rank constraints on the activations (TCL) and regression weights (TRL). Experiments on ImageNet show that, applied to VGG and ResNet architectures, TCLs and TRLs reduce the number of parameters compared to fully connected layers by more than 65% while maintaining or increasing accuracy. In addition to the space savings, our approach's ability to leverage topological structure can be crucial for structured data such as MRI. 
In particular, we demonstrate significant performance improvements over comparable architectures on three tasks associated with the UK Biobank dataset.", "date": "2020-07-20", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "21", "publisher": "Journal of Machine Learning Research", "pagerange": "1-21", "id_number": "CaltechAUTHORS:20190327-085728859", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190327-085728859", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.1707.08308", "primary_object": { "basename": "1707.08308.pdf", "url": "https://authors.library.caltech.edu/records/rztks-hw818/files/1707.08308.pdf" }, "related_objects": [ { "basename": "18-503.pdf", "url": "https://authors.library.caltech.edu/records/rztks-hw818/files/18-503.pdf" } ], "resource_type": "article", "pub_year": "2020", "author_list": "Kossaifi, Jean; Lipton, Zachary C.; et al." }, { "id": "https://authors.library.caltech.edu/records/y0yqt-30w39", "eprint_id": 106487, "eprint_status": "archive", "datestamp": "2023-08-19 22:24:16", "lastmod": "2023-10-20 23:32:24", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Chen-Wuyang", "name": { "family": "Chen", "given": "Wuyang" } }, { "id": "Yu-Zhiding", "name": { "family": "Yu", "given": "Zhiding" } }, { "id": "Wang-Zhangyang", "name": { "family": "Wang", "given": "Zhangyang" }, "orcid": "0000-0002-2050-5693" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Automated Synthetic-to-Real Generalization", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2020 by the author(s). \n\nWork done during internship at NVIDIA. 
We appreciate the\ncomputing power supported by NVIDIA GPU infrastructure.\nWe also thank for the discussion and suggestions from four\nanonymous reviewers and the help from Yang Zou for the\ndomain adaptation experiments. The research of Z. Wang\nwas partially supported by NSF Award RI-1755701.\n\nPublished - chen20x.pdf
Accepted Version - 2007.06965.pdf
", "abstract": "Models trained on synthetic images often face degraded generalization to real data. As a convention, these models are often initialized with ImageNet pretrained representation. Yet the role of ImageNet knowledge is seldom discussed despite common practices that leverage this knowledge to maintain the generalization ability. An example is the careful hand-tuning of early stopping and layer-wise learning rates, which is shown to improve synthetic-to-real generalization but is also laborious and heuristic. In this work, we explicitly encourage the synthetically trained model to maintain similar representations with the ImageNet pretrained model, and propose a learning-to-optimize (L2O) strategy to automate the selection of layer-wise learning rates. We demonstrate that the proposed framework can significantly improve the synthetic-to-real generalization performance without seeing and training on real data, while also benefiting downstream tasks such as domain adaptation. Code is available at: https://github.com/NVlabs/ASG.", "date": "2020-07-14", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "119", "publisher": "ML Research Press", "pagerange": "1746-1756", "id_number": "CaltechAUTHORS:20201106-120205331", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20201106-120205331", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "IIS-1755701" } ] }, "doi": "10.48550/arXiv.2007.06965", "primary_object": { "basename": "chen20x.pdf", "url": "https://authors.library.caltech.edu/records/y0yqt-30w39/files/chen20x.pdf" }, "related_objects": [ { "basename": "2007.06965.pdf", "url": "https://authors.library.caltech.edu/records/y0yqt-30w39/files/2007.06965.pdf" } ], "resource_type": "article", "pub_year": "2020", "author_list": "Chen, Wuyang; Yu, Zhiding; et al." 
}, { "id": "https://authors.library.caltech.edu/records/8k3ah-wcs70", "eprint_id": 100577, "eprint_status": "archive", "datestamp": "2023-08-19 22:02:13", "lastmod": "2023-10-18 21:37:06", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Chen-Beidi", "name": { "family": "Chen", "given": "Beidi" } }, { "id": "Liu-Weiyang", "name": { "family": "Liu", "given": "Weiyang" } }, { "id": "Yu-Zhiding", "name": { "family": "Yu", "given": "Zhiding" } }, { "id": "Kautz-Jan", "name": { "family": "Kautz", "given": "Jan" } }, { "id": "Shrivastava-Anshumali", "name": { "family": "Shrivastava", "given": "Anshumali" } }, { "id": "Garg-Animesh", "name": { "family": "Garg", "given": "Animesh" }, "orcid": "0000-0003-0482-4296" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Angular Visual Hardness", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2020 by the author(s). \n\nWork done during internship at NVIDIA. We would like to thank Shiyu Liang, Yue Zhu and Yang Zou for the valuable discussions that enlighten our research. We are also grateful to the anonymous reviewers for their constructive comments that significantly helped to improve our paper. Weiyang Liu is partially supported by Baidu scholarship and NVIDIA GPU grant. This work was supported by NSF-1652131, NSF-BIGDATA 1838177, AFOSR-YIPFA9550-18-1-0152, Amazon Research Award, and ONR BRC grant for Randomized Numerical Linear Algebra.\n\nPublished - chen20n.pdf
Submitted - 1912.02279.pdf
Supplemental Material - chen20n-supp.pdf
", "abstract": "Recent convolutional neural networks (CNNs) have led to impressive performance but often suffer from poor calibration. They tend to be overconfident, with the model confidence not always reflecting the underlying true ambiguity and hardness. In this paper, we propose angular visual hardness (AVH), a score given by the normalized angular distance between the sample feature embedding and the target classifier to measure sample hardness. We validate this score with an in-depth and extensive scientific study, and observe that CNN models with the highest accuracy also have the best AVH scores. This agrees with an earlier finding that state-of-art models improve on the classification of harder examples. We observe that the training dynamics of AVH is vastly different compared to the training loss. Specifically, AVH quickly reaches a plateau for all samples even though the training loss keeps improving. This suggests the need for designing better loss functions that can target harder examples more effectively. We also find that AVH has a statistically significant correlation with human visual hardness. 
Finally, we demonstrate the benefit of AVH to a variety of applications such as self-training for domain adaptation and domain generalization.", "date": "2020-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "119", "publisher": "ML Research Press", "pagerange": "1637-1648", "id_number": "CaltechAUTHORS:20200109-084932688", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200109-084932688", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Baidu Scholarship" }, { "agency": "NVIDIA Corporation" }, { "agency": "NSF", "grant_number": "IIS-1652131" }, { "agency": "NSF", "grant_number": "IIS-1838177" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-18-1-0152" }, { "agency": "Amazon Research Award" }, { "agency": "Office of Naval Research (ONR)" } ] }, "doi": "10.48550/arXiv.1912.02279", "primary_object": { "basename": "1912.02279.pdf", "url": "https://authors.library.caltech.edu/records/8k3ah-wcs70/files/1912.02279.pdf" }, "related_objects": [ { "basename": "chen20n-supp.pdf", "url": "https://authors.library.caltech.edu/records/8k3ah-wcs70/files/chen20n-supp.pdf" }, { "basename": "chen20n.pdf", "url": "https://authors.library.caltech.edu/records/8k3ah-wcs70/files/chen20n.pdf" } ], "resource_type": "article", "pub_year": "2020", "author_list": "Chen, Beidi; Liu, Weiyang; et al." }, { "id": "https://authors.library.caltech.edu/records/0tppf-v5s28", "eprint_id": 98453, "eprint_status": "archive", "datestamp": "2023-08-22 03:44:51", "lastmod": "2023-10-18 17:22:50", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Ross-Z-E", "name": { "family": "Ross", "given": "Zachary E." }, "orcid": "0000-0002-6343-8400" }, { "id": "Trugman-Daniel-T", "name": { "family": "Trugman", "given": "Daniel T." 
}, "orcid": "0000-0002-9296-4223" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Directivity Modes of Earthquake Populations with Unsupervised Learning", "ispublished": "pub", "full_text_status": "public", "keywords": "rupture directivity; earthquake source properties; machine learning; unsupervised learning", "note": "\u00a9 2020 American Geophysical Union. \n\nReceived 1 JUL 2019; Accepted 31 JAN 2020; Accepted article online 5 FEB 2020. \n\nWe thank Val\u00e8re Lambert for helpful discussions. The waveform and catalog data used in this study are publicly available from the Southern California Earthquake Data Center (scedc.caltech.edu) and the Northern California Earthquake Data Center (ncedc.org). D. Trugman acknowledges institutional support from the Laboratory Directed Research and Development (LDRD) program of Los Alamos National Laboratory under project number 20180700PRD1. A. Anandkumar is supported in part by Bren endowed chair, Darpa PAI, Raytheon, and Microsoft, Google, and Adobe faculty fellowships. K. Azizzadenesheli is supported in part by NSF Career Award CCF\u20101254106 and AFOSR YIPFA9550\u201015\u20101\u20100221.\n\nPublished - 2019JB018299.pdf
Submitted - 1907.00496.pdf
Supplemental Material - jgrb54024-sup-0001-2019jb018299-text_si-s01.pdf
Supplemental Material - jgrb54024-sup-0002-2019jb018299-data_set_si-s01.txt
Supplemental Material - jgrb54024-sup-0003-2019jb018299-data_set_si-s02.txt
Supplemental Material - jgrb54024-sup-0004-2019jb018299-data_set_si-s03.txt
Supplemental Material - jgrb54024-sup-0005-2019jb018299-data_set_si-s04.txt
Supplemental Material - jgrb54024-sup-0006-2019jb018299-data_set_si-s05.txt
Supplemental Material - jgrb54024-sup-0007-2019jb018299-data_set_si-s06.txt
Supplemental Material - jgrb54024-sup-0008-2019jb018299-data_set_si-s07.txt
Supplemental Material - jgrb54024-sup-0009-2019jb018299-data_set_si-s08.txt
", "abstract": "We present a novel approach for resolving modes of rupture directivity in large populations of earthquakes. A seismic spectral decomposition technique is used to first produce relative measurements of radiated energy for earthquakes in a spatially compact cluster. The azimuthal distribution of energy for each earthquake is then assumed to result from one of several distinct modes of rupture propagation. Rather than fitting a kinematic rupture model to determine the most likely mode of rupture propagation, we instead treat the modes as latent variables and learn them with a Gaussian mixture model. The mixture model simultaneously determines the number of events that best identify with each mode. The technique is demonstrated on four datasets in California, each with compact clusters of several thousand earthquakes with comparable slip mechanisms. We show that the datasets naturally decompose into distinct rupture propagation modes that correspond to different rupture directions, and the fault plane is unambiguously identified for all cases. We find that these small earthquakes exhibit unilateral ruptures 63\u201373% of the time on average. The results provide important observational constraints on the physics of earthquakes and faults.", "date": "2020-02", "date_type": "published", "publication": "Journal of Geophysical Research. Solid Earth", "volume": "125", "number": "2", "publisher": "American Geophysical Union", "pagerange": "Art. No. 
e2019JB018299", "id_number": "CaltechAUTHORS:20190905-154247884", "issn": "2169-9313", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190905-154247884", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Los Alamos National Laboratory", "grant_number": "20180700PRD1" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)" }, { "agency": "Raytheon" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" }, { "agency": "NSF", "grant_number": "CCF\u20101254106" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "YIPFA9550\u201015\u20101\u20100221" } ] }, "local_group": { "items": [ { "id": "Seismological-Laboratory" }, { "id": "Division-of-Geological-and-Planetary-Sciences" } ] }, "doi": "10.1029/2019JB018299", "primary_object": { "basename": "jgrb54024-sup-0001-2019jb018299-text_si-s01.pdf", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0001-2019jb018299-text_si-s01.pdf" }, "related_objects": [ { "basename": "jgrb54024-sup-0002-2019jb018299-data_set_si-s01.txt", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0002-2019jb018299-data_set_si-s01.txt" }, { "basename": "jgrb54024-sup-0003-2019jb018299-data_set_si-s02.txt", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0003-2019jb018299-data_set_si-s02.txt" }, { "basename": "jgrb54024-sup-0004-2019jb018299-data_set_si-s03.txt", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0004-2019jb018299-data_set_si-s03.txt" }, { "basename": "jgrb54024-sup-0005-2019jb018299-data_set_si-s04.txt", "url": 
"https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0005-2019jb018299-data_set_si-s04.txt" }, { "basename": "jgrb54024-sup-0009-2019jb018299-data_set_si-s08.txt", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0009-2019jb018299-data_set_si-s08.txt" }, { "basename": "1907.00496.pdf", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/1907.00496.pdf" }, { "basename": "2019JB018299.pdf", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/2019JB018299.pdf" }, { "basename": "jgrb54024-sup-0006-2019jb018299-data_set_si-s05.txt", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0006-2019jb018299-data_set_si-s05.txt" }, { "basename": "jgrb54024-sup-0007-2019jb018299-data_set_si-s06.txt", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0007-2019jb018299-data_set_si-s06.txt" }, { "basename": "jgrb54024-sup-0008-2019jb018299-data_set_si-s07.txt", "url": "https://authors.library.caltech.edu/records/0tppf-v5s28/files/jgrb54024-sup-0008-2019jb018299-data_set_si-s07.txt" } ], "resource_type": "article", "pub_year": "2020", "author_list": "Ross, Zachary E.; Trugman, Daniel T.; et al." }, { "id": "https://authors.library.caltech.edu/records/j16vt-na095", "eprint_id": 103456, "eprint_status": "archive", "datestamp": "2023-08-19 18:49:27", "lastmod": "2023-10-20 16:22:39", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Janzamin-M", "name": { "family": "Janzamin", "given": "Majid" } }, { "id": "Ge-Rong", "name": { "family": "Ge", "given": "Rong" } }, { "id": "Kossaifi-J", "name": { "family": "Kossaifi", "given": "Jean" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Spectral Learning on Matrices and Tensors", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2019 M. Janzamin, R. Ge, J. Kossaifi and A. Anandkumar. 
\n\nPublication Date: 28 Nov 2019. \n\nThe authors are grateful to anonymous reviewers for valuable comments that have significantly improved the manuscript.\n\nAccepted Version - 2004.07984.pdf
", "abstract": "Spectral methods have been the mainstay in several domains such as machine learning, applied mathematics and scientific computing. They involve finding a certain kind of spectral decomposition to obtain basis functions that can capture important structures or directions for the problem at hand. The most common spectral method is the principal component analysis (PCA). It utilizes the principal components or the top eigenvectors of the data covariance matrix to carry out dimensionality reduction as one of its applications. This data pre-processing step is often effective in separating signal from noise. PCA and other spectral techniques applied to matrices have several limitations. By limiting to only pairwise moments, they are effectively making a Gaussian approximation on the underlying data. Hence, they fail on data with hidden variables which lead to non-Gaussianity. However, in almost any data set, there are latent effects that cannot be directly observed, e.g., topics in a document corpus, or underlying causes of a disease. By extending the spectral decomposition methods to higher order moments, we demonstrate the ability to learn a wide range of latent variable models efficiently. Higher-order moments can be represented by tensors, and intuitively, they can encode more information than just pairwise moment matrices. More crucially, tensor decomposition can pick up latent effects that are missed by matrix methods. For instance, tensor decomposition can uniquely identify non-orthogonal components. Exploiting these aspects turns out to be fruitful for provable unsupervised learning of a wide range of latent variable models. We also outline the computational techniques to design efficient tensor decomposition methods. They are embarrassingly parallel and thus scalable to large data sets. Whilst there exist many optimized linear algebra software packages, efficient tensor algebra packages are also beginning to be developed. 
We introduce Tensorly, which has a simple python interface for expressing tensor operations. It has a flexible back-end system supporting NumPy, PyTorch, TensorFlow and MXNet amongst others. This allows it to carry out multi-GPU and CPU operations, and can also be seamlessly integrated with deep-learning functionalities.", "date": "2019-11-28", "date_type": "published", "publication": "Foundations and Trends in Machine Learning", "volume": "12", "number": "5-6", "publisher": "Now Publishers", "pagerange": "393-536", "id_number": "CaltechAUTHORS:20200526-130837701", "issn": "1935-8237", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200526-130837701", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1561/2200000057", "primary_object": { "basename": "2004.07984.pdf", "url": "https://authors.library.caltech.edu/records/j16vt-na095/files/2004.07984.pdf" }, "resource_type": "article", "pub_year": "2019", "author_list": "Janzamin, Majid; Ge, Rong; et el." }, { "id": "https://authors.library.caltech.edu/records/dx4yc-jbh70", "eprint_id": 112776, "eprint_status": "archive", "datestamp": "2023-08-19 16:40:25", "lastmod": "2023-10-23 22:46:27", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Huang-Furong", "name": { "family": "Huang", "given": "Furong" } }, { "id": "Naresh-Niranjan-Uma", "name": { "family": "Naresh", "given": "Niranjan Uma" } }, { "id": "Perros-Ioakeim", "name": { "family": "Perros", "given": "Ioakeim" } }, { "id": "Chen-Robert", "name": { "family": "Chen", "given": "Robert" } }, { "id": "Sun-Jimeng", "name": { "family": "Sun", "given": "Jimeng" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } } ] }, "title": "Guaranteed Scalable Learning of Latent Tree Models", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2020 by the author(s). 
\n\nHuang is supported by startup fund from Department\nof Computer Science, University of Maryland, National\nScience Foundation IIS-1850220 CRII Award 030742-\n00001, and Adobe, Capital One and JP Morgan faculty\nfellowships. Sun is supported by the National\nScience Foundation award IIS-1418511, CCF-1533768\nand IIS-1838042, the National Institute of Health award\n1R01MD011682-01 and R56HL138415. Anandkumar\nis supported in part by Bren endowed chair, Darpa PAI,\nRaytheon, and Microsoft, Google and Adobe faculty fellowships.\n\nPublished - huang20b.pdf
Submitted - 1406.4566.pdf
Supplemental Material - huang20b-supp.pdf
", "abstract": "We present an integrated approach to structure and parameter estimation in latent tree graphical models, where some nodes are hidden. Our overall approach follows a \"divide-and-conquer\" strategy that learns models over small groups of variables and iteratively merges into a global solution. The structure learning involves combinatorial operations such as minimum spanning tree construction and local recursive grouping; the parameter learning is based on the method of moments and on tensor decompositions. Our method is guaranteed to correctly recover the unknown tree structure and the model parameters with low sample complexity for the class of linear multivariate latent tree models which includes discrete and Gaussian distributions, and Gaussian mixtures. Our bulk asynchronous parallel algorithm is implemented in parallel and scales logarithmically with the number of variables and linearly with dimensionality of each variable.", "date": "2019-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "115", "publisher": "ML Research Press", "pagerange": "883-893", "id_number": "CaltechAUTHORS:20220107-163918011", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220107-163918011", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "University of Maryland" }, { "agency": "NSF", "grant_number": "IIS-1850220" }, { "agency": "NSF", "grant_number": "030742-00001" }, { "agency": "Adobe" }, { "agency": "Capital One" }, { "agency": "JP Morgan" }, { "agency": "NSF", "grant_number": "IIS-1418511" }, { "agency": "NSF", "grant_number": "CCF-1533768" }, { "agency": "NSF", "grant_number": "IIS-1838042" }, { "agency": "NIH", "grant_number": "1R01MD011682-01" }, { "agency": "NIH", "grant_number": "R56HL138415" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": 
"Defense Advanced Research Projects Agency (DARPA)" }, { "agency": "Raytheon Company" }, { "agency": "Microsoft" }, { "agency": "Google" } ] }, "doi": "10.48550/arXiv.1406.4566", "primary_object": { "basename": "huang20b.pdf", "url": "https://authors.library.caltech.edu/records/dx4yc-jbh70/files/huang20b.pdf" }, "related_objects": [ { "basename": "1406.4566.pdf", "url": "https://authors.library.caltech.edu/records/dx4yc-jbh70/files/1406.4566.pdf" }, { "basename": "huang20b-supp.pdf", "url": "https://authors.library.caltech.edu/records/dx4yc-jbh70/files/huang20b-supp.pdf" } ], "resource_type": "article", "pub_year": "2019", "author_list": "Huang, Furong; Naresh, Niranjan Uma; et el." }, { "id": "https://authors.library.caltech.edu/records/ey15f-s3m29", "eprint_id": 94180, "eprint_status": "archive", "datestamp": "2023-08-19 16:00:56", "lastmod": "2023-10-20 17:45:32", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Cvitkovic-M", "name": { "family": "Cvitkovic", "given": "Milan" } }, { "id": "Singh-Badal", "name": { "family": "Singh", "given": "Badal" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Open Vocabulary Learning on Source Code with a Graph-Structured Cache", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2019 by the author(s). \n\nProceedings of the 36th International Conference on Machine Learning, Long Beach, California, PMLR 97, 2019. \n\nMany thanks to Miltos Allamanis and Hyokun Yun for their advice and useful conversations.\n\nPublished - cvitkovic19b.pdf
Submitted - 1810.08305.pdf
Supplemental Material - cvitkovic19b-supp.pdf
", "abstract": "Machine learning models that take computer program source code as input typically use Natural Language Processing (NLP) techniques. However, a major challenge is that code is written using an open, rapidly changing vocabulary due to, e.g., the coinage of new variable and method names. Reasoning over such a vocabulary is not something for which most NLP methods are designed. We introduce a Graph-Structured Cache to address this problem; this cache contains a node for each new word the model encounters with edges connecting each word to its occurrences in the code. We find that combining this graph-structured cache strategy with recent Graph-Neural-Network-based models for supervised learning on code improves the models' performance on a code completion task and a variable naming task \u2014 with over 100% relative improvement on the latter \u2014 at the cost of a moderate increase in computation time.", "date": "2019-06", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "97", "publisher": "PMLR", "pagerange": "1475-1485", "id_number": "CaltechAUTHORS:20190327-085810844", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190327-085810844", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.1810.08305", "primary_object": { "basename": "1810.08305.pdf", "url": "https://authors.library.caltech.edu/records/ey15f-s3m29/files/1810.08305.pdf" }, "related_objects": [ { "basename": "cvitkovic19b-supp.pdf", "url": "https://authors.library.caltech.edu/records/ey15f-s3m29/files/cvitkovic19b-supp.pdf" }, { "basename": "cvitkovic19b.pdf", "url": "https://authors.library.caltech.edu/records/ey15f-s3m29/files/cvitkovic19b.pdf" } ], "resource_type": "article", "pub_year": "2019", "author_list": "Cvitkovic, Milan; Singh, Badal; et el." 
}, { "id": "https://authors.library.caltech.edu/records/7gpw5-j1d54", "eprint_id": 101651, "eprint_status": "archive", "datestamp": "2023-08-19 15:29:49", "lastmod": "2023-10-19 22:55:43", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kwok-Roberta", "name": { "family": "Kwok", "given": "Roberta" } }, { "id": "Ranade-G", "name": { "family": "Ranade", "given": "Gireeja" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } }, { "id": "Maskey-S", "name": { "family": "Maskey", "given": "Sameer" } }, { "id": "Mohaghegh-M", "name": { "family": "Mohaghegh", "given": "Mahsa" } }, { "id": "Vreeken-J", "name": { "family": "Vreeken", "given": "Jilles" } }, { "id": "Herman-H", "name": { "family": "Herman", "given": "Herman" } } ] }, "title": "Junior AI researchers are in demand by universities and industry", "ispublished": "pub", "full_text_status": "restricted", "note": "\u00a9 2020 Springer Nature Limited.", "abstract": "Opportunities for moving between academia and business are expanding for scientists as companies step up recruitment.", "date": "2019-04-23", "date_type": "published", "publication": "Nature", "volume": "568", "number": "7753", "publisher": "Nature Publishing Group", "pagerange": "581-583", "id_number": "CaltechAUTHORS:20200302-111944472", "issn": "0028-0836", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200302-111944472", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1038/d41586-019-01248-w", "resource_type": "article", "pub_year": "2019", "author_list": "Kwok, Roberta; Ranade, Gireeja; et al." 
}, { "id": "https://authors.library.caltech.edu/records/ne64r-z5789", "eprint_id": 93356, "eprint_status": "archive", "datestamp": "2023-08-19 14:06:59", "lastmod": "2023-10-20 17:04:23", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kossaifi-J", "name": { "family": "Kossaifi", "given": "Jean" } }, { "id": "Panagakis-Y", "name": { "family": "Panagakis", "given": "Yannis" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Pantic-M", "name": { "family": "Pantic", "given": "Maja" } } ] }, "title": "TensorLy: Tensor Learning in Python", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2019 Jean Kossaifi, Yannis Panagakis, Anima Anandkumar and Maja Pantic. License: CC-BY 4.0, see https://creativecommons.org/licenses/by/4.0/. Attribution requirements are provided at http://jmlr.org/papers/v20/18-277.html. \n\nSubmitted 5/18; Revised 10/18; Published 2/19.\n\nPublished - 18-277.pdf
Submitted - 1610.09555v1.pdf
", "abstract": "Tensors are higher-order extensions of matrices. While matrix methods form the cornerstone of traditional machine learning and data analysis, tensor methods have been gaining increasing traction. However, software support for tensor operations is not on the same footing. In order to bridge this gap, we have developed TensorLy, a Python library that provides a high-level API for tensor methods and deep tensorized neural networks. TensorLy aims to follow the same standards adopted by the main projects of the Python scientific community, and to seamlessly integrate with them. Its BSD license makes it suitable for both academic and commercial applications. TensorLy's backend system allows users to perform computations with several libraries such as NumPy or PyTorch to name but a few. They can be scaled on multiple CPU or GPU machines. In addition, using the deep-learning frameworks as backend allows to easily design and train deep tensorized neural networks. TensorLy is available at https://github.com/tensorly/tensorly", "date": "2019-02", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "20", "number": "26", "publisher": "Journal of Machine Learning Research", "pagerange": "1-6", "id_number": "CaltechAUTHORS:20190228-133230688", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190228-133230688", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.1610.09555", "primary_object": { "basename": "1610.09555v1.pdf", "url": "https://authors.library.caltech.edu/records/ne64r-z5789/files/1610.09555v1.pdf" }, "related_objects": [ { "basename": "18-277.pdf", "url": "https://authors.library.caltech.edu/records/ne64r-z5789/files/18-277.pdf" } ], "resource_type": "article", "pub_year": "2019", "author_list": "Kossaifi, Jean; Panagakis, Yannis; et el." 
}, { "id": "https://authors.library.caltech.edu/records/gyb97-qmz56", "eprint_id": 94176, "eprint_status": "archive", "datestamp": "2023-08-19 10:07:05", "lastmod": "2023-10-20 17:45:21", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Furlanello-T", "name": { "family": "Furlanello", "given": "Tommaso" } }, { "id": "Lipton-Z-C", "name": { "family": "Lipton", "given": "Zachary C." } }, { "id": "Tschannen-M", "name": { "family": "Tschannen", "given": "Michael" } }, { "id": "Itti-L", "name": { "family": "Itti", "given": "Laurent" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Born Again Neural Networks", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2018 by the author(s). \n\nThis work was supported by the National Science Foundation (grant numbers CCF-1317433 and CNS-1545089), C-BRIC (one of six centers in JUMP, a Semiconductor Research Corporation (SRC) program sponsored by DARPA), and the Intel Corporation. The authors affirm that the views expressed herein are solely their own, and do not represent the views of the United States government or any agency thereof.\n\nPublished - furlanello18a.pdf
", "abstract": "Knowledge Distillation (KD) consists of transferring \"knowledge\" from one machine learning model (the teacher) to another (the student). Commonly, the teacher is a high-capacity model with formidable performance, while the student is more compact. By transferring knowledge, one hopes to benefit from the student's compactness, without sacrificing too much performance. We study KD from a new perspective: rather than compressing models, we train students parameterized identically to their teachers. Surprisingly, these Born-Again Networks (BANs), outperform their teachers significantly, both on computer vision and language modeling tasks. Our experiments with BANs based on DenseNets demonstrate state-of-the-art performance on the CIFAR-10 (3.5%) and CIFAR-100 (15.5%) datasets, by validation error. Additional experiments explore two distillation objectives: (i) Confidence-Weighted by Teacher Max (CWTM) and (ii) Dark Knowledge with Permuted Predictions (DKPP). Both methods elucidate the essential components of KD, demonstrating the effect of the teacher outputs on both predicted and non-predicted classes.", "date": "2018-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "80", "publisher": "PMLR", "pagerange": "1607-1616", "id_number": "CaltechAUTHORS:20190327-085757099", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190327-085757099", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "CCF-1317433" }, { "agency": "NSF", "grant_number": "CNS-1545089" }, { "agency": "Center for Brain-inspired Computing Enabling Autonomous Intelligence (C-BRIC)" }, { "agency": "Intel" } ] }, "doi": "10.48550/arXiv.1805.04770", "primary_object": { "basename": "furlanello18a.pdf", "url": "https://authors.library.caltech.edu/records/gyb97-qmz56/files/furlanello18a.pdf" }, 
"resource_type": "article", "pub_year": "2018", "author_list": "Furlanello, Tommaso; Lipton, Zachary C.; et al." }, { "id": "https://authors.library.caltech.edu/records/cnyfz-31290", "eprint_id": 94171, "eprint_status": "archive", "datestamp": "2023-08-19 10:06:54", "lastmod": "2023-10-20 17:45:04", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Tschannen-M", "name": { "family": "Tschannen", "given": "Michael" } }, { "id": "Khanna-A", "name": { "family": "Khanna", "given": "Aran" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } } ] }, "title": "StrassenNets: Deep Learning with a Multiplication Budget", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2018 by the author(s). \n\nThe authors would like to thank Eirikur Agustsson, Helmut B\u00f6lcskei, Lukas Cavigelli, Asmus Hetzel, Risi Kondor, Andrew Lavin, Michael Lerjen, Zachary Lipton, Weitang Liu, Andrea Olgiati, John Owens, Sheng Zha, and Zhi Zhang for inspiring discussions and comments. This work was supported by the \"AWS Cloud Credits for Research\" program.\n\nPublished - tschannen18a.pdf
", "abstract": "A large fraction of the arithmetic operations required to evaluate deep neural networks (DNNs) consists of matrix multiplications, in both convolution and fully connected layers. We perform end-to-end learning of low-cost approximations of matrix multiplications in DNN layers by casting matrix multiplications as 2-layer sum-product networks (SPNs) (arithmetic circuits) and learning their (ternary) edge weights from data. The SPNs disentangle multiplication and addition operations and enable us to impose a budget on the number of multiplication operations. Combining our method with knowledge distillation and applying it to image classification DNNs (trained on ImageNet) and language modeling DNNs (using LSTMs), we obtain a first-of-a-kind reduction in number of multiplications (over 99.5%) while maintaining the predictive performance of the full-precision models. Finally, we demonstrate that the proposed framework is able to rediscover Strassen's matrix multiplication algorithm, learning to multiply 2\u00d72 matrices using only 7 multiplications instead of 8.", "date": "2018-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "80", "publisher": "PMLR", "pagerange": "4985-4994", "id_number": "CaltechAUTHORS:20190327-085739295", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190327-085739295", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Amazon Web Services" } ] }, "doi": "10.48550/arXiv.1712.03942", "primary_object": { "basename": "tschannen18a.pdf", "url": "https://authors.library.caltech.edu/records/cnyfz-31290/files/tschannen18a.pdf" }, "resource_type": "article", "pub_year": "2018", "author_list": "Tschannen, Michael; Khanna, Aran; et el." 
}, { "id": "https://authors.library.caltech.edu/records/jm2vp-qjb50", "eprint_id": 94172, "eprint_status": "archive", "datestamp": "2023-08-19 10:07:01", "lastmod": "2023-10-20 17:45:07", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Bernstein-Jeremy-D", "name": { "family": "Bernstein", "given": "Jeremy" }, "orcid": "0000-0001-9110-7476" }, { "id": "Wang-Yu-Xiang", "name": { "family": "Wang", "given": "Yu-Xiang" }, "orcid": "0000-0002-6403-212X" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "signSGD: Compressed Optimisation for Non-Convex Problems", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2018 by the author(s). \n\nThe authors are grateful to the anonymous reviewers for their helpful comments, as well as Jiawei Zhao, Michael Tschannen, Julian Salazar, Tan Nguyen, Fanny Yang, Mu Li, Aston Zhang and Zack Lipton for useful discussions. Thanks to Ryan Tibshirani for pointing out the connection to steepest descent. \n\nKA is supported in part by NSF Career Award CCF-1254106 and Air Force FA9550-15-1-0221. AA is supported in part by Microsoft Faculty Fellowship, Google Faculty Research Award, Adobe Grant, NSF Career Award CCF-1254106, and AFOSR YIP FA9550-15-1-0221.\n\nPublished - bernstein18a.pdf
Supplemental Material - bernstein18a-supp.pdf
", "abstract": "Training large neural networks requires distributing learning across multiple workers, where the cost of communicating gradients can be a significant bottleneck. signSGD alleviates this problem by transmitting just the sign of each minibatch stochastic gradient. We prove that it can get the best of both worlds: compressed gradients and SGD-level convergence rate. The relative \u2113_1/\u2113_2 geometry of gradients, noise and curvature informs whether signSGD or SGD is theoretically better suited to a particular problem. On the practical side we find that the momentum counterpart of signSGD is able to match the accuracy and convergence speed of Adam on deep Imagenet models. We extend our theory to the distributed setting, where the parameter server uses majority vote to aggregate gradient signs from each worker enabling 1-bit compression of worker-server communication in both directions. Using a theorem by Gauss we prove that majority vote can achieve the same reduction in variance as full precision distributed SGD. Thus, there is great promise for sign-based optimisation schemes to achieve fast communication and fast convergence. 
Code to reproduce experiments is to be found at https://github.com/jxbz/signSGD.", "date": "2018-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "80", "publisher": "PMLR", "pagerange": "560-569", "id_number": "CaltechAUTHORS:20190327-085742729", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190327-085742729", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-15-1-0221" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" } ] }, "doi": "10.48550/arXiv.1802.04434", "primary_object": { "basename": "bernstein18a-supp.pdf", "url": "https://authors.library.caltech.edu/records/jm2vp-qjb50/files/bernstein18a-supp.pdf" }, "related_objects": [ { "basename": "bernstein18a.pdf", "url": "https://authors.library.caltech.edu/records/jm2vp-qjb50/files/bernstein18a.pdf" } ], "resource_type": "article", "pub_year": "2018", "author_list": "Bernstein, Jeremy; Wang, Yu-Xiang; et el." 
}, { "id": "https://authors.library.caltech.edu/records/16dxg-bb611", "eprint_id": 94330, "eprint_status": "archive", "datestamp": "2023-08-19 04:01:03", "lastmod": "2023-10-20 17:52:59", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } }, { "id": "Deng-Yuan", "name": { "family": "Deng", "given": "Yuan" } }, { "id": "Ge-Rong", "name": { "family": "Ge", "given": "Rong" } }, { "id": "Mobahi-H", "name": { "family": "Mobahi", "given": "Hossein" } } ] }, "title": "Homotopy Analysis for Tensor PCA", "ispublished": "pub", "full_text_status": "public", "keywords": "Tensor PCA, homotopy, continuation, Gaussian smoothing, nonconvex optimization, global optimization", "note": "\u00a9 2017 A. Anandkumar, Y. Deng, R. Ge & H. Mobahi.\n\nPublished - anandkumar17a.pdf
Accepted Version - 1610.09322.pdf
", "abstract": "Developing efficient and guaranteed nonconvex algorithms has been an important challenge in modern machine learning. Algorithms with good empirical performance such as stochastic gradient descent often lack theoretical guarantees. In this paper, we analyze the class of homotopy or continuation methods for global optimization of nonconvex functions. These methods start from an objective function that is efficient to optimize (e.g. convex), and progressively modify it to obtain the required objective, and the solutions are passed along the homotopy path. For the challenging problem of tensor PCA, we prove global convergence of the homotopy method in the \"high noise\" regime. The signal-to-noise requirement for our algorithm is tight in the sense that it matches the recovery guarantee for the \\em best degree-4 sum-of-squares algorithm. In addition, we prove a phase transition along the homotopy path for tensor PCA. This allows us to simplify the homotopy method to a local search algorithm, viz., tensor power iterations, with a specific initialization and a noise injection procedure, while retaining the theoretical guarantees.", "date": "2017-07", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "65", "publisher": "PMLR", "pagerange": "79-104", "id_number": "CaltechAUTHORS:20190401-123333151", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190401-123333151", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.1610.09322", "primary_object": { "basename": "anandkumar17a.pdf", "url": "https://authors.library.caltech.edu/records/16dxg-bb611/files/anandkumar17a.pdf" }, "related_objects": [ { "basename": "1610.09322.pdf", "url": "https://authors.library.caltech.edu/records/16dxg-bb611/files/1610.09322.pdf" } ], "resource_type": "article", "pub_year": "2017", "author_list": "Anandkumar, Anima; 
Deng, Yuan; et el." }, { "id": "https://authors.library.caltech.edu/records/x6ay2-sy046", "eprint_id": 81620, "eprint_status": "archive", "datestamp": "2023-08-19 00:54:20", "lastmod": "2023-10-17 20:54:40", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Agarwal-A", "name": { "family": "Agarwal", "given": "Alekh" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Netrapalli-P", "name": { "family": "Netrapalli", "given": "Praneeth" } } ] }, "title": "A Clustering Approach to Learning Sparsely Used Overcomplete Dictionaries", "ispublished": "pub", "full_text_status": "public", "keywords": "Dictionary learning, sparse coding, overcomplete dictionaries, incoherence, lasso", "note": "\u00a9 2017 IEEE. \n\nManuscript received July 6, 2014; revised June 6, 2016; accepted September 11, 2016. Date of publication September 30, 2016; date of current version December 20, 2016. \n\nA. Anandkumar was supported in part by the Microsoft Faculty Fellowship, in part by the NSF Career Award under Grant CCF1254106, in part by the NSF Award under Grant CCF-1219234, and in part by the ARO YIP Award under Grant W911NF-13-1-0084. This paper was presented at the 2014 COLT.\n\nSubmitted - 1309.1952.pdf
", "abstract": "We consider the problem of learning over complete dictionaries in the context of sparse coding, where each sample selects a sparse subset of dictionary elements. Our main result is a strategy to approximately recover the unknown dictionary using an efficient algorithm. Our algorithm is a clustering-style procedure, where each cluster is used to estimate a dictionary element. The resulting solution can often be further cleaned up to obtain a high accuracy estimate, and we provide one simple scenario where \u2113_1-regularized regression can be used for such a second stage.", "date": "2017-01", "date_type": "published", "publication": "IEEE Transactions on Information Theory", "volume": "63", "number": "1", "publisher": "IEEE", "pagerange": "575-592", "id_number": "CaltechAUTHORS:20170920-111802806", "issn": "0018-9448", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-111802806", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-13-1-0084" } ] }, "doi": "10.1109/TIT.2016.2614684", "primary_object": { "basename": "1309.1952.pdf", "url": "https://authors.library.caltech.edu/records/x6ay2-sy046/files/1309.1952.pdf" }, "resource_type": "article", "pub_year": "2017", "author_list": "Agarwal, Alekh; Anandkumar, Animashree; et el." 
}, { "id": "https://authors.library.caltech.edu/records/b6q6r-7yf44", "eprint_id": 81619, "eprint_status": "archive", "datestamp": "2023-08-19 00:42:17", "lastmod": "2023-10-17 20:54:35", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Ge-Rong", "name": { "family": "Ge", "given": "Rong" } }, { "id": "Janzamin-M", "name": { "family": "Janzamin", "given": "Majid" } } ] }, "title": "Analyzing Tensor Power Method Dynamics in Overcomplete Regime", "ispublished": "pub", "full_text_status": "public", "keywords": "tensor decomposition, tensor power iteration, overcomplete representation, unsupervised learning, latent variable models", "note": "\u00a9 2017 Animashree Anandkumar, Rong Ge, and Majid Janzamin.\nLicense: CC-BY 4.0, see https://creativecommons.org/licenses/by/4.0/. \n\nA. Anandkumar is supported in part by Microsoft Faculty Fellowship, NSF Career award CCF-1254106, NSF award CCF-1219234, ONR award N00014-14-1-0665, ARO YIP award W911NF-13-1-0084, and AFOSR YIP award FA9550-15-1-0221. M. Janzamin is supported by NSF Award CCF-1219234.\n\nPublished - 15-486.pdf
Submitted - 1411.1488.pdf
", "abstract": "We present a novel analysis of the dynamics of tensor power iterations in the overcomplete regime where the tensor CP rank is larger than the input dimension. Finding the CP decomposition of an overcomplete tensor is NP-hard in general. We consider the case where the tensor components are randomly drawn, and show that the simple power iteration recovers the components with bounded error under mild initialization conditions. We apply our analysis to unsupervised learning of latent variable models, such as multi-view mixture models and spherical Gaussian mixtures. Given the third order moment tensor, we learn the parameters using tensor power iterations. We prove it can correctly learn the model parameters when the number of hidden components k is much larger than the data dimension d, up to k=o(d^(1.5)). We initialize the power iterations with data samples and prove its success under mild conditions on the signal-to-noise ratio of the samples. Our analysis significantly expands the class of latent variable models where spectral methods are applicable. 
Our analysis also deals with noise in the input tensor leading to sample complexity result in the application to learning latent variable models.", "date": "2017", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "18", "number": "22", "publisher": "Journal of Machine Learning Research", "pagerange": "1-40", "id_number": "CaltechAUTHORS:20170920-110910164", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-110910164", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-14-1-0665" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-13-1-0084" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-15-1-0221" } ] }, "doi": "10.48550/arXiv.1411.1488", "primary_object": { "basename": "15-486.pdf", "url": "https://authors.library.caltech.edu/records/b6q6r-7yf44/files/15-486.pdf" }, "related_objects": [ { "basename": "1411.1488.pdf", "url": "https://authors.library.caltech.edu/records/b6q6r-7yf44/files/1411.1488.pdf" } ], "resource_type": "article", "pub_year": "2017", "author_list": "Anandkumar, Animashree; Ge, Rong; et al." 
}, { "id": "https://authors.library.caltech.edu/records/06qe9-9s868", "eprint_id": 81869, "eprint_status": "archive", "datestamp": "2023-08-19 00:29:08", "lastmod": "2023-10-17 21:53:23", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Agarwal-A", "name": { "family": "Agarwal", "given": "Alekh" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Jain-P", "name": { "family": "Jain", "given": "Prateek" } }, { "id": "Netrapalli-P", "name": { "family": "Netrapalli", "given": "Praneeth" } } ] }, "title": "Learning Sparsely Used Overcomplete Dictionaries via Alternating Minimization", "ispublished": "pub", "full_text_status": "public", "keywords": "dictionary learning, sparse coding, alternating minimization, RIP, incoherence, lasso", "note": "\u00a9 2016 Society for Industrial and Applied Mathematics. \n\nReceived by the editors July 29, 2014; accepted for publication (in revised form) September 12, 2016; published electronically December 8, 2016. \n\nPart of this work was done when P. Netrapalli was a student at UT Austin and A. Anandkumar and P. Netrapalli were visiting Microsoft Research. An extended abstract containing an earlier version of these results appears in Proceedings of COLT 2014. \n\nThe second author is supported in part by Microsoft Faculty Fellowship, Google Faculty Award, NSF Career Award CCF-1254106, ONR Award N00014-14-1-0665, and AFOSR YIP FA9550-15-1-0221.\n\nPublished - 140979861.pdf
Submitted - 1310.7991.pdf
", "abstract": "We consider the problem of sparse coding, where each sample consists of a sparse linear combination of a set of dictionary atoms, and the task is to learn both the dictionary elements and the mixing coefficients. Alternating minimization is a popular heuristic for sparse coding, where the dictionary and the coefficients are estimated in alternate steps, keeping the other fixed. Typically, the coefficients are estimated via \u2113_1 minimization, keeping the dictionary fixed, and the dictionary is estimated through least squares, keeping the coefficients fixed. In this paper, we establish local linear convergence for this variant of alternating minimization and establish that the basin of attraction for the global optimum (corresponding to the true dictionary and the coefficients) is O(1/s^2), where s is the sparsity level in each sample and the dictionary satisfies restricted isometry property. Combined with the recent results of approximate dictionary estimation, this yields provable guarantees for exact recovery of both the dictionary elements and the coefficients, when the dictionary elements are incoherent.", "date": "2016-12-08", "date_type": "published", "publication": "SIAM Journal of Optimization", "volume": "26", "number": "4", "publisher": "Society for Industrial and Applied Mathematics", "pagerange": "2775-2799", "id_number": "CaltechAUTHORS:20170927-090108498", "issn": "1052-6234", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-090108498", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Microsoft Research" }, { "agency": "Google" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-14-1-0665" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-15-1-0221" } ] }, "doi": "10.1137/140979861", "primary_object": { 
"basename": "1310.7991.pdf", "url": "https://authors.library.caltech.edu/records/06qe9-9s868/files/1310.7991.pdf" }, "related_objects": [ { "basename": "140979861.pdf", "url": "https://authors.library.caltech.edu/records/06qe9-9s868/files/140979861.pdf" } ], "resource_type": "article", "pub_year": "2016", "author_list": "Agarwal, Alekh; Anandkumar, Animashree; et el." }, { "id": "https://authors.library.caltech.edu/records/tmaq7-9k471", "eprint_id": 94328, "eprint_status": "archive", "datestamp": "2023-08-20 11:59:04", "lastmod": "2023-10-20 17:52:53", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Lazaric-Alessandro", "name": { "family": "Lazaric", "given": "Alessandro" }, "orcid": "0000-0002-8970-413X" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Open Problem: Approximate Planning of POMDPs in the class of Memoryless Policies", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2016 K. Azizzadenesheli, A. Lazaric & A. Anandkumar.\n\nPublished - azizzadenesheli16b.pdf
", "abstract": "Planning plays an important role in the broad class of decision theory. Planning has drawn much attention in recent work in the robotics and sequential decision making areas. Recently, Reinforcement Learning (RL), as an agent-environment interaction problem, has brought further attention to planning methods. Generally in RL, one can assume a generative model, e.g. graphical models, for the environment, and then the task for the RL agent is to learn the model parameters and find the optimal strategy based on these learnt parameters. Based on environment behavior, the agent can assume various types of generative models, e.g. Multi Armed Bandit for a static environment, or Markov Decision Process (MDP) for a dynamic environment. The advantage of these popular models is their simplicity, which results in tractable methods of learning the parameters and finding the optimal policy. The drawback of these models is again their simplicity: these models usually underfit and underestimate the actual environment behavior. For example, in robotics, the agent usually has noisy observations of the environment inner state and MDP is not a suitable model. \n\nMore complex models like Partially Observable Markov Decision Process (POMDP) can compensate for this drawback. Fitting this model to the environment, where the partial observation is given to the agent, generally gives dramatic performance improvement, sometimes unbounded improvement, compared to MDP. In general, finding the optimal policy for the POMDP model is computationally intractable and fully non convex, even for the class of memoryless policies. 
The open problem is to come up with a method to find an exact or an approximate optimal stochastic memoryless policy for POMDP models.", "date": "2016-06", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "49", "publisher": "PMLR", "pagerange": "1639-1642", "id_number": "CaltechAUTHORS:20190401-123326217", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190401-123326217", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.48550/arXiv.1608.04996", "primary_object": { "basename": "azizzadenesheli16b.pdf", "url": "https://authors.library.caltech.edu/records/tmaq7-9k471/files/azizzadenesheli16b.pdf" }, "resource_type": "article", "pub_year": "2016", "author_list": "Azizzadenesheli, Kamyar; Lazaric, Alessandro; et al." }, { "id": "https://authors.library.caltech.edu/records/ttqvx-6ps30", "eprint_id": 94324, "eprint_status": "archive", "datestamp": "2023-08-20 11:59:00", "lastmod": "2023-10-20 17:52:43", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Lazaric-Alessandro", "name": { "family": "Lazaric", "given": "Alessandro" }, "orcid": "0000-0002-8970-413X" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Reinforcement Learning of POMDPs using Spectral Methods", "ispublished": "pub", "full_text_status": "public", "keywords": "Spectral Methods, Method of Moments, Partially Observable Markov Decision Process, Latent Variable Model, Upper Confidence Reinforcement Learning", "note": "\u00a9 2016 K. Azizzadenesheli, A. Lazaric & A. Anandkumar. \n\nK. Azizzadenesheli is supported in part by NSF Career award CCF-1254106 and ONR Award N00014-14-1-0665. \n\nA. 
Lazaric is supported in part by a grant from CPER Nord-Pas de Calais/FEDER DATA Advanced data science and technologies 2015-2020, CRIStAL (Centre de Recherche en Informatique et Automatique de Lille), and the French National Research Agency (ANR) under project ExTra-Learn n.ANR-14-CE24-0010-01. \n\nA. Anandkumar is supported in part by Microsoft Faculty Fellowship, NSF Career award CCF-1254106, ONR Award N00014-14-1-0665, ARO YIP Award W911NF-13-1-0084 and AFOSR YIP FA9550-15-1-0221.\n\nPublished - azizzadenesheli16a.pdf
", "abstract": "We propose a new reinforcement learning algorithm for partially observable Markov decision processes (POMDP) based on spectral decomposition methods. While spectral methods have been previously employed for consistent learning of (passive) latent variable models such as hidden Markov models, POMDPs are more challenging since the learner interacts with the environment and possibly changes the future observations in the process. We devise a learning algorithm running through episodes, in each episode we employ spectral techniques to learn the POMDP parameters from a trajectory generated by a fixed policy. At the end of the episode, an optimization oracle returns the optimal memoryless planning policy which maximizes the expected reward based on the estimated POMDP model. We prove an order-optimal regret bound w.r.t. the optimal memoryless policy and efficient scaling with respect to the dimensionality of observation and action spaces.", "date": "2016-06", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "49", "publisher": "PMLR", "pagerange": "193-256", "id_number": "CaltechAUTHORS:20190401-123310700", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190401-123310700", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-14-1-0665" }, { "agency": "Contrat de plan Etat-r\u00e9gion Nord - Pas-de-Calais" }, { "agency": "Fondo Europeo de Desarrollo Regional (FEDER)" }, { "agency": "Centre de Recherche en Informatique et Automatique de Lille" }, { "agency": "Agence Nationale pour la Recherche (ANR)", "grant_number": "ANR-14-CE24-0010-01" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-14-1-0665" }, { 
"agency": "Army Research Office (ARO)", "grant_number": "W911NF-13-1-0084" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-15-1-0221" } ] }, "doi": "10.48550/arXiv.1602.07764", "primary_object": { "basename": "azizzadenesheli16a.pdf", "url": "https://authors.library.caltech.edu/records/ttqvx-6ps30/files/azizzadenesheli16a.pdf" }, "resource_type": "article", "pub_year": "2016", "author_list": "Azizzadenesheli, Kamyar; Lazaric, Alessandro; et el." }, { "id": "https://authors.library.caltech.edu/records/vdyfb-6na98", "eprint_id": 81879, "eprint_status": "archive", "datestamp": "2023-08-20 09:21:43", "lastmod": "2023-10-17 21:53:43", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Huang-Furong", "name": { "family": "Huang", "given": "Furong" } }, { "id": "Niranjan-U-N", "name": { "family": "Niranjan", "given": "U. N." } }, { "id": "Hakeem-M-U", "name": { "family": "Hakeem", "given": "Mohammad Umar" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } } ] }, "title": "Online Tensor Methods for Learning Latent Variable Models", "ispublished": "pub", "full_text_status": "public", "keywords": "mixed membership stochastic blockmodel, topic modeling, tensor method, stochastic gradient descent, parallel implementation, large datasets", "note": "\u00a9 2015 Furong Huang, U. N. Niranjan, Mohammad Umar Hakeem, and Animashree Anandkumar. \n\nSubmitted 3/14; Revised 9/14; Published 12/15. \n\nThe first author is supported by NSF BIGDATA IIS-1251267, the second author is supported in part by UCI graduate fellowship and NSF Award CCF-1219234, and the last author is supported in part by Microsoft Faculty Fellowship, NSF Career award CCF-1254106, NSF Award CCF-1219234, and ARO YIP Award W911NF-13-1-0084. 
The authors acknowledge insightful discussions with Prem Gopalan, David Mimno, David Blei, Qirong Ho, Eric Xing, Carter Butts, Blake Foster, Rui Wang, Sridhar Mahadevan, and the CULA team. Special thanks to Prem Gopalan and David Mimno for providing the variational code and answering all our questions. The authors also thank Daniel Hsu and Sham Kakade for initial discussions regarding the implementation of the tensor method. We also thank Dan Melzer for helping us with the system-related issues.\n\nPublished - huang15a.pdf
Submitted - 1309.0787.pdf
", "abstract": "We introduce an online tensor decomposition based approach for two latent variable modeling problems namely, (1) community detection, in which we learn the latent communities that the social actors in social networks belong to, and (2) topic modeling, in which we infer hidden topics of text articles. We consider decomposition of moment tensors using stochastic gradient descent. We conduct optimization of multilinear operations in SGD and avoid directly forming the tensors, to save computational and storage costs. We present optimized algorithm in two platforms. Our GPU-based implementation exploits the parallelism of SIMD architectures to allow for maximum speed-up by a careful optimization of storage and data transfer, whereas our CPU-based implementation uses efficient sparse matrix computations and is suitable for large sparse data sets. For the community detection problem, we demonstrate accuracy and computational efficiency on Facebook, Yelp and DBLP data sets, and for the topic modeling problem, we also demonstrate good performance on the New York Times data set. 
We compare our results to the state-of-the-art algorithms such as the variational method, and report a gain of accuracy and a gain of several orders of magnitude in the execution time.", "date": "2015-12", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "16", "publisher": "Journal of Machine Learning Research", "pagerange": "2797-2835", "id_number": "CaltechAUTHORS:20170927-111140656", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-111140656", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "IIS-1251267" }, { "agency": "University of California, Irvine" }, { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-13-1-0084" } ] }, "doi": "10.48550/arXiv.1309.0787", "primary_object": { "basename": "1309.0787.pdf", "url": "https://authors.library.caltech.edu/records/vdyfb-6na98/files/1309.0787.pdf" }, "related_objects": [ { "basename": "huang15a.pdf", "url": "https://authors.library.caltech.edu/records/vdyfb-6na98/files/huang15a.pdf" } ], "resource_type": "article", "pub_year": "2015", "author_list": "Huang, Furong; Niranjan, U. N.; et al." 
}, { "id": "https://authors.library.caltech.edu/records/vt9tt-kd996", "eprint_id": 81884, "eprint_status": "archive", "datestamp": "2023-08-20 09:21:49", "lastmod": "2023-10-17 21:53:56", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Hsu-Daniel", "name": { "family": "Hsu", "given": "Daniel" } }, { "id": "Janzamin-M", "name": { "family": "Janzamin", "given": "Majid" } }, { "id": "Kakade-S-M", "name": { "family": "Kakade", "given": "Sham" } } ] }, "title": "When Are Overcomplete Topic Models Identifiable? Uniqueness of Tensor Tucker Decompositions with Structured Sparsity", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2015 Animashree Anandkumar, Daniel Hsu, Majid Janzamin and Sham Kakade. \n\nThe authors acknowledge useful discussions with Sina Jafarpour, Adel Javanmard, Alex Dimakis, Moses Charikar, Sanjeev Arora, Ankur Moitra and Kamalika Chaudhuri. Sham Kakade thanks the Washington Research Foundation. A. Anandkumar is supported in part by Microsoft Faculty Fellowship, NSF Career award CCF-1254106, NSF Award CCF-1219234, ARO Award W911NF-12-1-0404, and ARO YIP Award W911NF-13-1-0084. M. Janzamin is supported by NSF Award CCF-1219234, ARO Award W911NF-12-1-0404 and ARO YIP Award W911NF-13-1-0084.\n\nPublished - p2643-anandkumar.pdf
Submitted - 1308.2853.pdf
", "abstract": "Overcomplete latent representations have been very popular for unsupervised feature learning in recent years. In this paper, we specify which overcomplete models can be identified given observable moments of a certain order. We consider probabilistic admixture or topic models in the overcomplete regime, where the number of latent topics can greatly exceed the size of the observed word vocabulary. While general overcomplete topic models are not identifiable, we establish generic identifiability under a constraint, referred to as topic persistence. Our sufficient conditions for identifiability involve a novel set of \"higher order\" expansion conditions on the topic-word matrix or the population structure of the model. This set of higher-order expansion conditions allow for overcomplete models, and require the existence of a perfect matching from latent topics to higher order observed words. We establish that random structured topic models are identifiable w.h.p. in the overcomplete regime. Our identifiability results allows for general (non-degenerate) distributions for modeling the topic proportions, and thus, we can handle arbitrarily correlated topics in our framework. 
Our identifiability results imply uniqueness of a class of tensor decompositions with structured sparsity which is contained in the class of Tucker decompositions, but is more general than the Candecomp/Parafac (CP) decomposition.", "date": "2015-12", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "16", "publisher": "Journal of Machine Learning Research", "pagerange": "2643-2694", "id_number": "CaltechAUTHORS:20170927-144026647", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-144026647", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Washington Research Foundation" }, { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-12-1-0404" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-13-1-0084" } ] }, "doi": "10.48550/arXiv.1308.2853", "primary_object": { "basename": "1308.2853.pdf", "url": "https://authors.library.caltech.edu/records/vt9tt-kd996/files/1308.2853.pdf" }, "related_objects": [ { "basename": "p2643-anandkumar.pdf", "url": "https://authors.library.caltech.edu/records/vt9tt-kd996/files/p2643-anandkumar.pdf" } ], "resource_type": "article", "pub_year": "2015", "author_list": "Anandkumar, Animashree; Hsu, Daniel; et al." }, { "id": "https://authors.library.caltech.edu/records/ahvyn-gsn72", "eprint_id": 81632, "eprint_status": "archive", "datestamp": "2023-08-22 15:29:32", "lastmod": "2023-10-17 20:55:13", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Foster-Dean-P", "name": { "family": "Foster", "given": "Dean P." 
}, "orcid": "0000-0002-8503-0270" }, { "id": "Hsu-Daniel", "name": { "family": "Hsu", "given": "Daniel" }, "orcid": "0000-0002-3495-7113" }, { "id": "Kakade-Sham-M", "name": { "family": "Kakade", "given": "Sham M." } }, { "id": "Liu-Yi-Kai", "name": { "family": "Liu", "given": "Yi-Kai" }, "orcid": "0000-0001-7458-4721" } ] }, "title": "A Spectral Algorithm for Latent Dirichlet Allocation", "ispublished": "pub", "full_text_status": "public", "keywords": "Topic models; Mixture models; Method of moments; Latent Dirichlet allocation", "note": "\u00a9 2014 Springer Science+Business Media New York. \n\nReceived: 01 October 2013; Accepted: 12 June 2014; First Online: 03 July 2014. \n\nWe thank Kamalika Chaudhuri, Adam Kalai, Percy Liang, Chris Meek, David Sontag, and Tong Zhang for valuable insights. We also thank Rong Ge for sharing preliminary results (in [8]) and the anonymous reviewers for their comments, suggestions, and pointers to references. Part of this work was completed while DH was a postdoctoral researcher at Microsoft Research New England, and while DPF, YKL, and AA were visiting the same lab. AA is supported in part by Microsoft Faculty Fellowship, NSF Career award CCF-1254106, NSF Award CCF-1219234, NSF BIGDATA IIS-1251267 and ARO YIP Award W911NF-13-1-0084.\n\nSubmitted - 1204.6703.pdf
", "abstract": "Topic modeling is a generalization of clustering that posits that observations (words in a document) are generated by multiple latent factors (topics), as opposed to just one. The increased representational power comes at the cost of a more challenging unsupervised learning problem for estimating the topic-word distributions when only words are observed, and the topics are hidden. This work provides a simple and efficient learning procedure that is guaranteed to recover the parameters for a wide class of multi-view models and topic models, including latent Dirichlet allocation (LDA). For LDA, the procedure correctly recovers both the topic-word distributions and the parameters of the Dirichlet prior over the topic mixtures, using only trigram statistics (i.e., third order moments, which may be estimated with documents containing just three words). The method is based on an efficiently computable orthogonal tensor decomposition of low-order moments.", "date": "2015-05", "date_type": "published", "publication": "Algorithmica", "volume": "72", "number": "1", "publisher": "Springer", "pagerange": "193-214", "id_number": "CaltechAUTHORS:20170920-142816744", "issn": "0178-4617", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-142816744", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "NSF", "grant_number": "IIS-1251267" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-13-1-0084" } ] }, "doi": "10.1007/s00453-014-9909-1", "primary_object": { "basename": "1204.6703.pdf", "url": "https://authors.library.caltech.edu/records/ahvyn-gsn72/files/1204.6703.pdf" }, "resource_type": "article", "pub_year": "2015", "author_list": "Anandkumar, Animashree; Foster, Dean P.; et el." 
}, { "id": "https://authors.library.caltech.edu/records/kj7vp-een86", "eprint_id": 94345, "eprint_status": "archive", "datestamp": "2023-08-20 04:00:38", "lastmod": "2023-10-20 17:53:38", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Sedghi-H", "name": { "family": "Sedghi", "given": "Hanie" } }, { "id": "Janzamin-M", "name": { "family": "Janzamin", "given": "Majid" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Provable Tensor Methods for Learning Mixtures of Generalized Linear Models", "ispublished": "pub", "full_text_status": "public", "keywords": "Mixture of generalized linear models, score function, spectral/tensor decomposition", "note": "\u00a9 2016 by the authors. \n\nThis work was done while H. Sedghi was a visiting researcher at UC Irvine and was supported by NSF Career award FG15890. M. Janzamin is supported by NSF BIGDATA award FG16455. A. Anandkumar is supported in part by Microsoft Faculty Fellowship, NSF Career award CCF-1254106, and ONR Award N00014-14-1-0665.\n\nPublished - sedghi16.pdf
Accepted Version - 1412.3046.pdf
Supplemental Material - sedghi16-supp.pdf
", "abstract": "We consider the problem of learning mixtures of generalized linear models (GLM) which arise in classification and regression problems. Typical learning approaches such as expectation maximization (EM) or variational Bayes can get stuck in spurious local optima. In contrast, we present a tensor decomposition method which is guaranteed to correctly recover the parameters. The key insight is to employ certain feature transformations of the input, which depend on the input generative model. Specifically, we employ score function tensors of the input and compute their cross-correlation with the response variable. We establish that the decomposition of this tensor consistently recovers the parameters, under mild non-degeneracy conditions. We demonstrate that the computational and sample complexity of our method is a low order polynomial of the input and the latent dimensions.", "date": "2014-12-09", "date_type": "published", "publication": "Proceedings of Machine Learning Research", "volume": "51", "publisher": "PMLR", "pagerange": "1223-1231", "id_number": "CaltechAUTHORS:20190401-162921773", "issn": "2640-3498", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190401-162921773", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "FG15890" }, { "agency": "NSF", "grant_number": "FG16455" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-14-1-0665" } ] }, "doi": "10.48550/arXiv.1412.3046", "primary_object": { "basename": "sedghi16-supp.pdf", "url": "https://authors.library.caltech.edu/records/kj7vp-een86/files/sedghi16-supp.pdf" }, "related_objects": [ { "basename": "sedghi16.pdf", "url": "https://authors.library.caltech.edu/records/kj7vp-een86/files/sedghi16.pdf" }, { "basename": "1412.3046.pdf", "url": 
"https://authors.library.caltech.edu/records/kj7vp-een86/files/1412.3046.pdf" } ], "resource_type": "article", "pub_year": "2014", "author_list": "Sedghi, Hanie; Janzamin, Majid; et el." }, { "id": "https://authors.library.caltech.edu/records/9wngc-yb438", "eprint_id": 81881, "eprint_status": "archive", "datestamp": "2023-08-20 02:22:19", "lastmod": "2023-10-17 21:53:46", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Ge-Rong", "name": { "family": "Ge", "given": "Rong" } }, { "id": "Hsu-Daniel", "name": { "family": "Hsu", "given": "Daniel" } }, { "id": "Kakade-S-M", "name": { "family": "Kakade", "given": "Sham M." } }, { "id": "Telgarsky-M", "name": { "family": "Telgarsky", "given": "Matus" } } ] }, "title": "Tensor Decompositions for Learning Latent Variable Models", "ispublished": "pub", "full_text_status": "public", "keywords": "latent variable models, tensor decompositions, mixture models, topic models, method of moments, power method", "note": "\u00a9 2014 Animashree Anandkumar, Rong Ge, Daniel Hsu, Sham M. Kakade, and Matus Telgarsky. \n\nSubmitted 2/13; Revised 3/14; Published 8/14. \n\nWe thank Boaz Barak, Dean Foster, Jon Kelner, and Greg Valiant for helpful discussions. We are also grateful to Hanzhang Hu, Drew Bagnell, and Martial Hebert for alerting us of an issue with Theorem 4.2 and suggesting a simple fix. This work was completed while DH was a postdoctoral researcher at Microsoft Research New England, and partly while AA, RG, and MT were visiting the same lab. AA is supported in part by the NSF Award CCF-1219234, AFOSR Award FA9550-10-1-0310 and the ARO Award W911NF-12-1-0404.\n\nPublished - anandkumar14b.pdf
Submitted - 1210.7559.pdf
", "abstract": "This work considers a computationally and statistically efficient parameter estimation method for a wide class of latent variable models---including Gaussian mixture models, hidden Markov models, and latent Dirichlet allocation---which exploits a certain tensor structure in their low-order observable moments (typically, of second- and third-order). Specifically, parameter estimation is reduced to the problem of extracting a certain (orthogonal) decomposition of a symmetric tensor derived from the moments; this decomposition can be viewed as a natural generalization of the singular value decomposition for matrices. Although tensor decompositions are generally intractable to compute, the decomposition of these specially structured tensors can be efficiently obtained by a variety of approaches, including power iterations and maximization approaches (similar to the case of matrices). A detailed analysis of a robust tensor power method is provided, establishing an analogue of Wedin's perturbation theorem for the singular vectors of matrices. 
This implies a robust and computationally tractable estimation approach for several popular latent variable models.", "date": "2014-08", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "15", "publisher": "Journal of Machine Learning Research", "pagerange": "2773-2832", "id_number": "CaltechAUTHORS:20170927-134735763", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-134735763", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-12-1-0404" } ] }, "doi": "10.48550/arXiv.1210.7559", "primary_object": { "basename": "1210.7559.pdf", "url": "https://authors.library.caltech.edu/records/9wngc-yb438/files/1210.7559.pdf" }, "related_objects": [ { "basename": "anandkumar14b.pdf", "url": "https://authors.library.caltech.edu/records/9wngc-yb438/files/anandkumar14b.pdf" } ], "resource_type": "article", "pub_year": "2014", "author_list": "Anandkumar, Animashree; Ge, Rong; et el." }, { "id": "https://authors.library.caltech.edu/records/0ewyv-97w09", "eprint_id": 81871, "eprint_status": "archive", "datestamp": "2023-08-20 01:16:51", "lastmod": "2023-10-17 21:53:28", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Ge-Rong", "name": { "family": "Ge", "given": "Rong" } }, { "id": "Hsu-Daniel", "name": { "family": "Hsu", "given": "Daniel" } }, { "id": "Kakade-S-M", "name": { "family": "Kakade", "given": "Sham M." 
} } ] }, "title": "A Tensor Approach to Learning Mixed Membership Community Models", "ispublished": "pub", "full_text_status": "public", "keywords": "community detection, spectral methods, tensor methods, moment-based estimation, mixed membership models", "note": "\u00a9 2014 Anima Anandkumar, Rong Ge, Daniel Hsu, Sham Kakade. \n\nSubmitted 7/13; Revised 11/13; Published 10/00. \n\nWe thank the JMLR Action Editor Nathan Srebro and the anonymous reviewers for comments which significantly improved this manuscript. We thank Jure Leskovec for helpful discussions regarding various community models. Part of this work was done when AA, RG, and DH were at MSR New England. AA is supported in part by the Microsoft faculty fellowship, NSF Career award CCF-1254106, NSF Award CCF-1219234 and the ARO YIP Award W911NF-13-1-0084.\n\nPublished - anandkumar14a.pdf
Submitted - 1302.2684.pdf
", "abstract": "Community detection is the task of detecting hidden communities from observed interactions. Guaranteed community detection has so far been mostly limited to models with non-overlapping communities such as the stochastic block model. In this paper, we remove this restriction, and provide guaranteed community detection for a family of probabilistic network models with overlapping communities, termed as the mixed membership Dirichlet model, first introduced by Airoldi et al. (2008). This model allows for nodes to have fractional memberships in multiple communities and assumes that the community memberships are drawn from a Dirichlet distribution. Moreover, it contains the stochastic block model as a special case. We propose a unified approach to learning these models via a tensor spectral decomposition method. Our estimator is based on low-order moment tensor of the observed network, consisting of 33-star counts. Our learning method is fast and is based on simple linear algebraic operations, e.g., singular value decomposition and tensor power iterations. We provide guaranteed recovery of community memberships and model parameters and present a careful finite sample analysis of our learning method. 
As an important special case, our results match the best known scaling requirements for the (homogeneous) stochastic block model.", "date": "2014-06", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "15", "publisher": "Journal of Machine Learning Research", "pagerange": "2239-2312", "id_number": "CaltechAUTHORS:20170927-093022023", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-093022023", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-13-1-0084" } ] }, "doi": "10.48550/arXiv.1302.2684", "primary_object": { "basename": "1302.2684.pdf", "url": "https://authors.library.caltech.edu/records/0ewyv-97w09/files/1302.2684.pdf" }, "related_objects": [ { "basename": "anandkumar14a.pdf", "url": "https://authors.library.caltech.edu/records/0ewyv-97w09/files/anandkumar14a.pdf" } ], "resource_type": "article", "pub_year": "2014", "author_list": "Anandkumar, Animashree; Ge, Rong; et el." }, { "id": "https://authors.library.caltech.edu/records/dh3b9-e9480", "eprint_id": 81805, "eprint_status": "archive", "datestamp": "2023-08-20 00:25:45", "lastmod": "2023-10-17 21:50:21", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Sattari-P", "name": { "family": "Sattari", "given": "Pegah" } }, { "id": "Kurant-M", "name": { "family": "Kurant", "given": "Maciej" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Markopoulou-A", "name": { "family": "Markopoulou", "given": "Athina" } }, { "id": "Rabbat-M-G", "name": { "family": "Rabbat", "given": "Michael G." 
} } ] }, "title": "Active Learning of Multiple Source Multiple Destination Topologies", "ispublished": "pub", "full_text_status": "public", "keywords": "Active hypothesis testing, adaptive sensing algorithms,\napplications of statistical signal processing techniques, inference and estimation on graphs, Internet, network monitoring,\nsequential learning, tomography", "note": "\u00a9 2014 IEEE.\n\nManuscript received July 27, 2013; accepted January 16, 2014. Date of publication February 04, 2014; date of current version March 17, 2014. \n\nThe associate editor coordinating the review of this manuscript and approving it for publication was Prof. Shuguang (Robert) Cui. This work was supported by an NSF Award 1028394, AFOSR Award FA9550-10-1-0310 and AFOSR MURI FA9550-09-0643. The work of M. Rabbat was supported in part by the Natural Sciences and Engineering Research Council of Canada.\n\nSubmitted - 1212.2310.pdf
", "abstract": "We consider the problem of inferring the topology of a network with M sources and N receivers (an M-by- N network), by sending probes between the sources and receivers. Prior work has shown that this problem can be decomposed into two parts: first, infer smaller subnetwork components (1-by- N's or 2-by-2's) and then merge them to identify the M-by- N topology. We focus on the second part, which had previously received less attention in the literature. We assume that a 1-by- N topology is given and that all 2-by-2 components can be queried and learned using end-to-end probes. The problem is which 2-by-2's to query and how to merge them with the given 1-by- N, so as to exactly identify the 2-by- N topology, and optimize a number of performance metrics, including the number of queries (which directly translates into measurement bandwidth), time complexity, and memory usage. We provide a lower bound, [N/2], on the number of 2-by-2's required by any active learning algorithm and propose two greedy algorithms. The first algorithm follows the framework of multiple hypothesis testing, in particular Generalized Binary Search (GBS). The second algorithm is called the Receiver Elimination Algorithm (REA) and follows a bottom-up approach. It requires exactly N-1 steps, which is much less than all (2N) possible 2-by-2's. 
Simulation results demonstrate that both algorithms correctly identify the 2-by-N topology and are near-optimal, but REA is more efficient in practice.", "date": "2014-04-15", "date_type": "published", "publication": "IEEE Transactions on Signal Processing", "volume": "62", "number": "8", "publisher": "IEEE", "pagerange": "1926-1937", "id_number": "CaltechAUTHORS:20170925-101553300", "issn": "1053-587X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170925-101553300", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "OIA-1028394" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-09-0643" }, { "agency": "Natural Sciences and Engineering Research Council of Canada (NSERC)" } ] }, "doi": "10.1109/TSP.2014.2304431", "primary_object": { "basename": "1212.2310.pdf", "url": "https://authors.library.caltech.edu/records/dh3b9-e9480/files/1212.2310.pdf" }, "resource_type": "article", "pub_year": "2014", "author_list": "Sattari, Pegah; Kurant, Maciej; et al." 
}, { "id": "https://authors.library.caltech.edu/records/02zya-rxd68", "eprint_id": 81883, "eprint_status": "archive", "datestamp": "2023-08-20 00:17:37", "lastmod": "2023-10-17 21:53:53", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Janzamin-M", "name": { "family": "Janzamin", "given": "Majid" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } } ] }, "title": "High-Dimensional Covariance Decomposition into Sparse Markov and Independence Models", "ispublished": "pub", "full_text_status": "public", "keywords": "high-dimensional covariance estimation, sparse graphical model selection, sparse covariance models, sparsistency, convex optimization", "note": "\u00a9 2014 Majid Janzamin and Animashree Anandkumar. \n\nWe thank Karthik Mohan for helpful discussions on running experiments. We also acknowledge useful discussions with Max Welling, Babak Hassibi and Martin Wainwright. We also thank Bin Yu and the JMLR reviewers for valuable comments that have significantly improved the manuscript. M. Janzamin is supported by NSF Award CCF-1219234 and ARO Award W911NF-12-1-0404. A. Anandkumar is supported in part by Microsoft Faculty Fellowship, NSF Career award CCF-1254106, NSF Award CCF-1219234, AFOSR Award FA9550-10-1-0310, and ARO Award W911NF-12-1-0404.\n\nPublished - p1549-janzamin.pdf
Submitted - 1211.0919.pdf
", "abstract": "Fitting high-dimensional data involves a delicate tradeoff between faithful representation and the use of sparse models. Too often, sparsity assumptions on the fitted model are too restrictive to provide a faithful representation of the observed data. In this paper, we present a novel framework incorporating sparsity in different domains. We decompose the observed covariance matrix into a sparse Gaussian Markov model (with a sparse precision matrix) and a sparse independence model (with a sparse covariance matrix). Our framework incorporates sparse covariance and sparse precision estimation as special cases and thus introduces a richer class of high-dimensional models. We posit the observed data as generated from a linear combination of a sparse Gaussian Markov model (with a sparse precision matrix) and a sparse Gaussian independence model (with a sparse covariance matrix). We characterize sufficient conditions for identifiability of the two models, viz., Markov and independence models. We propose an efficient decomposition method based on a modification of the popular \u2113_1-penalized maximum- likelihood estimator (\u2113_1-MLE). We establish that our estimator is consistent in both the domains, i.e., it successfully recovers the supports of both Markov and independence models, when the number of samples n scales as n=\u03a9(d^2log p), where p is the number of variables and d is the maximum node degree in the Markov model. 
Our experiments validate these results and also demonstrate that our models have better inference accuracy under simple algorithms such as loopy belief propagation.", "date": "2014-04", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "15", "publisher": "Journal of Machine Learning Research", "pagerange": "1549-1591", "id_number": "CaltechAUTHORS:20170927-142820777", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-142820777", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-12-1-0404" }, { "agency": "Microsoft Research" }, { "agency": "NSF", "grant_number": "CCF-1254106" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" } ] }, "doi": "10.48550/arXiv.1211.0919", "primary_object": { "basename": "1211.0919.pdf", "url": "https://authors.library.caltech.edu/records/02zya-rxd68/files/1211.0919.pdf" }, "related_objects": [ { "basename": "p1549-janzamin.pdf", "url": "https://authors.library.caltech.edu/records/02zya-rxd68/files/p1549-janzamin.pdf" } ], "resource_type": "article", "pub_year": "2014", "author_list": "Janzamin, Majid and Anandkumar, Animashree" }, { "id": "https://authors.library.caltech.edu/records/nas3a-knz03", "eprint_id": 81631, "eprint_status": "archive", "datestamp": "2023-08-22 11:01:54", "lastmod": "2023-10-17 20:55:08", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "He-Ting", "name": { "family": "He", "given": "Ting" } }, { "id": "Bisdikian-C", "name": { "family": "Bisdikian", "given": "Chatschik" } }, { "id": "Agrawal-D", "name": { "family": "Agrawal", "given": "Dakshi" } } ] }, "title": "Seeing 
through black boxes: Tracking transactions through queues under monitoring resource constraints", "ispublished": "pub", "full_text_status": "public", "keywords": "Probabilistic transaction monitoring; Queueing networks; Stochastic comparison; Bipartite matching", "note": "\u00a9 2013 Elsevier B.V. \n\nReceived 10 February 2010, Revised 1 August 2011, Accepted 3 August 2013, Available online 24 August 2013. \n\nThe authors thank R. Nunez Queija for discussions on the processor-sharing queue and Varun Gupta for discussions on the notion of convex order at the MAMA 2009 workshop.\n\nSubmitted - 1006.1674.pdf
", "abstract": "The problem of optimal allocation of monitoring resources for tracking transactions progressing through a distributed system, modeled as a queueing network, is considered. Two forms of monitoring information are considered, viz., locally unique transaction identifiers, and arrival and departure timestamps of transactions at each processing queue. The timestamps are assumed to be available at all the queues but in the absence of identifiers, only enable imprecise tracking since parallel processing can result in out-of-order departures. On the other hand, identifiers enable precise tracking but are not available without proper instrumentation. Given an instrumentation budget, only a subset of queues can be selected for the production of identifiers, while the remaining queues have to resort to imprecise tracking using timestamps. The goal is then to optimally allocate the instrumentation budget to maximize the overall tracking accuracy. The challenge is that the optimal allocation strategy depends on accuracies of timestamp-based tracking at different queues, which has complex dependencies on the arrival and service processes, and the queueing discipline. We propose two simple heuristics for allocation by predicting the order of timestamp-based tracking accuracies of different queues. We derive sufficient conditions for these heuristics to achieve optimality through the notion of the stochastic comparison of queues. 
Simulations show that our heuristics are close to optimality, even when the parameters deviate from these conditions.", "date": "2013-12", "date_type": "published", "publication": "Performance Evaluation", "volume": "70", "number": "12", "publisher": "Elsevier", "pagerange": "1090-1110", "id_number": "CaltechAUTHORS:20170920-142253537", "issn": "0166-5316", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-142253537", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "doi": "10.1016/j.peva.2013.08.003", "primary_object": { "basename": "1006.1674.pdf", "url": "https://authors.library.caltech.edu/records/nas3a-knz03/files/1006.1674.pdf" }, "resource_type": "article", "pub_year": "2013", "author_list": "Anandkumar, Animashree; He, Ting; et el." }, { "id": "https://authors.library.caltech.edu/records/w15ds-38039", "eprint_id": 81626, "eprint_status": "archive", "datestamp": "2023-08-22 10:03:18", "lastmod": "2023-10-17 20:54:54", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Hassidim-A", "name": { "family": "Hassidim", "given": "Avinatan" } }, { "id": "Kelner-J", "name": { "family": "Kelner", "given": "Jonathan" } } ] }, "title": "Topology discovery of sparse random graphs with few participants", "ispublished": "pub", "full_text_status": "public", "keywords": "topology discovery; sparse random graphs; end-to-end measurements; hidden nodes; quartet tests", "note": "\u00a9 2012 Wiley Periodicals, Inc. \n\nIssue online: 20 June 2013; Version of record online: 27 April 2012; Manuscript Accepted: 17 February 2012; Manuscript Received: 23 March 2011. \n\nSupported in part by the setup funds at UCI and the AFOSR Award (FA9550-10-1-0310). \n\nA shorter version appears in Proceedings of ACM SIGMETRICS, June 2011.\n\nSubmitted - 1102.5063.pdf
", "abstract": "We consider the task of topology discovery of sparse random graphs using end-to-end random measurements (e.g., delay) between a subset of nodes, referred to as the participants. The rest of the nodes are hidden, and do not provide any information for topology discovery. We consider topology discovery under two routing models: (a) the participants exchange messages along the shortest paths and obtain end-to-end measurements, and (b) additionally, the participants exchange messages along the second shortest path. For scenario (a), our proposed algorithm results in a sub-linear edit-distance guarantee using a sub-linear number of uniformly selected participants. For scenario (b), we obtain a much stronger result, and show that we can achieve consistent reconstruction when a sub-linear number of uniformly selected nodes participate. This implies that accurate discovery of sparse random graphs is tractable using an extremely small number of participants. We finally obtain a lower bound on the number of participants required by any algorithm to reconstruct the original random graph up to a given edit distance. 
We also demonstrate that while consistent discovery is tractable for sparse random graphs using a small number of participants, in general, there are graphs which cannot be discovered by any algorithm even with a significant number of participants, and with the availability of end-to-end information along all the paths between the participants.", "date": "2013-08", "date_type": "published", "publication": "Random Structures & Algorithms", "volume": "43", "number": "1", "publisher": "Wiley", "pagerange": "16-48", "id_number": "CaltechAUTHORS:20170920-132342501", "issn": "1042-9832", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-132342501", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "University of California, Irvine" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" } ] }, "doi": "10.1002/rsa.20420", "primary_object": { "basename": "1102.5063.pdf", "url": "https://authors.library.caltech.edu/records/w15ds-38039/files/1102.5063.pdf" }, "resource_type": "article", "pub_year": "2013", "author_list": "Anandkumar, Animashree; Hassidim, Avinatan; et el." }, { "id": "https://authors.library.caltech.edu/records/04z10-a8295", "eprint_id": 81877, "eprint_status": "archive", "datestamp": "2023-08-19 14:06:11", "lastmod": "2023-10-17 21:53:39", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Valluvan-R", "name": { "family": "Valluvan", "given": "Ragupathyraj" } } ] }, "title": "Learning loopy graphical models with latent variables: Efficient methods and guarantees", "ispublished": "pub", "full_text_status": "public", "keywords": "Graphical model selection, latent variables, quartet methods", "note": "\u00a9 2013 Institute of Mathematical Statistics. 
\n\nReceived March 2012; revised October 2012. \n\nSupported in part by NSF Award CCF-1219234, AFOSR Award FA9550-10-1-0310 and ARO Award W911NF-12-1-0404. \n\nSupported by the ONR Award N00014-08-1-1015. \n\nThe authors thank E. Mossel (Berkeley) for detailed discussions in the beginning regarding problem formulation, modeling and algorithmic approaches and Padhraic Smyth (UCI) and David Newman (UCI) for evaluation measures for topic models. The authors also thank the editor Tony Cai (Wharton) and anonymous reviewers whose comments substantially improved the paper. An abridged version of this work appears in the Proceedings of NIPS 2012.\n\nPublished - euclid.aos.1366138196.pdf
Submitted - 1203.3887.pdf
Supplemental Material - euclid.aos.1366138196_si.pdf
", "abstract": "The problem of structure estimation in graphical models with latent variables is considered. We characterize conditions for tractable graph estimation and develop efficient methods with provable guarantees. We consider models where the underlying Markov graph is locally tree-like, and the model is in the regime of correlation decay. For the special case of the Ising model, the number of samples n required for structural consistency of our method scales as n=\u03a9(\u03b8^(\u2212\u03b4\u03b7(\u03b7+1)\u22122)_(min)log p), where p is the number of variables, \u03b8_(min) is the minimum edge potential, \u03b4 is the depth (i.e., distance from a hidden node to the nearest observed nodes), and \u03b7 is a parameter which depends on the bounds on node and edge potentials in the Ising model. Necessary conditions for structural consistency under any algorithm are derived and our method nearly matches the lower bound on sample requirements. Further, the proposed method is practical to implement and provides flexibility to control the number of latent variables and the cycle lengths in the output graph.", "date": "2013", "date_type": "published", "publication": "Annals of Statistics", "volume": "41", "number": "2", "publisher": "Institute of Mathematical Statistics", "pagerange": "401-435", "id_number": "CaltechAUTHORS:20170927-104250746", "issn": "0090-5364", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-104250746", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "CCF-1219234" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-12-1-0404" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-08-1-1015" } ] }, "doi": "10.48550/arXiv.1203.3887", "primary_object": { "basename": 
"1203.3887.pdf", "url": "https://authors.library.caltech.edu/records/04z10-a8295/files/1203.3887.pdf" }, "related_objects": [ { "basename": "euclid.aos.1366138196.pdf", "url": "https://authors.library.caltech.edu/records/04z10-a8295/files/euclid.aos.1366138196.pdf" }, { "basename": "euclid.aos.1366138196_si.pdf", "url": "https://authors.library.caltech.edu/records/04z10-a8295/files/euclid.aos.1366138196_si.pdf" } ], "resource_type": "article", "pub_year": "2013", "author_list": "Anandkumar, Animashree and Valluvan, Ragupathyraj" }, { "id": "https://authors.library.caltech.edu/records/96w9p-6n432", "eprint_id": 33349, "eprint_status": "archive", "datestamp": "2023-08-22 06:16:28", "lastmod": "2023-10-18 19:00:30", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Liu-Ying", "name": { "family": "Liu", "given": "Ying" } }, { "id": "Chandrasekaran-V", "name": { "family": "Chandrasekaran", "given": "Venkat" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Willsky-A-S", "name": { "family": "Willsky", "given": "Alan S." } } ] }, "title": "Feedback Message Passing for Inference in Gaussian Graphical Models", "ispublished": "pub", "full_text_status": "public", "keywords": "Belief propagation; feedback vertex set; Gaussian graphical models; graphs with cycles; Markov random field", "note": "\u00a9 2012 IEEE. \n\nManuscript received May 09, 2011; revised January 09, 2012; accepted April 03, 2012. Date of publication May 03, 2012; date of current version July 10, 2012. \n\nThe associate editor coordinating the review of this manuscript and approving it for publication was Prof. Raviv Raich. This research was supported in part by AFOSR through Grant FA9550-08-1-1080 and in part by Shell International Exploration and Production, Inc. This paper was presented in part at the International Symposium of Information Theory, Austin, Texas, 2010. \n\nThe authors would like to thank D. Shah, J. 
Dauwels, V. Tan\nfor helpful discussions, and the reviewers for their constructive comments.\n\nSubmitted - 1105.1853.pdf
", "abstract": "While loopy belief propagation (LBP) performs reasonably well for inference in some Gaussian graphical models with cycles, its performance is unsatisfactory for many others. In particular for some models LBP does not converge, and in general when it does converge, the computed variances are incorrect (except for cycle-free graphs for which belief propagation (BP) is non-iterative and exact). In this paper we propose feedback message passing (FMP), a message-passing algorithm that makes use of a special set of vertices (called a feedback vertex set or FVS) whose removal results in a cycle-free graph. In FMP, standard BP is employed several times on the cycle-free subgraph excluding the FVS while a special message-passing scheme is used for the nodes in the FVS. The computational complexity of exact inference is O(k^(2)n), where is the number of feedback nodes, and is the total number of nodes. When the size of the FVS is very large, FMP is computationally costly. Hence we propose approximate FMP, where a pseudo-FVS is used instead of an FVS, and where inference in the non-cycle-free graph obtained by removing the pseudo-FVS is carried out approximately using LBP. We show that, when approximate FMP converges, it yields exact means and variances on the pseudo-FVS and exact means throughout the remainder of the graph. We also provide theoretical results on the convergence and accuracy of approximate FMP. In particular, we prove error bounds on variance computation. Based on these theoretical results, we design efficient algorithms to select a pseudo-FVS of bounded size. The choice of the pseudo-FVS allows us to explicitly trade off between efficiency and accuracy. 
Experimental results show that using a pseudo-FVS of size no larger than log (n), this procedure converges much more often, more quickly, and provides more accurate results than LBP on the entire graph.", "date": "2012-08", "date_type": "published", "publication": "IEEE Transactions on Signal Processing", "volume": "60", "number": "8", "publisher": "IEEE", "pagerange": "4135-4150", "id_number": "CaltechAUTHORS:20120820-094221711", "issn": "1053-587X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20120820-094221711", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-08-1-1080" }, { "agency": "Shell International Exploration and Production, Inc." } ] }, "doi": "10.1109/TSP.2012.2195656", "primary_object": { "basename": "1105.1853.pdf", "url": "https://authors.library.caltech.edu/records/96w9p-6n432/files/1105.1853.pdf" }, "resource_type": "article", "pub_year": "2012", "author_list": "Liu, Ying; Chandrasekaran, Venkat; et el." }, { "id": "https://authors.library.caltech.edu/records/a5bhb-28r93", "eprint_id": 81870, "eprint_status": "archive", "datestamp": "2023-08-19 12:00:00", "lastmod": "2023-10-17 21:53:26", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Tan-Vincent-Y-F", "name": { "family": "Tan", "given": "Vincent Y. F." } }, { "id": "Huang-Furong", "name": { "family": "Huang", "given": "Furong" } }, { "id": "Willsky-A-S", "name": { "family": "Willsky", "given": "Alan S." } } ] }, "title": "High-Dimensional Gaussian Graphical Model Selection: Walk Summability and Local Separation Criterion", "ispublished": "pub", "full_text_status": "public", "note": "\u00a9 2012 Animashree Anandkumar, Vincent Tan, Furong Huang and Alan Willsky. 
\n\nSubmitted 7/11; Revised 4/12; Published 8/12. \n\nAn abridged version of this paper appeared in the Proceedings of NIPS 2011. The first author is supported in part by the setup funds at UCI and the AFOSR Award FA9550-10-1-0310, the second author is supported by A*STAR, Singapore and the third author is supported in part by AFOSR under Grant FA9550-08-1-1080. The authors thank Venkat Chandrasekaran (UC Berkeley) for discussions on walk-summable models, Elchanan Mossel (UC Berkeley) for discussions on the necessary conditions for model selection and Divyanshu Vats (U. Minn.) for extensive comments. \n\nThe authors thank the Associate Editor Martin Wainwright (Berkeley) and the anonymous reviewers for comments which significantly improved this manuscript.\n\nPublished - anandkumar12a.pdf
Submitted - 1107.1270.pdf
", "abstract": "We consider the problem of high-dimensional Gaussian graphical model selection. We identify a set of graphs for which an efficient estimation algorithm exists, and this algorithm is based on thresholding of empirical conditional covariances. Under a set of transparent conditions, we establish structural consistency (or sparsistency) for the proposed algorithm, when the number of samples n=\u03a9(J_(min)^(-2) log p), where p is the number of variables and J_(min) is the minimum (absolute) edge potential of the graphical model. The sufficient conditions for sparsistency are based on the notion of walk-summability of the model and the presence of sparse local vertex separators in the underlying graph. We also derive novel non-asymptotic necessary conditions on the number of samples required for sparsistency.", "date": "2012-08", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "13", "publisher": "Journal of Machine Learning Research", "pagerange": "2293-2337", "id_number": "CaltechAUTHORS:20170927-091743601", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-091743601", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "University of California, Irvine" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" }, { "agency": "Agency for Science, Technology and Research (A*STAR)" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-08-1-1080" } ] }, "doi": "10.48550/arXiv.1107.1270", "primary_object": { "basename": "1107.1270.pdf", "url": "https://authors.library.caltech.edu/records/a5bhb-28r93/files/1107.1270.pdf" }, "related_objects": [ { "basename": "anandkumar12a.pdf", "url": "https://authors.library.caltech.edu/records/a5bhb-28r93/files/anandkumar12a.pdf" } ], "resource_type": "article", 
"pub_year": "2012", "author_list": "Anandkumar, Animashree; Tan, Vincent Y. F.; et el." }, { "id": "https://authors.library.caltech.edu/records/zm5sy-k6p16", "eprint_id": 81876, "eprint_status": "archive", "datestamp": "2023-08-19 11:13:58", "lastmod": "2023-10-17 21:53:36", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" }, "orcid": "0000-0002-6974-6797" }, { "id": "Tan-Vincent-Y-F", "name": { "family": "Tan", "given": "Vincent Y. F." } }, { "id": "Huang-Furong", "name": { "family": "Huang", "given": "Furong" } }, { "id": "Willsky-Alan-S", "name": { "family": "Willsky", "given": "Alan S." } } ] }, "title": "High-dimensional structure estimation in Ising models: Local separation criterion", "ispublished": "pub", "full_text_status": "public", "keywords": "Ising models, graphical model selection, local-separation property", "note": "\u00a9 2012 Institute of Mathematical Statistics. \n\nSupported by the setup funds at UCI and the AFOSR Award FA9550-10-1-0310. \n\nSupported in part by A*STAR, Singapore. \n\nSupported in part by AFOSR under Grant FA9550-08-1-1080. \n\nThe authors thank Sujay Sanghavi (U.T. Austin), Elchanan Mossel (UC Berkeley), Martin Wainwright (UC Berkeley), Sebastien Roch (UCLA), Rui Wu (UIUC) and Divyanshu Vats (U. Minn.) for extensive comments, and B\u00e9la Bollob\u00e1s (Cambridge) for discussions on random graphs. \n\nThe authors thank the anonymous reviewers and the co-editor Peter B\u00fchlmann (ETH) for valuable comments that significantly improved this manuscript.\n\nPublished - euclid.aos.1344610586.pdf
Accepted Version - 1107.1736.pdf
Supplemental Material - euclid.aos.1344610586_si.pdf
", "abstract": "We consider the problem of high-dimensional Ising (graphical) model selection. We propose a simple algorithm for structure estimation based on the thresholding of the empirical conditional variation distances. We introduce a novel criterion for tractable graph families, where this method is efficient, based on the presence of sparse local separators between node pairs in the underlying graph. For such graphs, the proposed algorithm has a sample complexity of n=\u03a9(J^(\u22122)_(min)log p), where p is the number of variables, and J_(min) is the minimum (absolute) edge potential in the model. We also establish nonasymptotic necessary and sufficient conditions for structure estimation.", "date": "2012-06", "date_type": "published", "publication": "Annals of Statistics", "volume": "40", "number": "3", "publisher": "Institute of Mathematical Statistics", "pagerange": "1346-1375", "id_number": "CaltechAUTHORS:20170927-101515951", "issn": "0090-5364", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-101515951", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" }, { "agency": "Agency for Science, Technology and Research (A*STAR)" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-08-1-1080" }, { "agency": "University of California, Irvine" } ] }, "doi": "10.48550/arXiv.1107.1736", "primary_object": { "basename": "1107.1736.pdf", "url": "https://authors.library.caltech.edu/records/zm5sy-k6p16/files/1107.1736.pdf" }, "related_objects": [ { "basename": "euclid.aos.1344610586.pdf", "url": "https://authors.library.caltech.edu/records/zm5sy-k6p16/files/euclid.aos.1344610586.pdf" }, { "basename": "euclid.aos.1344610586_si.pdf", "url": 
"https://authors.library.caltech.edu/records/zm5sy-k6p16/files/euclid.aos.1344610586_si.pdf" } ], "resource_type": "article", "pub_year": "2012", "author_list": "Anandkumar, Animashree; Tan, Vincent Y. F.; et el." }, { "id": "https://authors.library.caltech.edu/records/hsndz-x7g55", "eprint_id": 81801, "eprint_status": "archive", "datestamp": "2023-08-19 08:33:43", "lastmod": "2023-10-17 21:50:01", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-Amod-J-G", "name": { "family": "Anandkumar", "given": "Amod J. G." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Lambotharan-S", "name": { "family": "Lambotharan", "given": "Sangarapillai" } }, { "id": "Chambers-J-A", "name": { "family": "Chambers", "given": "Jonathon A." } } ] }, "title": "Robust Rate Maximization Game Under Bounded Channel Uncertainty", "ispublished": "pub", "full_text_status": "restricted", "keywords": "Channel-state information (CSI) uncertainty, game theory, Nash equilibrium, rate maximization, robust games, waterfilling", "note": "\u00a9 2011 IEEE. \n\nManuscript received March 1, 2011; revised July 10, 2011; accepted September 2, 2011. Date of publication October 10, 2011; date of current version December 9, 2011. \n\nThis work was supported in part by the Engineering\nand Physical Sciences Research Council (EPSRC) under Grant EP/F065477/1. The work of A. Anandkumar was supported in part by ARO Grant W911NF-06-1-0076 and in part by setup funds at UCI and AFOSR award FA9550-10-1-0310. This paper was presented in part at the 2010 IEEE International Conference on Acoustics, Speech, and Signal Processing and the 43rd Asilomar Conference on Signals, Systems and Computers. The review of this paper was coordinated by Dr. S. Zhong. \n\nThe authors would like to thank Dr. I. Menache of Microsoft\nResearch for his input on the robust game theory, Dr. G. 
Scutari of the University of Illinois at Urbana-Champaign for his initial guidance and advice on waterfilling algorithms, P. von Wrycza of the Royal Institute of Technology (KTH) and Dr. M. R. Bhavani Shankar of the University of Luxembourg for pointing out a typographical error in an early version of the proofs, Prof. B. Ottersten of KTH for the valuable discussions, and the anonymous reviewers for their valuable feedback.", "abstract": "We consider the problem of decentralized power allocation for competitive rate maximization in a frequency-selective Gaussian interference channel under bounded channel uncertainty. We formulate a distribution-free robust framework for the rate maximization game. We present the robust optimization equilibrium for this game and derive sufficient conditions for its existence and uniqueness. We show that an iterative waterfilling algorithm converges to this equilibrium under certain sufficient conditions. We analyze the social properties of the equilibrium under varying channel uncertainty bounds for the two-user case. We also observe an interesting phenomenon that the equilibrium moves toward a frequency-division multiple-access solution for any set of channel coefficients under increasing channel uncertainty bounds. We further prove that increasing channel uncertainty can lead to a more efficient equilibrium and, hence, a better sum rate in certain two-user communication systems. 
Finally, we confirm, through simulations, that this improvement in equilibrium efficiency is also observed in systems with a higher number of users.", "date": "2011-11", "date_type": "published", "publication": "IEEE Transactions on Vehicular Technology", "volume": "60", "number": "9", "publisher": "IEEE", "pagerange": "4471-4486", "id_number": "CaltechAUTHORS:20170925-094601829", "issn": "0018-9545", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170925-094601829", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Engineering and Physical Sciences Research Council (EPSRC)", "grant_number": "EP/F065477/1" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0076" }, { "agency": "University of California, Irvine" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-10-1-0310" } ] }, "doi": "10.1109/TVT.2011.2171011", "resource_type": "article", "pub_year": "2011", "author_list": "Anandkumar, Amod J. G.; Anandkumar, Animashree; et el." }, { "id": "https://authors.library.caltech.edu/records/c3saa-sgs90", "eprint_id": 81885, "eprint_status": "archive", "datestamp": "2023-08-19 07:01:51", "lastmod": "2023-10-17 21:53:59", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Tan-Vincent-Y-F", "name": { "family": "Tan", "given": "Vincent Y. F." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Willsky-A-S", "name": { "family": "Willsky", "given": "Alan S." } } ] }, "title": "Learning High-Dimensional Markov Forest Distributions: Analysis of Error Rates", "ispublished": "pub", "full_text_status": "public", "keywords": "graphical models, forest distributions, structural consistency, risk consistency, method of types", "note": "\u00a9 2011 Vincent Tan, Animashree Anandkumar and Alan Willsky. 
\n\nThis work was supported by a AFOSR funded through Grant FA9559-08-1-1080, a MURI funded through ARO Grant W911NF-06-1-0076 and a MURI funded through AFOSR Grant FA9550-06-1-0324. V. Tan is also funded by A*STAR, Singapore. The authors would like to thank Sanjoy Mitter, Lav Varshney, Matt Johnson and James Saunderson for discussions. The authors would also like to thank Rui Wu (UIUC) for pointing out an error in the proof of Theorem 3.\n\nPublished - tan11a.pdf
Submitted - 1005.0766.pdf
", "abstract": "The problem of learning forest-structured discrete graphical models from i.i.d. samples is considered. An algorithm based on pruning of the Chow-Liu tree through adaptive thresholding is proposed. It is shown that this algorithm is both structurally consistent and risk consistent and the error probability of structure learning decays faster than any polynomial in the number of samples under fixed model size. For the high-dimensional scenario where the size of the model d and the number of edges k scale with the number of samples n, sufficient conditions on (n,d,k) are given for the algorithm to satisfy structural and risk consistencies. In addition, the extremal structures for learning are identified; we prove that the independent (resp., tree) model is the hardest (resp., easiest) to learn using the proposed algorithm in terms of error rates for structure learning.", "date": "2011-06", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "12", "publisher": "Journal of Machine Learning Research", "pagerange": "1617-1653", "id_number": "CaltechAUTHORS:20170927-144736867", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-144736867", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9559-08-1-1080" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0076" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-06-1-0324" }, { "agency": "Agency for Science, Technology and Research (A*STAR)" } ] }, "doi": "10.48550/arXiv.1005.0766", "primary_object": { "basename": "1005.0766.pdf", "url": "https://authors.library.caltech.edu/records/c3saa-sgs90/files/1005.0766.pdf" }, "related_objects": [ { "basename": "tan11a.pdf", "url": 
"https://authors.library.caltech.edu/records/c3saa-sgs90/files/tan11a.pdf" } ], "resource_type": "article", "pub_year": "2011", "author_list": "Tan, Vincent Y. F.; Anandkumar, Animashree; et el." }, { "id": "https://authors.library.caltech.edu/records/fx9my-swj04", "eprint_id": 81875, "eprint_status": "archive", "datestamp": "2023-08-19 06:26:40", "lastmod": "2023-10-17 21:53:33", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Choi-Myung Jin", "name": { "family": "Choi", "given": "Myung Jin" } }, { "id": "Tan-Vincent-Y-F", "name": { "family": "Tan", "given": "Vincent Y. F." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Willsky-A-S", "name": { "family": "Willsky", "given": "Alan S." } } ] }, "title": "Learning Latent Tree Graphical Models", "ispublished": "pub", "full_text_status": "public", "keywords": "graphical models, Markov random fields, hidden variables, latent tree models, structure learning", "note": "\u00a9 2011 Myung Jin Choi, Vincent Y. F. Tan, Animashree Anandkumar and Alan S. Willsky. \n\nSubmitted 9/10; Revised 2/11; Published 5/11. \n\nThis research was supported in part by Shell International Exploration and Production, Inc. and in part by the Air Force Office of Scientific Research under Award No. FA9550-06-1-0324. This work was also supported in part by AFOSR under Grant FA9550-08-1-1080 and in part by MURI under AFOSR Grant FA9550-06-1-0324. Any opinions, findings, and conclusions or recommendations expressed in this publication are those of the author(s) and do not necessarily reflect the views of the Air Force. Vincent Tan and Animashree Anandkumar are supported by A*STAR, Singapore and by the setup funds at U.C. Irvine respectively.\n\nPublished - choi11b.pdf
Submitted - 1009.2722.pdf
", "abstract": "We study the problem of learning a latent tree graphical model where samples are available only from a subset of variables. We propose two consistent and computationally efficient algorithms for learning minimal latent trees, that is, trees without any redundant hidden nodes. Unlike many existing methods, the observed nodes (or variables) are not constrained to be leaf nodes. Our algorithms can be applied to both discrete and Gaussian random variables and our learned models are such that all the observed and latent variables have the same domain (state space). Our first algorithm, recursive grouping, builds the latent tree recursively by identifying sibling groups using so-called information distances. One of the main contributions of this work is our second algorithm, which we refer to as CLGrouping. CLGrouping starts with a pre-processing procedure in which a tree over the observed variables is constructed. This global step groups the observed nodes that are likely to be close to each other in the true latent tree, thereby guiding subsequent recursive grouping (or equivalent procedures such as neighbor-joining) on much smaller subsets of variables. This results in more accurate and efficient learning of latent trees. We also present regularized versions of our algorithms that learn latent tree approximations of arbitrary distributions. We compare the proposed algorithms to other methods by performing extensive numerical experiments on various latent tree graphical models such as hidden Markov models and star graphs. 
In addition, we demonstrate the applicability of our methods on real-world data sets by modeling the dependency structure of monthly stock returns in the S&P index and of the words in the 20 newsgroups data set.", "date": "2011-05", "date_type": "published", "publication": "Journal of Machine Learning Research", "volume": "12", "publisher": "Journal of Machine Learning Research", "pagerange": "1771-1812", "id_number": "CaltechAUTHORS:20170927-100701408", "issn": "1533-7928", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170927-100701408", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Shell International Exploration and Production, Inc." }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-06-1-0324" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-08-1-1080" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-06-1-0324" }, { "agency": "Agency for Science, Technology and Research (A*STAR)" }, { "agency": "University of California, Irvine" } ] }, "doi": "10.48550/arXiv.1009.2722", "primary_object": { "basename": "1009.2722.pdf", "url": "https://authors.library.caltech.edu/records/fx9my-swj04/files/1009.2722.pdf" }, "related_objects": [ { "basename": "choi11b.pdf", "url": "https://authors.library.caltech.edu/records/fx9my-swj04/files/choi11b.pdf" } ], "resource_type": "article", "pub_year": "2011", "author_list": "Choi, Myung Jin; Tan, Vincent Y. F.; et el." 
}, { "id": "https://authors.library.caltech.edu/records/pbdd0-km498", "eprint_id": 81748, "eprint_status": "archive", "datestamp": "2023-08-19 06:06:01", "lastmod": "2023-10-17 21:19:26", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Michael-N", "name": { "family": "Michael", "given": "Nithin" } }, { "id": "Tang-Ao", "name": { "family": "Tang", "given": "Ao Kevin" }, "orcid": "0000-0001-6296-644X" }, { "id": "Swami-A", "name": { "family": "Swami", "given": "Ananthram" } } ] }, "title": "Distributed Algorithms for Learning and Cognitive Medium Access with Logarithmic Regret", "ispublished": "pub", "full_text_status": "public", "keywords": "Cognitive medium access control, multi-armed bandits, distributed algorithms, logarithmic regret", "note": "\u00a9 2011 IEEE. \n\nManuscript received 1 December 2009; revised 4 June 2010. \n\nDuring the stint of this work, the first author was supported by MURI through AFOSR Grant FA9550-06-1-0324. The second and the third authors are supported in part through NSF grant CCF-0835706. Parts of this paper were presented at [1].\n\nSubmitted - 1006.1673v1.pdf
", "abstract": "The problem of distributed learning and channel access is considered in a cognitive network with multiple secondary users. The availability statistics of the channels are initially unknown to the secondary users and are estimated using sensing decisions. There is no explicit information exchange or prior agreement among the secondary users and sensing and access decisions are undertaken by them in a completely distributed manner. We propose policies for distributed learning and access which achieve order-optimal cognitive system throughput (number of successful secondary transmissions) under self play, i.e., when implemented at all the secondary users. Equivalently, our policies minimize the sum regret in distributed learning and access, which is the loss in secondary throughput due to learning and distributed access. For the scenario when the number of secondary users is known to the policy, we prove that the total regret is logarithmic in the number of transmission slots. This policy achieves order-optimal regret based on a logarithmic lower bound for regret under any uniformly-good learning and access policy. We then consider the case when the number of secondary users is fixed but unknown, and is estimated at each user through feedback. 
We propose a policy whose sum regret grows only slightly faster than logarithmic in the number of transmission slots.", "date": "2011-04", "date_type": "published", "publication": "IEEE Journal on Selected Areas in Communications", "volume": "29", "number": "4", "publisher": "IEEE", "pagerange": "731-745", "id_number": "CaltechAUTHORS:20170922-133040888", "issn": "0733-8716", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170922-133040888", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-06-1-0324" }, { "agency": "NSF", "grant_number": "CCF-0835706" } ] }, "doi": "10.1109/JSAC.2011.110406", "primary_object": { "basename": "1006.1673v1.pdf", "url": "https://authors.library.caltech.edu/records/pbdd0-km498/files/1006.1673v1.pdf" }, "resource_type": "article", "pub_year": "2011", "author_list": "Anandkumar, Animashree; Michael, Nithin; et el." }, { "id": "https://authors.library.caltech.edu/records/4464m-myq24", "eprint_id": 81737, "eprint_status": "archive", "datestamp": "2023-08-19 05:47:07", "lastmod": "2023-10-17 21:18:47", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Tan-Vincent-Y-F", "name": { "family": "Tan", "given": "Vincent Y. F." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Tong-Lang", "name": { "family": "Tong", "given": "Lang" } }, { "id": "Willsky-A-S", "name": { "family": "Willsky", "given": "Alan S." } } ] }, "title": "A Large-Deviation Analysis of the Maximum-Likelihood Learning of Markov Tree Structures", "ispublished": "pub", "full_text_status": "public", "keywords": "Error exponent, Euclidean information theory,\nlarge-deviations principle, Markov structure, maximum-likelihood (ML) distribution estimation, tree-structured distributions", "note": "\u00a9 2011 IEEE. 
\n\nManuscript received May 06, 2009; revised October 19, 2010; accepted November 18, 2010. Date of current version February 18, 2011. \n\nThis work was supported in part by A*STAR, Singapore, by a MURI funded through ARO Grant W911NF-06-1-0076 and by AFOSR Grant FA9550-08-1-0180 and in part by the Army Research Office MURI Program under award W911NF-08-1-0238. The material in this paper was presented in part at the International Symposium on Information Theory (ISIT), Seoul, Korea, June 2009. V. Y. F. Tan performed this work while at MIT. \n\nThe authors would like to thank the anonymous referees and\nAssociate Editor A. Krzyzak who have helped to improve the\nexposition. One reviewer, in particular, helped highlight the connection of this work with robust hypothesis testing, leading to Section V-D. The authors would also like to thank Prof. L. Zheng, M. Agrawal, and A. Olshevsky for many stimulating discussions.\n\nPublished - 05714274.pdf
Submitted - 0905.0940.pdf
", "abstract": "The problem of maximum-likelihood (ML) estimation of discrete tree-structured distributions is considered. Chow and Liu established that ML-estimation reduces to the construction of a maximum-weight spanning tree using the empirical mutual information quantities as the edge weights. Using the theory of large-deviations, we analyze the exponent associated with the error probability of the event that the ML-estimate of the Markov tree structure differs from the true tree structure, given a set of independently drawn samples. By exploiting the fact that the output of ML-estimation is a tree, we establish that the error exponent is equal to the exponential rate of decay of a single dominant crossover event. We prove that in this dominant crossover event, a non-neighbor node pair replaces a true edge of the distribution that is along the path of edges in the true tree graph connecting the nodes in the non-neighbor pair. Using ideas from Euclidean information theory, we then analyze the scenario of ML-estimation in the very noisy learning regime and show that the error exponent can be approximated as a ratio, which is interpreted as the signal-to-noise ratio (SNR) for learning tree distributions. 
We show via numerical experiments that in this regime, our SNR approximation is accurate.", "date": "2011-03", "date_type": "published", "publication": "IEEE Transactions on Information Theory", "volume": "57", "number": "3", "publisher": "IEEE", "pagerange": "1714-1735", "id_number": "CaltechAUTHORS:20170922-092634649", "issn": "0018-9448", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170922-092634649", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Agency for Science, Technology and Research (A*STAR)" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0076" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-08-1-0180" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-08-1-0238" } ] }, "doi": "10.1109/TIT.2011.2104513", "primary_object": { "basename": "0905.0940.pdf", "url": "https://authors.library.caltech.edu/records/4464m-myq24/files/0905.0940.pdf" }, "related_objects": [ { "basename": "05714274.pdf", "url": "https://authors.library.caltech.edu/records/4464m-myq24/files/05714274.pdf" } ], "resource_type": "article", "pub_year": "2011", "author_list": "Tan, Vincent Y. F.; Anandkumar, Animashree; et el." }, { "id": "https://authors.library.caltech.edu/records/9at92-pns60", "eprint_id": 81730, "eprint_status": "archive", "datestamp": "2023-08-19 02:28:30", "lastmod": "2023-10-17 21:11:56", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Tan-Vincent-Y-F", "name": { "family": "Tan", "given": "Vincent Y. F." } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Willsky-A-S", "name": { "family": "Willsky", "given": "Alan S." 
} } ] }, "title": "Learning Gaussian Tree Models: Analysis of Error Exponents and Extremal Structures", "ispublished": "pub", "full_text_status": "public", "keywords": "Error exponents, Euclidean information theory,\nGauss-Markov random fields, Gaussian graphical models, large\ndeviations, structure learning, tree distributions", "note": "\u00a9 2010 IEEE. \n\nManuscript received September 28, 2009; accepted January 21, 2010. Date of publication February 05, 2010; date of current version April 14, 2010. \n\nThe associate editor coordinating the review of this manuscript and approving it for publication was Dr. Deniz Erdogmus. This work was presented in part at the Allerton Conference on Communication, Control, and Computing, Monticello, IL, September 2009. This work was supported in part by a AFOSR through Grant FA9550-08-1-1080, in part by a MURI funded through ARO Grant W911NF-06-1-0076, and in part under a MURI through AFOSR Grant FA9550-06-1-0324. The work of V. Tan was supported by A*STAR, Singapore.\n\nPublished - 05406101.pdf
Submitted - 0909.5216.pdf
", "abstract": "The problem of learning tree-structured Gaussian graphical models from independent and identically distributed (i.i.d.) samples is considered. The influence of the tree structure and the parameters of the Gaussian distribution on the learning rate as the number of samples increases is discussed. Specifically, the error exponent corresponding to the event that the estimated tree structure differs from the actual unknown tree structure of the distribution is analyzed. Finding the error exponent reduces to a least-squares problem in the very noisy learning regime. In this regime, it is shown that the extremal tree structure that minimizes the error exponent is the star for any fixed set of correlation coefficients on the edges of the tree. If the magnitudes of all the correlation coefficients are less than 0.63, it is also shown that the tree structure that maximizes the error exponent is the Markov chain. In other words, the star and the chain graphs represent the hardest and the easiest structures to learn in the class of tree-structured Gaussian graphical models. 
This result can also be intuitively explained by correlation decay: pairs of nodes which are far apart, in terms of graph distance, are unlikely to be mistaken as edges by the maximum-likelihood estimator in the asymptotic regime.", "date": "2010-05", "date_type": "published", "publication": "IEEE Transactions on Signal Processing", "volume": "58", "number": "5", "publisher": "IEEE", "pagerange": "2701-2714", "id_number": "CaltechAUTHORS:20170922-082655078", "issn": "1053-587X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170922-082655078", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-08-1-1080" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0076" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-06-1-0324" }, { "agency": "Agency for Science, Technology and Research (A*STAR)" } ] }, "doi": "10.1109/TSP.2010.2042478", "primary_object": { "basename": "05406101.pdf", "url": "https://authors.library.caltech.edu/records/9at92-pns60/files/05406101.pdf" }, "related_objects": [ { "basename": "0909.5216.pdf", "url": "https://authors.library.caltech.edu/records/9at92-pns60/files/0909.5216.pdf" } ], "resource_type": "article", "pub_year": "2010", "author_list": "Tan, Vincent Y. F.; Anandkumar, Animashree; et el." }, { "id": "https://authors.library.caltech.edu/records/z3yyh-d3t74", "eprint_id": 81715, "eprint_status": "archive", "datestamp": "2023-08-19 00:00:13", "lastmod": "2023-10-17 21:11:17", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Yukich-J-E", "name": { "family": "Yukich", "given": "Joseph E." 
} }, { "id": "Tong-Lang", "name": { "family": "Tong", "given": "Lang" } }, { "id": "Swami-A", "name": { "family": "Swami", "given": "Ananthram" } } ] }, "title": "Energy scaling laws for distributed inference in random fusion networks", "ispublished": "pub", "full_text_status": "public", "keywords": "Distributed inference, graphical models, Euclidean random graphs, stochastic geometry and data fusion", "note": "\u00a9 2009 IEEE. \n\nManuscript received 25 August 2008; revised 1 February 2009. \n\nParts of this paper were presented at [1], [2]. This work was supported in part through collaborative participation in Communications and Networks Consortium sponsored by the U. S. Army Research Laboratory under the Collaborative\nTechnology Alliance Program, Cooperative Agreement DAAD19-01-2-0011 and by the Army Research Office under Grant ARO-W911NF-06-1-0346. The first author is supported by the IBM Ph.D Fellowship for the year 2008-09 and is currently a visiting student at MIT, Cambridge, MA 02139. The second author was partially supported by NSA grant H98230-06-1-0052 and NSF grant DMS-0805570. The U. S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation thereon.\n\nPublished - 05226971.pdf
Submitted - 0809.0686.pdf
", "abstract": "The energy scaling laws of multihop data fusion networks for distributed inference are considered. The fusion network consists of randomly located sensors distributed i.i.d. according to a general spatial distribution in an expanding region. Under Markov random field (MRF) hypotheses, among the class of data-fusion policies which enable optimal statistical inference at the fusion center using all the sensor measurements, the policy with the minimum average energy consumption is bounded below by the average energy of fusion along the minimum spanning tree, and above by a suboptimal policy, referred to as Data Fusion for Markov Random Fields (DFMRF). Scaling laws are derived for the energy consumption of the optimal and suboptimal fusion policies. It is shown that the average asymptotic energy of the DFMRF scheme is strictly finite for a class of MRF models with Euclidean stabilizing dependency graphs.", "date": "2009-09", "date_type": "published", "publication": "IEEE Journal on Selected Areas in Communications", "volume": "27", "number": "7", "publisher": "IEEE", "pagerange": "1203-1217", "id_number": "CaltechAUTHORS:20170921-155701400", "issn": "0733-8716", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170921-155701400", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Army Research Laboratory (ARL)", "grant_number": "DAAD19-01-2-0011" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0346" }, { "agency": "IBM" }, { "agency": "National Security Agency", "grant_number": "H98230-06-1-0052" }, { "agency": "NSF", "grant_number": "DMS-0805570" } ] }, "doi": "10.1109/JSAC.2009.090916", "primary_object": { "basename": "05226971.pdf", "url": "https://authors.library.caltech.edu/records/z3yyh-d3t74/files/05226971.pdf" }, "related_objects": [ { "basename": "0809.0686.pdf", "url": 
"https://authors.library.caltech.edu/records/z3yyh-d3t74/files/0809.0686.pdf" } ], "resource_type": "article", "pub_year": "2009", "author_list": "Anandkumar, Animashree; Yukich, Joseph E.; et el." }, { "id": "https://authors.library.caltech.edu/records/h718a-79k59", "eprint_id": 81659, "eprint_status": "archive", "datestamp": "2023-08-20 00:49:54", "lastmod": "2023-10-17 20:56:16", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Tong-Lang", "name": { "family": "Tong", "given": "Lang" } }, { "id": "Swami-A", "name": { "family": "Swami", "given": "Ananthram" } } ] }, "title": "Detection of Gauss-Markov Random Fields With Nearest-Neighbor Dependency", "ispublished": "pub", "full_text_status": "public", "keywords": "Detection and estimation, error exponent, Gauss\u2013Markov random fields, law of large numbers", "note": "\u00a9 2009 IEEE. \n\nManuscript received January 02, 2007; revised January 31, 2008. Current version published February 04, 2009. \n\nThis work was supported in part through the collaborative participation in the Communications and Networks Consortium sponsored by the U.S. Army Research Laboratory under the Collaborative Technology Alliance Program, Cooperative Agreement DAAD19-01-2-0011 and by the Army Research Office under Grant ARO-W911NF-06-1-0346. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation thereon. The material in this\npaper was presented in part at IEEE International Conference on Acoustics, Speech and Signal Processing, Hawaii, April 2007.\n\nPublished - 04777634.pdf
Submitted - 0706.1588.pdf
", "abstract": "The problem of hypothesis testing against independence for a Gauss-Markov random field (GMRF) is analyzed. Assuming an acyclic dependency graph, an expression for the log-likelihood ratio of detection is derived. Assuming random placement of nodes over a large region according to the Poisson or uniform distribution and nearest-neighbor dependency graph, the error exponent of the Neyman-Pearson detector is derived using large-deviations theory. The error exponent is expressed as a dependency-graph functional and the limit is evaluated through a special law of large numbers for stabilizing graph functionals. The exponent is analyzed for different values of the variance ratio and correlation. It is found that a more correlated GMRF has a higher exponent at low values of the variance ratio whereas the situation is reversed at high values of the variance ratio.", "date": "2009-02", "date_type": "published", "publication": "IEEE Transactions on Information Theory", "volume": "55", "number": "2", "publisher": "IEEE", "pagerange": "816-827", "id_number": "CaltechAUTHORS:20170920-162015185", "issn": "0018-9448", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-162015185", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Army Research Laboratory (ARL)", "grant_number": "DAAD19-01-2-0011" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0346" } ] }, "doi": "10.1109/TIT.2008.2009855", "primary_object": { "basename": "04777634.pdf", "url": "https://authors.library.caltech.edu/records/h718a-79k59/files/04777634.pdf" }, "related_objects": [ { "basename": "0706.1588.pdf", "url": "https://authors.library.caltech.edu/records/h718a-79k59/files/0706.1588.pdf" } ], "resource_type": "article", "pub_year": "2009", "author_list": "Anandkumar, Animashree; Tong, Lang; et el." 
}, { "id": "https://authors.library.caltech.edu/records/c0hac-kqd07", "eprint_id": 81654, "eprint_status": "archive", "datestamp": "2023-08-19 23:44:02", "lastmod": "2023-10-17 20:56:06", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Tong-Lang", "name": { "family": "Tong", "given": "Lang" } }, { "id": "Swami-A", "name": { "family": "Swami", "given": "Ananthram" } } ] }, "title": "Optimal Node Density for Detection in Energy-Constrained Random Networks", "ispublished": "pub", "full_text_status": "public", "keywords": "Distributed detection, error exponent, Gauss-Markov random fields (GMRF), routing, sensor networks", "note": "\u00a9 2008 IEEE. \n\nManuscript received October 25, 2007; revised June 5, 2008. First published July 15, 2008; current version published September 17, 2008. \n\nThis work was supported in part through the collaborative participation in the Communications and Networks Consortium sponsored by the U. S. Army Research Laboratory under the Collaborative Technology Alliance Program, Cooperative Agreement DAAD19-01-2-0011 and by the Army Research Office by Grant ARO-W911NF-06-1-0346. The work of A. Swami was supported in part by the DARPA ITMANET program. The U. S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation thereon. Parts of this work were presented at the 45th Allerton Conf. on Communication, Control and Computing Monticello, NY, Sep. 2007, and CISS\n'07 Baltimore, MD, Mar. 2007. The associate editor coordinating the review of this manuscript and approving it for publication was Dr. Danilo P. Mandic.\n\nPublished - 04564187.pdf
", "abstract": "The problem of optimal node density maximizing the Neyman-Pearson detection error exponent subject to a constraint on average (per node) energy consumption is analyzed. The spatial correlation among the sensor measurements is incorporated through a Gauss-Markov random field (GMRF) model with Euclidean nearest-neighbor dependency graph. A constant density deployment of sensors under the uniform or Poisson distribution is assumed. It is shown that the optimal node density crucially depends on the ratio between the measurement variances under the two hypotheses and displays a threshold behavior. Below the threshold value of the variance ratio, the optimal node density tends to infinity under any feasible average energy constraint. On the other hand, when the variance ratio is above the threshold, the optimal node density is the minimum value at which it is feasible to process and deliver the likelihood ratio (sufficient statistic) of the sensor measurements to the fusion center. In this regime of the variance ratio, an upper bound on the optimal node density based on a proposed 2-approximation fusion scheme and a lower bound based on the minimum spanning tree are established. 
Under an alternative formulation where the energy consumption per unit area is constrained, the optimal node density is shown to be strictly finite for all values of the variance ratio and bounds on this optimal node density are provided.", "date": "2008-10", "date_type": "published", "publication": "IEEE Transactions on Signal Processing", "volume": "56", "number": "10", "publisher": "IEEE", "pagerange": "5232-5245", "id_number": "CaltechAUTHORS:20170920-160524319", "issn": "1053-587X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-160524319", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Army Research Laboratory (ARL)", "grant_number": "DAAD19-01-2-0011" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0346" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)" } ] }, "doi": "10.1109/TSP.2008.928514", "primary_object": { "basename": "04564187.pdf", "url": "https://authors.library.caltech.edu/records/c0hac-kqd07/files/04564187.pdf" }, "resource_type": "article", "pub_year": "2008", "author_list": "Anandkumar, Animashree; Tong, Lang; et al." }, { "id": "https://authors.library.caltech.edu/records/yts23-dqp36", "eprint_id": 81652, "eprint_status": "archive", "datestamp": "2023-08-19 23:05:13", "lastmod": "2023-10-17 20:56:02", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Tong-Lang", "name": { "family": "Tong", "given": "Lang" } }, { "id": "Swami-A", "name": { "family": "Swami", "given": "Ananthram" } } ] }, "title": "Distributed Estimation Via Random Access", "ispublished": "pub", "full_text_status": "public", "keywords": "Distributed inference, random-access communications, sensor networks", "note": "\u00a9 2008 IEEE. 
\n\nManuscript received August 24, 2006; revised October 1, 2007. \n\nThis work was supported in part through the collaborative participation in the Communications and Networks Consortium sponsored by the U.S. Army Research Laboratory under the Collaborative Technology Alliance Program, Cooperative Agreement DAAD19-01-2-0011 and by the Army Research Office under Grant ARO-W911NF-06-1-0346. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation thereon.\n\nPublished - 04544950.pdf
", "abstract": "In this correspondence, the problem of distributed Bayesian estimation is considered in the context of a wireless sensor network. The Bayesian estimation performance is analyzed in terms of the expected Fisher information normalized by the transmission rate of the sensors. The sensors use a communication scheme known as the type-based random access (TBRA) scheme. Under a constraint on the expected transmission energy, an optimal spatio-temporal allocation scheme that maximizes the performance metric is characterized. It is shown that the performance metric is crucially dependent on the fading parameter known as the channel coherence index. For channels with low coherence indices, sensor transmissions tend to cancel each other, and there exists an optimal finite mean transmission rate that maximizes the performance metric. On the other hand, for channels with high coherence indices, there should be as many simultaneous transmissions as allowed by the network. The presence of a critical coherence index where the change from one behavior to another occurs is established.", "date": "2008-07", "date_type": "published", "publication": "IEEE Transactions on Information Theory", "volume": "54", "number": "7", "publisher": "IEEE", "pagerange": "3175-3181", "id_number": "CaltechAUTHORS:20170920-155855969", "issn": "0018-9448", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-155855969", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Army Research Laboratory (ARL)", "grant_number": "DAAD19-01-2-0011" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0346" } ] }, "doi": "10.1109/TIT.2008.924652", "primary_object": { "basename": "04544950.pdf", "url": "https://authors.library.caltech.edu/records/yts23-dqp36/files/04544950.pdf" }, "resource_type": "article", "pub_year": "2008", "author_list": "Anandkumar, Animashree; 
Tong, Lang; et el." }, { "id": "https://authors.library.caltech.edu/records/c5kfj-s1x74", "eprint_id": 81648, "eprint_status": "archive", "datestamp": "2023-08-19 21:03:34", "lastmod": "2023-10-17 20:55:52", "type": "article", "metadata_visibility": "show", "creators": { "items": [ { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Animashree" } }, { "id": "Tong-Lang", "name": { "family": "Tong", "given": "Lang" } } ] }, "title": "Type-Based Random Access for Distributed Detection Over Multiaccess Fading Channels", "ispublished": "pub", "full_text_status": "public", "keywords": "signal processing for communications, Distributed detection, multisensor systems, performance analysis", "note": "\u00a9 2007 IEEE. \n\nManuscript received December 24, 2005; revised December 28, 2006. \n\nThis work was supported in part through the collaborative participation in the Communications and Networks Consortium sponsored by the U. S. Army Research Laboratory under the Collaborative Technology Alliance Program, Cooperative\nAgreement DAAD19-01-2-0011 and by the Army Research Office under Grant ARO-W911NF-06-1-0346. The associate editor coordinating the review of this manuscript and approving it for publication was Dr. Jaume Riba.\n\nPublished - 04305425.pdf
", "abstract": "The problem of distributed detection in a sensor network over multiaccess fading channels is considered. A random-access transmission scheme referred to as the type-based random access (TBRA) is proposed and analyzed. Error exponents of TBRA under noncoherent detection are characterized with respect to the mean transmission rate and the channel-coherence index. For the zero-mean multiaccess fading channels, it is shown that there exists an optimal mean-transmission rate that maximizes the detection-error exponents. The optimal mean-transmission rate can be calculated numerically or estimated using the Gaussian approximation, and it gives a sensor-activation strategy that achieves an optimal allocation of transmission energy to spatial and temporal domains. Numerical examples and simulations are used to compare TBRA with the conventional centralized time-division multiple access (TDMA) scheme. It is shown that for the zero-mean multiaccess fading channels, TBRA gives substantial improvement in the low signal-to-noise ratio (SNR) regime whereas for the nonzero mean fading channels, TBRA performs better over a wide range of SNR.", "date": "2007-10", "date_type": "published", "publication": "IEEE Transactions on Signal Processing", "volume": "55", "number": "10", "publisher": "IEEE", "pagerange": "5032-5043", "id_number": "CaltechAUTHORS:20170920-153723886", "issn": "1053-587X", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170920-153723886", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Army Research Laboratory (ARL)", "grant_number": "DAAD19-01-2-0011" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-06-1-0346" } ] }, "doi": "10.1109/TSP.2007.896302", "primary_object": { "basename": "04305425.pdf", "url": "https://authors.library.caltech.edu/records/c5kfj-s1x74/files/04305425.pdf" }, "resource_type": "article", 
"pub_year": "2007", "author_list": "Anandkumar, Animashree and Tong, Lang" } ]