[ { "id": "https://authors.library.caltech.edu/records/e011a-65k64", "eprint_id": 118577, "eprint_status": "archive", "datestamp": "2023-08-20 08:22:45", "lastmod": "2023-10-24 23:24:45", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Liu-Ziming", "name": { "family": "Liu", "given": "Ziming" } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." }, "orcid": "0000-0001-9091-7266" }, { "id": "Wang-Yixuan", "name": { "family": "Wang", "given": "Yixuan" }, "orcid": "0000-0001-7305-5422" } ] }, "title": "Second Order Ensemble Langevin Method for Sampling and Inverse Problems", "ispublished": "unpub", "full_text_status": "public", "note": "Attribution 4.0 International (CC BY 4.0) \n\nThe work of ZL is supported by IAIFI through NSF grant PHY2019786. The work of AMS is supported by NSF award AGS1835860, the Office of Naval Research award N00014-17-1-2079 and by a Department of Defense Vannevar Bush Faculty Fellowship.\n\n
Submitted - 2208.04506.pdf
", "abstract": "We propose a sampling method based on an ensemble approximation of second order Langevin dynamics. The log target density is appended with a quadratic term in an auxiliary momentum variable and damped-driven Hamiltonian dynamics introduced; the resulting stochastic differential equation is invariant to the Gibbs measure, with marginal on the position coordinates given by the target. A preconditioner based on covariance under the law of the dynamics does not change this invariance property, and is introduced to accelerate convergence to the Gibbs measure. The resulting mean-field dynamics may be approximated by an ensemble method; this results in a gradient-free and affine-invariant stochastic dynamical system. Numerical results demonstrate its potential as the basis for a numerical sampler in Bayesian inverse problems.", "date": "2022-12-22", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20221221-222944367", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221221-222944367", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "PHY-2019786" }, { "agency": "NSF", "grant_number": "AGS-1835860" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" }, { "agency": "Vannever Bush Faculty Fellowship" } ] }, "doi": "10.48550/arXiv.2208.04506", "primary_object": { "basename": "2208.04506.pdf", "url": "https://authors.library.caltech.edu/records/e011a-65k64/files/2208.04506.pdf" }, "resource_type": "monograph", "pub_year": "2022", "author_list": "Liu, Ziming; Stuart, Andrew M.; et el." 
}, { "id": "https://authors.library.caltech.edu/records/eweb1-3wn20", "eprint_id": 114901, "eprint_status": "archive", "datestamp": "2023-08-20 04:48:44", "lastmod": "2023-10-24 15:15:12", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "de-Hoop-Maarten-V", "name": { "family": "de Hoop", "given": "Maarten V." }, "orcid": "0000-0002-6333-0379" }, { "id": "Kovachki-Nikola-B", "name": { "family": "Kovachki", "given": "Nikola B." }, "orcid": "0000-0002-3650-2972" }, { "id": "Nelsen-Nicholas-H", "name": { "family": "Nelsen", "given": "Nicholas H." }, "orcid": "0000-0002-8328-1199" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." }, "orcid": "0000-0001-9091-7266" } ] }, "title": "Convergence Rates for Learning Linear Operators from Noisy Data", "ispublished": "unpub", "full_text_status": "public", "keywords": "operator regression, linear inverse problems, Bayesian inference, posterior consistency, learning theory", "note": "The authors thank Kamyar Azizzadenesheli and Joel A. Tropp for helpful discussions about statistical learning. The computations presented in this paper were conducted on the Resnick High Performance Computing Center, a facility supported by the Resnick Sustainability Institute at the California Institute of Technology. \n\nMVdH is supported by the Simons Foundation under the MATH + X program, U.S. Department of Energy, Office of Basic Energy Sciences, Chemical Sciences, Geosciences, and Biosciences Division under grant number DE-SC0020345, the National Science Foundation (NSF) under grant DMS-1815143, and the corporate members of the Geo-Mathematical Imaging Group at Rice University. NHN is supported by the NSF Graduate Research Fellowship Program under grant DGE-1745301. AMS is supported by NSF (grant DMS-1818977). NBK, NHN, and AMS are supported by NSF (grant AGS-1835860) and ONR (grant N00014-19-1-2408).\n\nSubmitted - 2108.12515.pdf
", "abstract": "We study the Bayesian inverse problem of learning a linear operator on a Hilbert space from its noisy pointwise evaluations on random input data. Our framework assumes that this target operator is self-adjoint and diagonal in a basis shared with the Gaussian prior and noise covariance operators arising from the imposed statistical model and is able to handle target operators that are compact, bounded, or even unbounded. We establish posterior contraction rates with respect to a family of Bochner norms as the number of data tend to infinity and derive related lower bounds on the estimation error. In the large data limit, we also provide asymptotic convergence rates of suitably defined excess risk and generalization gap functionals associated with the posterior mean point estimator. In doing so, we connect the posterior consistency results to nonparametric learning theory. Furthermore, these convergence rates highlight and quantify the difficulty of learning unbounded linear operators in comparison with the learning of bounded or compact ones. 
Numerical experiments confirm the theory and demonstrate that similar conclusions may be expected in more general problem settings.", "date": "2022-05-24", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20220524-180322099", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220524-180322099", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Simons Foundation" }, { "agency": "Department of Energy (DOE)", "grant_number": "DE-SC0020345" }, { "agency": "NSF", "grant_number": "DMS-1815143" }, { "agency": "Rice University" }, { "agency": "NSF Graduate Research Fellowship", "grant_number": "DGE-1745301" }, { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "NSF", "grant_number": "AGS-1835860" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-19-1-2408" } ] }, "local_group": { "items": [ { "id": "Resnick-Sustainability-Institute" } ] }, "doi": "10.48550/arXiv.2108.12515", "primary_object": { "basename": "2108.12515.pdf", "url": "https://authors.library.caltech.edu/records/eweb1-3wn20/files/2108.12515.pdf" }, "resource_type": "monograph", "pub_year": "2022", "author_list": "de Hoop, Maarten V.; Kovachki, Nikola B.; et al." }, { "id": "https://authors.library.caltech.edu/records/hye2r-0jx31", "eprint_id": 112987, "eprint_status": "archive", "datestamp": "2023-09-15 07:26:32", "lastmod": "2023-10-23 21:35:05", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Dunbar-Oliver-R-A", "name": { "family": "Dunbar", "given": "Oliver R. A." }, "orcid": "0000-0001-7374-0382" }, { "id": "Howland-Michael-F", "name": { "family": "Howland", "given": "Michael F." }, "orcid": "0000-0002-2878-3874" }, { "id": "Schneider-T", "name": { "family": "Schneider", "given": "Tapio" }, "orcid": "0000-0001-5687-2287" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." 
}, "orcid": "0000-0001-9091-7266" } ] }, "title": "Ensemble-Based Experimental Design for Targeted High-Resolution Simulations to Inform Climate Models", "ispublished": "unpub", "full_text_status": "public", "note": "We gratefully acknowledge the generous support of Eric and Wendy Schmidt (by recommendation of Schmidt Futures) and the National Science Foundation (grant AGS-1835860). The simulations were performed on Caltech's High Performance Cluster, which is partially supported by a grant from the Gordon and Betty Moore Foundation. AMS is also supported by the Office of Naval Research (grant N00014-17-1-2079). \n\nData Availability. All computer code used in this paper is open source. The code for the idealized GCM, the Julia code for the optimal design algorithm, the plot tools, and the slurm/bash scripts to run both GCM and design algorithms are available at: https://doi.org/10.5281/zenodo.5835269.\n\nSubmitted - 2201.06998.pdf
Submitted - essoar.10510142.1.pdf
", "abstract": "Targeted high-resolution simulations driven by a general circulation model (GCM) can be used to calibrate GCM parameterizations of processes that are globally unresolvable but can be resolved in limited-area simulations. This raises the question of where to place high-resolution simulations to be maximally informative about the uncertain parameterizations in the global model. Here we construct an ensemble-based parallel algorithm to locate regions that maximize the uncertainty reduction, or information gain, in the uncertainty quantification of GCM parameters with regional data. The algorithm is based on a Bayesian framework that exploits a quantified posterior distribution on GCM parameters as a measure of uncertainty. The algorithm is embedded in the recently developed calibrate-emulate-sample (CES) framework, which performs efficient model calibration and uncertainty quantification with only O(10\u00b2) forward model evaluations, compared with O(10\u2075) forward model evaluations typically needed for traditional approaches to Bayesian calibration. We demonstrate the algorithm with an idealized GCM, with which we generate surrogates of high-resolution data. In this setting, we calibrate parameters and quantify uncertainties in a quasi-equilibrium convection scheme. We consider (i) localization in space for a statistically stationary problem, and (ii) localization in space and time for a seasonally varying problem. In these proof-of-concept applications, the calculated information gain reflects the reduction in parametric uncertainty obtained from Bayesian inference when harnessing a targeted sample of data. 
The largest information gain results from regions near the intertropical convergence zone (ITCZ) and indeed the algorithm automatically targets these regions for data collection.", "date": "2022-01-17", "date_type": "published", "id_number": "CaltechAUTHORS:20220119-572479000", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220119-572479000", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Schmidt Futures Program" }, { "agency": "NSF", "grant_number": "AGS-1835860" }, { "agency": "Gordon and Betty Moore Foundation" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" } ] }, "local_group": { "items": [ { "id": "Division-of-Geological-and-Planetary-Sciences" } ] }, "doi": "10.1002/essoar.10510142.1", "primary_object": { "basename": "2201.06998.pdf", "url": "https://authors.library.caltech.edu/records/hye2r-0jx31/files/2201.06998.pdf" }, "related_objects": [ { "basename": "essoar.10510142.1.pdf", "url": "https://authors.library.caltech.edu/records/hye2r-0jx31/files/essoar.10510142.1.pdf" } ], "resource_type": "monograph", "pub_year": "2022", "author_list": "Dunbar, Oliver R. A.; Howland, Michael F.; et al." 
}, { "id": "https://authors.library.caltech.edu/records/h5ry0-fsp13", "eprint_id": 110666, "eprint_status": "archive", "datestamp": "2023-08-20 04:44:45", "lastmod": "2023-10-23 19:47:45", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kovachki-Nikola-B", "name": { "family": "Kovachki", "given": "Nikola" }, "orcid": "0000-0002-3650-2972" }, { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" }, "orcid": "0000-0003-2081-9665" }, { "id": "Liu-Burigede", "name": { "family": "Liu", "given": "Burigede" }, "orcid": "0000-0002-6518-3368" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Bhattacharya-K", "name": { "family": "Bhattacharya", "given": "Kaushik" }, "orcid": "0000-0003-2908-5469" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew" }, "orcid": "0000-0001-9091-7266" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Neural Operator: Learning Maps Between Function Spaces", "ispublished": "unpub", "full_text_status": "public", "keywords": "Deep Learning, Operator Inference, Partial Differential Equations, Navier-Stokes Equation", "note": "Z. Li gratefully acknowledges the financial support from the Kortschak Scholars Program. A. Anandkumar is supported in part by Bren endowed chair, LwLL grants, Beyond Limits, Raytheon, Microsoft, Google, Adobe faculty fellowships, and DE Logi grant. K. Bhattacharya, N. B. Kovachki, B. Liu and A. M. Stuart gratefully acknowledge the financial support of the Army Research Laboratory through the Cooperative Agreement Number W911NF-12-0022. Research was sponsored by the Army Research Laboratory and was accomplished under Cooperative Agreement Number W911NF-12-2-0022. AMS is also supported by NSF (award DMS-1818977). 
\n\nThe views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the Army Research Laboratory or the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation herein. \n\nThe computations presented here were conducted on the Caltech High Performance Cluster, partially supported by a grant from the Gordon and Betty Moore Foundation.\n\nSubmitted - 2108.08481.pdf
", "abstract": "The classical development of neural networks has primarily focused on learning mappings between finite dimensional Euclidean spaces or finite sets. We propose a generalization of neural networks tailored to learn operators mapping between infinite dimensional function spaces. We formulate the approximation of operators by composition of a class of linear integral operators and nonlinear activation functions, so that the composed operator can approximate complex nonlinear operators. Furthermore, we introduce four classes of operator parameterizations: graph-based operators, low-rank operators, multipole graph-based operators, and Fourier operators and describe efficient algorithms for computing with each one. The proposed neural operators are resolution-invariant: they share the same network parameters between different discretizations of the underlying function spaces and can be used for zero-shot super-resolutions. Numerically, the proposed models show superior performance compared to existing machine learning based methodologies on Burgers' equation, Darcy flow, and the Navier-Stokes equation, while being several order of magnitude faster compared to conventional PDE solvers.", "date": "2021-08-19", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20210831-204010794", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210831-204010794", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Kortschak Scholars Program" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Learning with Less Labels (LwLL)" }, { "agency": "Beyond Limits" }, { "agency": "Raytheon Company" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" }, { "agency": "Caltech De Logi Fund" }, { "agency": "Army Research Laboratory", "grant_number": 
"W911NF-12-0022" }, { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "Gordon and Betty Moore Foundation" } ] }, "doi": "10.48550/arXiv.2108.08481", "primary_object": { "basename": "2108.08481.pdf", "url": "https://authors.library.caltech.edu/records/h5ry0-fsp13/files/2108.08481.pdf" }, "resource_type": "monograph", "pub_year": "2021", "author_list": "Kovachki, Nikola; Li, Zongyi; et el." }, { "id": "https://authors.library.caltech.edu/records/g2d05-3sh70", "eprint_id": 109919, "eprint_status": "archive", "datestamp": "2023-08-20 04:14:12", "lastmod": "2023-10-23 18:12:54", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Levine-Matthew-E", "name": { "family": "Levine", "given": "Matthew E." }, "orcid": "0000-0002-5627-3169" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." } } ] }, "title": "A Framework for Machine Learning of Model Error in Dynamical Systems", "ispublished": "unpub", "full_text_status": "public", "keywords": "Dynamical Systems, Model Error, Statistical Learning, Random Features, Recurrent Neural Networks, Reservoir Computing", "note": "Attribution 4.0 International (CC BY 4.0) \n\nThe authors are grateful to David Albers, Oliver Dunbar, Ian Melbourne, and Yisong Yue for helpful discussions. The work of MEL and AMS was supported by NIH RO1 LM012734 \"Mechanistic Machine Learning\". MEL is also supported by the National Science Foundation Graduate Research Fellowship under Grant No. DGE-1745301. AMS is also supported by NSF (award AGS-1835860), NSF (award DMS-1818977), the Office of Naval Research (award N00014-17-1-2079), and the AFOSR under MURI award number FA9550-20-1-0358 (Machine Learning and Physics-Based Modeling and Simulation).\n\nSubmitted - 2107.06658.pdf
", "abstract": "The development of data-informed predictive models for dynamical systems is of widespread interest in many disciplines. We present a unifying framework for blending mechanistic and machine-learning approaches to identify dynamical systems from data. We compare pure data-driven learning with hybrid models which incorporate imperfect domain knowledge. We cast the problem in both continuous- and discrete-time, for problems in which the model error is memoryless and in which it has significant memory, and we compare data-driven and hybrid approaches experimentally. Our formulation is agnostic to the chosen machine learning model. \n\nUsing Lorenz '63 and Lorenz '96 Multiscale systems, we find that hybrid methods substantially outperform solely data-driven approaches in terms of data hunger, demands for model complexity, and overall predictive performance. We also find that, while a continuous-time framing allows for robustness to irregular sampling and desirable domain-interpretability, a discrete-time framing can provide similar or better predictive performance, especially when data are undersampled and the vector field cannot be resolved. \n\nWe study model error from the learning theory perspective, defining excess risk and generalization error; for a linear model of the error used to learn about ergodic dynamical systems, both errors are bounded by terms that diminish with the square-root of T. We also illustrate scenarios that benefit from modeling with memory, proving that continuous-time recurrent neural networks (RNNs) can, in principle, learn memory-dependent model error and reconstruct the original system arbitrarily well; numerical results depict challenges in representing memory by this approach. 
We also connect RNNs to reservoir computing and thereby relate the learning of memory-dependent error to recent work on supervised learning between Banach spaces using random features.", "date": "2021-07-19", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20210719-210139286", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210719-210139286", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "RO1 LM012734" }, { "agency": "NSF Graduate Research Fellowship", "grant_number": "DGE-1745301" }, { "agency": "NSF", "grant_number": "AGS-1835860" }, { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-20-1-0358" } ] }, "doi": "10.48550/arXiv.2107.06658", "primary_object": { "basename": "2107.06658.pdf", "url": "https://authors.library.caltech.edu/records/g2d05-3sh70/files/2107.06658.pdf" }, "resource_type": "monograph", "pub_year": "2021", "author_list": "Levine, Matthew E. and Stuart, Andrew M." 
}, { "id": "https://authors.library.caltech.edu/records/wm6xz-zgz78", "eprint_id": 109918, "eprint_status": "archive", "datestamp": "2023-08-20 03:40:17", "lastmod": "2023-10-23 18:12:52", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" }, "orcid": "0000-0003-2081-9665" }, { "id": "Kovachki-Nikola-B", "name": { "family": "Kovachki", "given": "Nikola" }, "orcid": "0000-0002-3650-2972" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Liu-Burigede", "name": { "family": "Liu", "given": "Burigede" }, "orcid": "0000-0002-6518-3368" }, { "id": "Bhattacharya-K", "name": { "family": "Bhattacharya", "given": "Kaushik" }, "orcid": "0000-0003-2908-5469" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew" }, "orcid": "0000-0001-9091-7266" }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Learning Dissipative Dynamics in Chaotic Systems", "ispublished": "unpub", "full_text_status": "public", "note": "Z. Li gratefully acknowledges the financial support from the Kortschak Scholars, PIMCO Fellows, and Amazon AI4Science Fellows programs. M. Liu-Schiaffini is supported by the Stephen Adelman Memorial Endowment. A. Anandkumar is supported in part by Bren endowed chair. K. Bhattacharya, N. B. Kovachki, B. Liu, A. M. Stuart gratefully acknowledge the financial support of the Army Research Laboratory through the Cooperative Agreement Number W911NF-12-0022. A. M. Stuart is also grateful to the US Department of Defense for support as a Vannevar Bush Faculty Fellow. Research was sponsored by the Army Research Laboratory and was accomplished under Cooperative Agreement Number W911NF-12-2-0022. A part of this work took place when K. Azizzadenesheli was at Purdue University. 
The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the Army Research Laboratory or the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation herein.\n\nAccepted Version - 2106.06898.pdf
", "abstract": "Chaotic systems are notoriously challenging to predict because of their sensitivity to perturbations and errors due to time stepping. Despite this unpredictable behavior, for many dissipative systems the statistics of the long term trajectories are governed by an invariant measure supported on a set, known as the global attractor; for many problems this set is finite dimensional, even if the state space is infinite dimensional. For Markovian systems, the statistical properties of long-term trajectories are uniquely determined by the solution operator that maps the evolution of the system over arbitrary positive time increments. In this work, we propose a machine learning framework to learn the underlying solution operator for dissipative chaotic systems, showing that the resulting learned operator accurately captures short-time trajectories and long-time statistical behavior. Using this framework, we are able to predict various statistics of the invariant measure for the turbulent Kolmogorov Flow dynamics with Reynolds numbers up to 5000.", "date": "2021-06-13", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20210719-210135878", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210719-210135878", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Kortschak Scholars Program" }, { "agency": "PIMCO" }, { "agency": "Amazon AI4Science Fellowship" }, { "agency": "Stephen Adelman Memorial Endowment" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Army Research Laboratory", "grant_number": "W911NF-12-0022" }, { "agency": "Vannever Bush Faculty Fellowship" }, { "agency": "Army Research Laboratory", "grant_number": "W911NF-12-2-0022" } ] }, "doi": "10.48550/arXiv.2106.06898", "primary_object": { "basename": "2106.06898.pdf", "url": 
"https://authors.library.caltech.edu/records/wm6xz-zgz78/files/2106.06898.pdf" }, "resource_type": "monograph", "pub_year": "2021", "author_list": "Li, Zongyi; Kovachki, Nikola; et el." }, { "id": "https://authors.library.caltech.edu/records/9q611-3gh94", "eprint_id": 106557, "eprint_status": "archive", "datestamp": "2023-08-19 18:24:01", "lastmod": "2023-10-20 23:35:06", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Albers-D-J", "name": { "family": "Albers", "given": "D. J." } }, { "id": "Levine-M-E", "name": { "family": "Levine", "given": "M. E." } }, { "id": "Sirlanci-Melike", "name": { "family": "Sirlanci", "given": "M." } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "A. M." } } ] }, "title": "A Simple Modeling Framework For Prediction In The Human Glucose-Insulin System", "ispublished": "unpub", "full_text_status": "public", "note": "We acknowledge financial support from NIH RO1 LM012734 \"Mechanistic Machine Learning\". DA acknowledges helpful discussions with Bruce Gluckman, Rammah Abohtyra and Cecilia Diniz Behn. The authors have no conflicting or competing interests to declare.\n\nSubmitted - 1910.14193.pdf
", "abstract": "In this paper, we build a new, simple, and interpretable mathematical model to describe the human glucose-insulin system. Our ultimate goal is the robust control of the blood glucose (BG) level of individuals to a desired healthy range, by means of adjusting the amount of nutrition and/or external insulin appropriately. By constructing a simple yet flexible model class, with interpretable parameters, this general model can be specialized to work in different settings, such as type 2 diabetes mellitus (T2DM) and intensive care unit (ICU); different choices of appropriate model functions describing uptake of nutrition and removal of glucose differentiate between the models. In both cases, the available data is sparse and collected in clinical settings, major factors that have constrained our model choice to the simple form adopted.\nThe model has the form of a linear stochastic differential equation (SDE) to describe the evolution of the BG level. The model includes a term quantifying glucose removal from the bloodstream through the regulation system of the human body, and another two terms representing the effect of nutrition and externally delivered insulin. The parameters entering the equation must be learned in a patient-specific fashion, leading to personalized models. We present numerical results on patient-specific parameter estimation and future BG level forecasting in T2DM and ICU settings. The resulting model leads to the prediction of the BG level as an expected value accompanied by a band around this value which accounts for uncertainties in the prediction. Such predictions, then, have the potential for use as part of control systems which are robust to model imperfections and noisy data. 
Finally, a comparison of the predictive capability of the model with two different models specifically built for T2DM and ICU contexts is also performed.", "date": "2020-11-09", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20201109-140952547", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20201109-140952547", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NIH", "grant_number": "RO1 LM012734" } ] }, "doi": "10.48550/arXiv.1910.14193", "primary_object": { "basename": "1910.14193.pdf", "url": "https://authors.library.caltech.edu/records/9q611-3gh94/files/1910.14193.pdf" }, "resource_type": "monograph", "pub_year": "2020", "author_list": "Albers, D. J.; Levine, M. E.; et al." }, { "id": "https://authors.library.caltech.edu/records/1h946-00r40", "eprint_id": 106562, "eprint_status": "archive", "datestamp": "2023-08-19 22:23:06", "lastmod": "2023-10-20 23:35:21", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Schneider-T", "name": { "family": "Schneider", "given": "Tapio" }, "orcid": "0000-0001-5687-2287" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." } }, { "id": "Wu-Jin-Long", "name": { "family": "Wu", "given": "Jin-Long" } } ] }, "title": "Ensemble Kalman Inversion for Sparse Learning of Dynamical Systems from Time-Averaged Data", "ispublished": "unpub", "full_text_status": "public", "keywords": "Ensemble Kalman inversion, sparse learning, dynamical systems, time-averaged data", "note": "We thank Melanie Bieli, Tobias Bischoff and Anna Jaruga for sharing their formulation of the moment-based coalescence equation, and for discussions about it. All authors are supported by the generosity of Eric and Wendy Schmidt by recommendation of the Schmidt Futures program, by Earthrise Alliance, Mountain Philanthropies, the Paul G. 
Allen Family Foundation, and the National Science Foundation (NSF, award AGS1835860). A.M.S. is also supported by NSF (award DMS-1818977) and by the Office of Naval Research (award N00014-17-1-2079).\n\nSubmitted - 2007.06175.pdf
", "abstract": "Enforcing sparse structure within learning has led to significant advances in the field of data-driven discovery of dynamical systems. However, such methods require access not only to time-series of the state of the dynamical system, but also to the time derivative. In many applications, the data are available only in the form of time-averages such as moments and autocorrelation functions. We propose a sparse learning methodology to discover the vector fields defining a (possibly stochastic or partial) differential equation, using only time-averaged statistics. Such a formulation of sparse learning naturally leads to a nonlinear inverse problem to which we apply the methodology of ensemble Kalman inversion (EKI). EKI is chosen because it may be formulated in terms of the iterative solution of quadratic optimization problems; sparsity is then easily imposed. We then apply the EKI-based sparse learning methodology to various examples governed by stochastic differential equations (a noisy Lorenz 63 system), ordinary differential equations (Lorenz 96 system and coalescence equations), and a partial differential equation (the Kuramoto-Sivashinsky equation). The results demonstrate that time-averaged statistics can be used for data-driven discovery of differential equations using sparse EKI. The proposed sparse learning methodology extends the scope of data-driven discovery of differential equations to previously challenging applications and data-acquisition scenarios.", "date": "2020-11-09", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20201109-141011032", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20201109-141011032", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Schmidt Futures Program" }, { "agency": "Earthrise Alliance" }, { "agency": "Mountain Philanthropies" }, { "agency": "Paul G. 
Allen Family Foundation" }, { "agency": "NSF", "grant_number": "AGS-1835860" }, { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" } ] }, "local_group": { "items": [ { "id": "Division-of-Geological-and-Planetary-Sciences" } ] }, "doi": "10.48550/arXiv.2007.06175", "primary_object": { "basename": "2007.06175.pdf", "url": "https://authors.library.caltech.edu/records/1h946-00r40/files/2007.06175.pdf" }, "resource_type": "monograph", "pub_year": "2020", "author_list": "Schneider, Tapio; Stuart, Andrew M.; et al." }, { "id": "https://authors.library.caltech.edu/records/jqa78-jcw61", "eprint_id": 106558, "eprint_status": "archive", "datestamp": "2023-08-22 04:41:43", "lastmod": "2023-10-20 23:35:09", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Schneider-T", "name": { "family": "Schneider", "given": "Tapio" }, "orcid": "0000-0001-5687-2287" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." } }, { "id": "Wu-Jin-Long", "name": { "family": "Wu", "given": "Jin-Long" } } ] }, "title": "Learning Stochastic Closures Using Ensemble Kalman Inversion", "ispublished": "unpub", "full_text_status": "public", "keywords": "Stochastic differential equation; inverse problem; Ensemble Kalman inversion; Gaussian process regression; hierarchical parameterization", "note": "The authors thank Dr. Yvo Pokern at University College London for providing the butane dihedral angle data. All authors are supported by the generosity of Eric and Wendy Schmidt by recommendation of the Schmidt Futures program, by Earthrise Alliance, Mountain Philanthropies, the Paul G. Allen Family Foundation, and the National Science Foundation (NSF, award AGS1835860). A.M.S. is also supported by NSF (award DMS-1818977) and by the Office of Naval Research (award N00014-17-1-2079).\n\nSubmitted - 2004.08376.pdf
", "abstract": "Although the governing equations of many systems, when derived from first principles, may be viewed as known, it is often too expensive to numerically simulate all the interactions within the first principles description. Therefore researchers often seek simpler descriptions that describe complex phenomena without numerically resolving all the interacting components. Stochastic differential equations (SDEs) arise naturally as models in this context. The growth in data acquisition provides an opportunity for the systematic derivation of SDE models in many disciplines. However, inconsistencies between SDEs and real data at small time scales often cause problems, when standard statistical methodology is applied to parameter estimation. The incompatibility between SDEs and real data can be addressed by deriving sufficient statistics from the time-series data and learning parameters of SDEs based on these. Following this approach, we formulate the fitting of SDEs to sufficient statistics from real data as an inverse problem and demonstrate that this inverse problem can be solved by using ensemble Kalman inversion (EKI). Furthermore, we create a framework for non-parametric learning of drift and diffusion terms by introducing hierarchical, refineable parameterizations of unknown functions, using Gaussian process regression. We demonstrate the proposed methodology for the fitting of SDE models, first in a simulation study with a noisy Lorenz 63 model, and then in other applications, including dimension reduction starting from various deterministic chaotic systems arising in the atmospheric sciences, large-scale pattern modeling in climate dynamics, and simplified models for key observables arising in molecular dynamics. 
The results confirm that the proposed methodology provides a robust and systematic approach to fitting SDE models to real data.", "date": "2020-11-09", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20201109-140955956", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20201109-140955956", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Schmidt Futures Program" }, { "agency": "Earthrise Alliance" }, { "agency": "Mountain Philanthropies" }, { "agency": "Paul G. Allen Family Foundation" }, { "agency": "NSF", "grant_number": "AGS-1835860" }, { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" } ] }, "local_group": { "items": [ { "id": "Division-of-Geological-and-Planetary-Sciences" } ] }, "doi": "10.48550/arXiv.2004.08376", "primary_object": { "basename": "2004.08376.pdf", "url": "https://authors.library.caltech.edu/records/jqa78-jcw61/files/2004.08376.pdf" }, "resource_type": "monograph", "pub_year": "2020", "author_list": "Schneider, Tapio; Stuart, Andrew M.; et al." 
}, { "id": "https://authors.library.caltech.edu/records/hpbg9-9ea84", "eprint_id": 106480, "eprint_status": "archive", "datestamp": "2023-08-19 23:55:01", "lastmod": "2023-10-20 23:31:55", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" }, "orcid": "0000-0003-2081-9665" }, { "id": "Kovachki-N-B", "name": { "family": "Kovachki", "given": "Nikola" }, "orcid": "0000-0002-3650-2972" }, { "id": "Azizzadenesheli-K", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Liu-Burigede", "name": { "family": "Liu", "given": "Burigede" }, "orcid": "0000-0002-6518-3368" }, { "id": "Bhattacharya-K", "name": { "family": "Bhattacharya", "given": "Kaushik" }, "orcid": "0000-0003-2908-5469" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" } } ] }, "title": "Fourier Neural Operator for Parametric Partial Differential Equations", "ispublished": "unpub", "full_text_status": "public", "note": "Z. Li gratefully acknowledges the financial support from the Kortschak Scholars Program. A. Anandkumar is supported in part by Bren endowed chair, LwLL grants, Beyond Limits, Raytheon, Microsoft, Google, Adobe faculty fellowships, and DE Logi grant. K. Bhattacharya, N. B. Kovachki, B. Liu and A. M. Stuart gratefully acknowledge the financial support of the Army Research Laboratory through the Cooperative Agreement Number W911NF-12-0022. Research was sponsored by the Army Research Laboratory and was accomplished under Cooperative Agreement Number W911NF-12-2-0022. The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the Army Research Laboratory or the U.S. Government. The U.S. 
Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation herein.\n\nSubmitted - 2010.08895.pdf
", "abstract": "The classical development of neural networks has primarily focused on learning mappings between finite-dimensional Euclidean spaces. Recently, this has been generalized to neural operators that learn mappings between function spaces. For partial differential equations (PDEs), neural operators directly learn the mapping from any functional parametric dependence to the solution. Thus, they learn an entire family of PDEs, in contrast to classical methods which solve one instance of the equation. In this work, we formulate a new neural operator by parameterizing the integral kernel directly in Fourier space, allowing for an expressive and efficient architecture. We perform experiments on Burgers' equation, Darcy flow, and the Navier-Stokes equation (including the turbulent regime). Our Fourier neural operator shows state-of-the-art performance compared to existing neural network methodologies and it is up to three orders of magnitude faster compared to traditional PDE solvers.", "date": "2020-11-06", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20201106-120140981", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20201106-120140981", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Kortschak Scholars Program" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)" }, { "agency": "Learning with Less Labels (LwLL)" }, { "agency": "Beyond Limits" }, { "agency": "Raytheon Company" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" }, { "agency": "Caltech De Logi Fund" }, { "agency": "Army Research Laboratory", "grant_number": "W911NF-12-0022" } ] }, "doi": "10.48550/arXiv.2010.08895", "primary_object": { "basename": "2010.08895.pdf", "url": 
"https://authors.library.caltech.edu/records/hpbg9-9ea84/files/2010.08895.pdf" }, "resource_type": "monograph", "pub_year": "2020", "author_list": "Li, Zongyi; Kovachki, Nikola; et al." }, { "id": "https://authors.library.caltech.edu/records/0e45m-qwh51", "eprint_id": 103483, "eprint_status": "archive", "datestamp": "2023-08-19 21:23:38", "lastmod": "2023-10-20 16:24:56", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Bhattacharya-K", "name": { "family": "Bhattacharya", "given": "Kaushik" }, "orcid": "0000-0003-2908-5469" }, { "id": "Hosseini-Bamdad", "name": { "family": "Hosseini", "given": "Bamdad" } }, { "id": "Kovachki-N-B", "name": { "family": "Kovachki", "given": "Nikola B." }, "orcid": "0000-0002-3650-2972" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." } } ] }, "title": "Model Reduction and Neural Networks for Parametric PDEs", "ispublished": "unpub", "full_text_status": "public", "keywords": "approximation theory, deep learning, model reduction, neural networks, partial differential equations", "note": "Submitted to the editors May 8, 2020. \n\nThe authors are grateful to Anima Anandkumar, Kamyar Azizzadenesheli, Zongyi Li and Nicholas H. Nelsen for helpful discussions in the general area of neural networks for PDE-defined maps between Hilbert spaces. The work is supported by MEDE-ARL funding (W911NF-12-0022). AMS is also partially supported by NSF (DMS 1818977) and AFOSR (FA9550-17-1-0185). BH is partially supported by a Von K\u00e1rm\u00e1n instructorship at the California Institute of Technology.\n\nSubmitted - 2005.03180.pdf
", "abstract": "We develop a general framework for data-driven approximation of input-output maps between infinite-dimensional spaces. The proposed approach is motivated by the recent successes of neural networks and deep learning, in combination with ideas from model reduction. This combination results in a neural network approximation which, in principle, is defined on infinite-dimensional spaces and, in practice, is robust to the dimension of finite-dimensional approximations of these spaces required for computation. For a class of input-output maps, and suitably chosen probability measures on the inputs, we prove convergence of the proposed approximation methodology. Numerically we demonstrate the effectiveness of the method on a class of parametric elliptic PDE problems, showing convergence and robustness of the approximation scheme with respect to the size of the discretization, and compare our method with existing algorithms from the literature.", "date": "2020-05-27", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20200527-074228185", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200527-074228185", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Army Research Laboratory", "grant_number": "W911NF-12-0022" }, { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-17-1-0185" }, { "agency": "Caltech" } ] }, "doi": "10.48550/arXiv.2005.03180", "primary_object": { "basename": "2005.03180.pdf", "url": "https://authors.library.caltech.edu/records/0e45m-qwh51/files/2005.03180.pdf" }, "resource_type": "monograph", "pub_year": "2020", "author_list": "Bhattacharya, Kaushik; Hosseini, Bamdad; et al." 
}, { "id": "https://authors.library.caltech.edu/records/k3t18-we744", "eprint_id": 102271, "eprint_status": "archive", "datestamp": "2023-08-19 20:30:35", "lastmod": "2023-10-20 00:01:46", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Li-Zongyi", "name": { "family": "Li", "given": "Zongyi" }, "orcid": "0000-0003-2081-9665" }, { "id": "Kovachki-Nikola-B", "name": { "family": "Kovachki", "given": "Nikola" }, "orcid": "0000-0002-3650-2972" }, { "id": "Azizzadenesheli-Kamyar", "name": { "family": "Azizzadenesheli", "given": "Kamyar" }, "orcid": "0000-0001-8507-1868" }, { "id": "Liu-Burigede", "name": { "family": "Liu", "given": "Burigede" }, "orcid": "0000-0002-6518-3368" }, { "id": "Bhattacharya-K", "name": { "family": "Bhattacharya", "given": "Kaushik" }, "orcid": "0000-0003-2908-5469" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew" } }, { "id": "Anandkumar-A", "name": { "family": "Anandkumar", "given": "Anima" }, "orcid": "0000-0002-6974-6797" } ] }, "title": "Neural Operator: Graph Kernel Network for Partial Differential Equations", "ispublished": "unpub", "full_text_status": "public", "note": "Z. Li gratefully acknowledges the financial support from the Kortschak Scholars Program. K. Azizzadenesheli is supported in part by Raytheon and Amazon Web Service. A. Anandkumar is supported in part by Bren endowed chair, DARPA PAIHR00111890035, LwLL grants, Raytheon, Microsoft, Google, Adobe faculty fellowships, and DE Logi grant. K. Bhattacharya, N. B. Kovachki, B. Liu and A. M. Stuart gratefully acknowledge the financial support of the Army Research Laboratory through the Cooperative Agreement Number W911NF-12-0022. Research was sponsored by the Army Research Laboratory and was accomplished under Cooperative Agreement Number W911NF-12-2-0022. 
The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the Army Research Laboratory or the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation herein.\n\nSubmitted - 2003.03485.pdf
", "abstract": "The classical development of neural networks has been primarily for mappings between a finite-dimensional Euclidean space and a set of classes, or between two finite-dimensional Euclidean spaces. The purpose of this work is to generalize neural networks so that they can learn mappings between infinite-dimensional spaces (operators). The key innovation in our work is that a single set of network parameters, within a carefully designed network architecture, may be used to describe mappings between infinite-dimensional spaces and between different finite-dimensional approximations of those spaces. We formulate approximation of the infinite-dimensional mapping by composing nonlinear activation functions and a class of integral operators. The kernel integration is computed by message passing on graph networks. This approach has substantial practical consequences which we will illustrate in the context of mappings between input data to partial differential equations (PDEs) and their solutions. In this context, such learned networks can generalize among different approximation methods for the PDE (such as finite difference or finite element methods) and among approximations corresponding to different underlying levels of resolution and discretization. 
Experiments confirm that the proposed graph kernel network does have the desired properties and show competitive performance compared to the state of the art solvers.", "date": "2020-04-02", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20200402-133318521", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200402-133318521", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Kortschak Scholars Program" }, { "agency": "Raytheon" }, { "agency": "Amazon Web Services" }, { "agency": "Bren Professor of Computing and Mathematical Sciences" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)", "grant_number": "PAIHR00111890035" }, { "agency": "Learning with Less Labels (LwLL)" }, { "agency": "Microsoft Faculty Fellowship" }, { "agency": "Google Faculty Research Award" }, { "agency": "Adobe" }, { "agency": "Caltech De Logi Fund" }, { "agency": "Army Research Laboratory", "grant_number": "W911NF-12-0022" }, { "agency": "Army Research Laboratory", "grant_number": "W911NF-12-2-0022" } ] }, "doi": "10.48550/arXiv.2003.03485", "primary_object": { "basename": "2003.03485.pdf", "url": "https://authors.library.caltech.edu/records/k3t18-we744/files/2003.03485.pdf" }, "resource_type": "monograph", "pub_year": "2020", "author_list": "Li, Zongyi; Kovachki, Nikola; et al." }, { "id": "https://authors.library.caltech.edu/records/7askx-9w059", "eprint_id": 97317, "eprint_status": "archive", "datestamp": "2023-08-19 16:16:28", "lastmod": "2023-10-20 22:11:32", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Kovachki-N-B", "name": { "family": "Kovachki", "given": "Nikola B." }, "orcid": "0000-0002-3650-2972" }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." 
} } ] }, "title": "Analysis Of Momentum Methods", "ispublished": "unpub", "full_text_status": "public", "keywords": "Optimization, Machine Learning, Deep Learning, Gradient Flows, Momentum Methods, Modified Equation, Invariant Manifold", "note": "Both authors are supported, in part, by the US National Science Foundation (NSF) grant DMS 1818977, the US Office of Naval Research (ONR) grant N00014-17-1-2079, and the US Army Research Office (ARO) grant W911NF-12-2-0022.\n\nSubmitted - 1906.04285.pdf
", "abstract": "Gradient descent-based optimization methods underpin the parameter training which results in the impressive results now found when testing neural networks. Introducing stochasticity is key to their success in practical problems, and there is some understanding of the role of stochastic gradient descent in this context. Momentum modifications of gradient descent such as Polyak's Heavy Ball method (HB) and Nesterov's method of accelerated gradients (NAG), are widely adopted. In this work, our focus is on understanding the role of momentum in the training of neural networks, concentrating on the common situation in which the momentum contribution is fixed at each step of the algorithm; to expose the ideas simply we work in the deterministic setting. We show that, contrary to popular belief, standard implementations of fixed momentum methods do no more than act to rescale the learning rate. We achieve this by showing that the momentum method converges to a gradient flow, with a momentum-dependent time-rescaling, using the method of modified equations from numerical analysis. 
Further we show that the momentum method admits an exponentially attractive invariant manifold on which the dynamic reduces to a gradient flow with respect to a modified loss function, equal to the original one plus a small perturbation.", "date": "2019-07-22", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20190722-102107649", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190722-102107649", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-12-2-0022" } ] }, "doi": "10.48550/arXiv.1906.04285", "primary_object": { "basename": "1906.04285.pdf", "url": "https://authors.library.caltech.edu/records/7askx-9w059/files/1906.04285.pdf" }, "resource_type": "monograph", "pub_year": "2019", "author_list": "Kovachki, Nikola B. and Stuart, Andrew M." }, { "id": "https://authors.library.caltech.edu/records/nt212-sbf55", "eprint_id": 97329, "eprint_status": "archive", "datestamp": "2023-08-19 15:50:25", "lastmod": "2023-10-20 22:12:20", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Dunlop-M-M", "name": { "family": "Dunlop", "given": "Matthew M." }, "orcid": "0000-0001-7718-3755" }, { "id": "Helin-T", "name": { "family": "Helin", "given": "Tapio" } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." 
} } ] }, "title": "Hyperparameter Estimation in Bayesian MAP Estimation: Parameterizations and Consistency", "ispublished": "unpub", "full_text_status": "public", "keywords": "Bayesian inverse problems, hierarchical Bayesian, MAP estimation, optimization, nonparametric inference, hyperparameter inference, consistency of estimators", "note": "The work of AMS and MMD is funded by US National Science\nFoundation (NSF) grant DMS 1818977 and AFOSR Grant FA9550-17-1-0185.\n\nSubmitted - 1905.04365.pdf
", "abstract": "The Bayesian formulation of inverse problems is attractive for three primary reasons: it provides a clear modelling framework; means for uncertainty quantification; and it allows for principled learning of hyperparameters. The posterior distribution may be explored by sampling methods, but for many problems it is computationally infeasible to do so. In this situation maximum a posteriori (MAP) estimators are often sought. Whilst these are relatively cheap to compute, and have an attractive variational formulation, a key drawback is their lack of invariance under change of parameterization. This is a particularly significant issue when hierarchical priors are employed to learn hyperparameters. In this paper we study the effect of the choice of parameterization on MAP estimators when a conditionally Gaussian hierarchical prior distribution is employed. Specifically we consider the centred parameterization, the natural parameterization in which the unknown state is solved for directly, and the noncentred parameterization, which works with a whitened Gaussian as the unknown state variable, and arises when considering dimension-robust MCMC algorithms; MAP estimation is well-defined in the nonparametric setting only for the noncentred parameterization. However, we show that MAP estimates based on the noncentred parameterization are not consistent as estimators of hyperparameters; conversely, we show that limits of finite-dimensional centred MAP estimators are consistent as the dimension tends to infinity. We also consider empirical Bayesian hyperparameter estimation, show consistency of these estimates, and demonstrate that they are more robust with respect to noise than centred MAP estimates. 
An underpinning concept throughout is that hyperparameters may only be recovered up to measure equivalence, a well-known phenomenon in the context of the Ornstein-Uhlenbeck process.", "date": "2019-07-22", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20190722-134133717", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190722-134133717", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "NSF", "grant_number": "DMS-1818977" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-17-1-0185" } ] }, "doi": "10.48550/arXiv.1905.04365", "primary_object": { "basename": "1905.04365.pdf", "url": "https://authors.library.caltech.edu/records/nt212-sbf55/files/1905.04365.pdf" }, "resource_type": "monograph", "pub_year": "2019", "author_list": "Dunlop, Matthew M.; Helin, Tapio; et al." }, { "id": "https://authors.library.caltech.edu/records/7ezmd-00k93", "eprint_id": 94459, "eprint_status": "archive", "datestamp": "2023-08-19 08:17:44", "lastmod": "2023-10-20 17:58:46", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Chen-Victor", "name": { "family": "Chen", "given": "Victor" } }, { "id": "Dunlop-M-M", "name": { "family": "Dunlop", "given": "Matthew M." }, "orcid": "0000-0001-7718-3755" }, { "id": "Papaspiliopoulos-O", "name": { "family": "Papaspiliopoulos", "given": "Omiros" } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." } } ] }, "title": "Dimension-Robust MCMC in Bayesian Inverse Problems", "ispublished": "unpub", "full_text_status": "public", "note": "MMD and AMS are supported by AFOSR Grant FA9550-17-1-0185 and ONR Grant N00014-17-1-2079.\n\nSubmitted - 1803.03344.pdf
", "abstract": "The methodology developed in this article is motivated by a wide range of prediction and uncertainty quantification problems that arise in Statistics, Machine Learning and Applied Mathematics, such as non-parametric regression, multi-class classification and inversion of partial differential equations. One popular formulation of such problems is as Bayesian inverse problems, where a prior distribution is used to regularize inference on a high-dimensional latent state, typically a function or a field. It is common that such priors are non-Gaussian, for example piecewise-constant or heavy-tailed, and/or hierarchical, in the sense of involving a further set of low-dimensional parameters, which, for example, control the scale or smoothness of the latent state. In this formulation prediction and uncertainty quantification relies on efficient exploration of the posterior distribution of latent states and parameters. This article introduces a framework for efficient MCMC sampling in Bayesian inverse problems that capitalizes upon two fundamental ideas in MCMC, non-centred parameterisations of hierarchical models and dimension-robust samplers for latent Gaussian processes. Using a range of diverse applications we showcase that the proposed framework is dimension-robust, that is, the efficiency of the MCMC sampling does not deteriorate as the dimension of the latent state gets higher. 
We showcase the full potential of the machinery we develop in the article in semi-supervised multi-class classification, where our sampling algorithm is used within an active learning framework to guide the selection of input data to manually label in order to achieve high predictive accuracy with a minimal number of labelled data.", "date": "2019-04-04", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20190404-111029769", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190404-111029769", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-17-1-0185" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" } ] }, "doi": "10.48550/arXiv.1803.03344", "primary_object": { "basename": "1803.03344.pdf", "url": "https://authors.library.caltech.edu/records/7ezmd-00k93/files/1803.03344.pdf" }, "resource_type": "monograph", "pub_year": "2019", "author_list": "Chen, Victor; Dunlop, Matthew M.; et al." }, { "id": "https://authors.library.caltech.edu/records/0yvp3-rgm11", "eprint_id": 94458, "eprint_status": "archive", "datestamp": "2023-08-19 03:36:03", "lastmod": "2023-10-20 17:58:43", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Dunlop-M-M", "name": { "family": "Dunlop", "given": "Matthew M." }, "orcid": "0000-0001-7718-3755" }, { "id": "Elliott-C-M", "name": { "family": "Elliott", "given": "Charles M." } }, { "id": "Hoang-Viet-Ha", "name": { "family": "Hoang", "given": "Viet Ha" } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." 
} } ] }, "title": "Reconciling Bayesian and Total Variation Methods for Binary Inversion", "ispublished": "unpub", "full_text_status": "public", "keywords": "Bayesian inversion, phase-field, level set method, perimeter regularization, Gamma convergence, uncertainty quantification", "note": "The research of CME was partially supported by the Royal Society via a Wolfson Research Merit Award; the work of AMS by DARPA contract W911NF-15-2-0121; the work of CME and AMS by the EPSRC programme grant EQUIP; the work of MMD and AMS by AFOSR Grant FA9550-17-1-0185 and ONR Grant N00014-17-1-2079; the work of MMD by the EPSRC MASDOC Graduate Training Program; VHH gratefully acknowledges the MOE AcRF Tier 1 grant RG30/16.\n\nSubmitted - 1706.01960.pdf
", "abstract": "A central theme in classical algorithms for the reconstruction of discontinuous functions from observational data is perimeter regularization. On the other hand, sparse or noisy data often demands a probabilistic approach to the reconstruction of images, to enable uncertainty quantification; the Bayesian approach to inversion is a natural framework in which to carry this out. The link between Bayesian inversion methods and perimeter regularization, however, is not fully understood. In this paper two links are studied: (i) the MAP objective function of a suitably chosen phase-field Bayesian approach is shown to be closely related to a least squares plus perimeter regularization objective; (ii) sample paths of a suitably chosen Bayesian level set formulation are shown to possess finite perimeter and to have the ability to learn about the true perimeter. Furthermore, the level set approach is shown to lead to faster algorithms for uncertainty quantification than the phase field approach.", "date": "2019-04-04", "date_type": "published", "publisher": "arXiv", "id_number": "CaltechAUTHORS:20190404-111026312", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190404-111026312", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Royal Society" }, { "agency": "Army Research Office (ARO)", "grant_number": "W911NF-15-2-0121" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)" }, { "agency": "Engineering and Physical Sciences Research Council (EPSRC)", "grant_number": "EQUIP" }, { "agency": "Air Force Office of Scientific Research (AFOSR)", "grant_number": "FA9550-17-1-0185" }, { "agency": "Office of Naval Research (ONR)", "grant_number": "N00014-17-1-2079" }, { "agency": "Ministry of Education (Singapore)", "grant_number": "RG30/16" } ] }, "doi": "10.48550/arXiv.1706.01960", "primary_object": { "basename": "1706.01960.pdf", "url": 
"https://authors.library.caltech.edu/records/0yvp3-rgm11/files/1706.01960.pdf" }, "resource_type": "monograph", "pub_year": "2019", "author_list": "Dunlop, Matthew M.; Elliott, Charles M.; et al." }, { "id": "https://authors.library.caltech.edu/records/dk7kg-hhg14", "eprint_id": 78137, "eprint_status": "archive", "datestamp": "2023-08-19 08:21:18", "lastmod": "2023-10-25 23:47:52", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Brett-C-E-A", "name": { "family": "Brett", "given": "C. E. A." } }, { "id": "Lam-K-F", "name": { "family": "Lam", "given": "K. F." } }, { "id": "Law-K-J-H", "name": { "family": "Law", "given": "K. J. H." } }, { "id": "McCormick-D-S", "name": { "family": "McCormick", "given": "D. S." } }, { "id": "Scott-M-R", "name": { "family": "Scott", "given": "M. R." } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "A. M." } } ] }, "title": "Stability of Filters for the Navier-Stokes Equation", "ispublished": "unpub", "full_text_status": "public", "note": "Submitted on 11 Oct 2011. \n\nAMS would like to thank the following institutions for financial support: EPSRC, ERC and ONR; KJHL was supported by EPSRC and ONR; and CEAB, KFL, DSM and MRS were supported by EPSRC, through the MASDOC Graduate Training Centre at Warwick University. The authors also thank The Mathematics Institute and Centre for Scientific Computing at Warwick University for supplying valuable computation time. Finally, the authors thank Masoumeh Dashti for valuable input.\n\nSubmitted - 1110.2527.pdf
", "abstract": "Data assimilation methodologies are designed to incorporate noisy observations of a physical system into an underlying model in order to infer the properties of the state of the system. Filters refer to a class of data assimilation algorithms designed to update the estimation of the state in a on-line fashion, as data is acquired sequentially. For linear problems subject to Gaussian noise filtering can be performed exactly using the Kalman filter. For nonlinear systems it can be approximated in a systematic way by particle filters. However in high dimensions these particle filtering methods can break down. Hence, for the large nonlinear systems arising in applications such as weather forecasting, various ad hoc filters are used, mostly based on making Gaussian approximations. The purpose of this work is to study the properties of these ad hoc filters, working in the context of the 2D incompressible Navier-Stokes equation. By working in this infinite dimensional setting we provide an analysis which is useful for understanding high dimensional filtering, and is robust to mesh-refinement. We describe theoretical results showing that, in the small observational noise limit, the filters can be tuned to accurately track the signal itself (filter stability), provided the system is observed in a sufficiently large low dimensional space; roughly speaking this space should be large enough to contain the unstable modes of the linearized dynamics. Numerical results are given which illustrate the theory. In a simplified scenario we also derive, and study numerically, a stochastic PDE which determines filter stability in the limit of frequent observations, subject to large observational noise. 
The positive results herein concerning filter stability complement recent numerical studies which demonstrate that the ad hoc filters perform poorly in reproducing statistical variation about the true signal.", "date": "2017-06-13", "date_type": "published", "id_number": "CaltechAUTHORS:20170613-070159561", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170613-070159561", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Engineering and Physical Sciences Research Council (EPSRC)" }, { "agency": "Engineering Research Center (ERC)" }, { "agency": "Office of Naval Research (ONR)" }, { "agency": "Warwick University" } ] }, "doi": "10.48550/arXiv.1110.2527", "primary_object": { "basename": "1110.2527.pdf", "url": "https://authors.library.caltech.edu/records/dk7kg-hhg14/files/1110.2527.pdf" }, "resource_type": "monograph", "pub_year": "2017", "author_list": "Brett, C. E. A.; Lam, K. F.; et el." }, { "id": "https://authors.library.caltech.edu/records/2t7pd-fnb55", "eprint_id": 78106, "eprint_status": "archive", "datestamp": "2023-08-19 01:40:36", "lastmod": "2024-03-05 18:25:36", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Schillings-C", "name": { "family": "Schillings", "given": "C." } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "A. M." }, "orcid": "0000-0001-9091-7266" } ] }, "title": "Convergence Analysis of the Ensemble Kalman Filter for Inverse Problems: the Noisy Case", "ispublished": "unpub", "full_text_status": "public", "keywords": "Bayesian Inverse Problems, Ensemble Kalman Filter, Parameter Identi\ufffdcation", "note": "Both authors are grateful to the EPSRC Programme Grant EQUIP for funding of this research. AMS is also grateful to DARPA and to ONR for funding parts of this research.\n\nSubmitted - 1702.07894.pdf
", "abstract": "We present an analysis of the ensemble Kalman filter for inverse problems based on the continuous time limit of the algorithm. The analysis of the dynamical behaviour of the ensemble allows to establish well-posedness and convergence results for a fixed ensemble size. We will build on the results presented in [Schillings, Stuart 2017] and generalise them to the case of noisy observational data, in particular the influence of the noise on the convergence will be investigated, both theoretically and numerically.", "date": "2017-06-12", "date_type": "published", "publisher": "Caltech Library", "id_number": "CaltechAUTHORS:20170612-123329512", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170612-123329512", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Engineering and Physical Sciences Research Council (EPSRC)", "grant_number": "EQUIP" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)" }, { "agency": "Office of Naval Research (ONR)" } ] }, "collection": "CaltechAUTHORS", "primary_object": { "basename": "1702.07894.pdf", "url": "https://authors.library.caltech.edu/records/2t7pd-fnb55/files/1702.07894.pdf" }, "resource_type": "monograph", "pub_year": "2017", "author_list": "Schillings, C. and Stuart, A. M." }, { "id": "https://authors.library.caltech.edu/records/53ykm-qm573", "eprint_id": 73040, "eprint_status": "archive", "datestamp": "2023-08-20 11:21:40", "lastmod": "2023-10-24 14:59:12", "type": "monograph", "metadata_visibility": "show", "creators": { "items": [ { "id": "Lu-Yulong", "name": { "family": "Lu", "given": "Yulong" } }, { "id": "Stuart-A-M", "name": { "family": "Stuart", "given": "Andrew M." 
} }, { "name": { "family": "Weber", "given": "Hendrik" } } ] }, "title": "Gaussian approximations for transition paths in molecular dynamics", "ispublished": "unpub", "full_text_status": "public", "keywords": "Transition path, Kullback-Leibler approximation, Onsager-Machlup functional, large deviations, Gamma-convergence", "note": "The authors are grateful Frank Pinski for helpful discussions and insights. YL is is supported by EPSRC as part of the MASDOC DTC at the University of Warwick with grant No. EP/HO23364/1. The work of AMS is supported by DARPA, EPSRC and ONR. The work of HW is supported by EPSRC and the Royal Society.\n\nSubmitted - 1604.06594v1.pdf
", "abstract": "This paper is concerned with transition paths within the framework of the overdamped Langevin dynamics model of chemical reactions. We aim to give an efficient description of typical transition paths in the small temperature regime. We adopt a variational point of view and seek the best Gaussian approximation, with respect to Kullback-Leibler divergence, of the non-Gaussian distribution of the diffusion process. We interpret the mean of this Gaussian approximation as the \"most likely path\" and the covariance operator as a means to capture the typical fluctuations around this most likely path. \nWe give an explicit expression for the Kullback-Leibler divergence in terms of the mean and the covariance operator for a natural class of Gaussian approximations and show the existence of minimisers for the variational problem. Then the low temperature limit is studied via \u0393-convergence of the associated variational problem. The limiting functional consists of two parts: The first part only depends on the mean and coincides with the \u0393-limit of the Freidlin-Wentzell rate functional. 
The second part depends on both, the mean and the covariance operator and is minimized if the dynamics are given by a time-inhomogenous Ornstein-Uhlenbeck process found by linearization of the Langevin dynamics around the Freidlin-Wentzell minimizer.", "date": "2016-12-21", "date_type": "published", "id_number": "CaltechAUTHORS:20161220-182307792", "official_url": "https://resolver.caltech.edu/CaltechAUTHORS:20161220-182307792", "rights": "No commercial reproduction, distribution, display or performance rights in this work are provided.", "funders": { "items": [ { "agency": "Engineering and Physical Sciences Research Council (EPSRC)", "grant_number": "EP/HO23364/1" }, { "agency": "Defense Advanced Research Projects Agency (DARPA)" }, { "agency": "Office of Naval Research (ONR)" }, { "agency": "Royal Society" } ] }, "doi": "10.48550/arXiv.1604.06594", "primary_object": { "basename": "1604.06594v1.pdf", "url": "https://authors.library.caltech.edu/records/53ykm-qm573/files/1604.06594v1.pdf" }, "resource_type": "monograph", "pub_year": "2016", "author_list": "Lu, Yulong; Stuart, Andrew M.; et el." } ]