@comment{Caltech PhD theses (machine learning / control). Keys are the thesis
  DOIs and must not be changed: existing \cite commands depend on them.
  `advisor` and `internal-note` are non-standard fields ignored by standard
  styles; `internal-note` carries review annotations only.}

@phdthesis{10.7907/1jz8-5t85,
  author   = {Bernstein, Jeremy},
  title    = {Optimisation \& Generalisation in Networks of Neurons},
  school   = {California Institute of Technology},
  year     = {2023},
  doi      = {10.7907/1jz8-5t85},
  url      = {https://resolver.caltech.edu/CaltechTHESIS:10132022-000100592},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  advisor  = {Yue, Yisong},
  abstract = {The goal of this thesis is to develop the optimisation and
    generalisation theoretic foundations of learning in artificial neural
    networks. The thesis tackles two central questions. Given training data
    and a network architecture:

    On optimisation, an essential feature of neural network training is that
    the network weights affect the loss function only indirectly through their
    appearance in the network architecture. This thesis proposes a three-step
    framework for deriving novel ``architecture aware'' optimisation
    algorithms. The first step---termed functional majorisation---is to
    majorise a series expansion of the loss function in terms of functional
    perturbations. The second step is to derive architectural perturbation
    bounds that relate the size of functional perturbations to the size of
    weight perturbations. The third step is to substitute these architectural
    perturbation bounds into the functional majorisation of the loss and to
    obtain an optimisation algorithm via minimisation. This constitutes an
    application of the majorise-minimise meta-algorithm to neural networks.

    On generalisation, a promising recent line of work has applied PAC-Bayes
    theory to derive non-vacuous generalisation guarantees for neural
    networks. Since these guarantees control the average risk of ensembles of
    networks, they do not address which individual network should generalise
    best. To close this gap, the thesis rekindles an old idea from the kernels
    literature: the Bayes point machine. A Bayes point machine is a single
    classifier that approximates the aggregate prediction of an ensemble of
    classifiers. Since aggregation reduces the variance of ensemble
    predictions, Bayes point machines tend to generalise better than other
    ensemble members. The thesis shows that the space of neural networks
    consistent with a training set concentrates on a Bayes point machine if
    both the network width and normalised margin are sent to infinity. This
    motivates the practice of returning a wide network of large normalised
    margin.

    Potential applications of these ideas include novel methods for
    uncertainty quantification, more efficient numerical representations for
    neural hardware, and optimisers that transfer hyperparameters across
    learning problems.},
}

@phdthesis{10.7907/8rz4-7b35,
  author   = {Shi, Guanya},
  title    = {Reliable Learning and Control in Dynamic Environments: Towards
    Unified Theory and Learned Robotic Agility},
  school   = {California Institute of Technology},
  year     = {2023},
  doi      = {10.7907/8rz4-7b35},
  url      = {https://resolver.caltech.edu/CaltechTHESIS:08052022-231458463},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  internal-note = {NOTE(review): advisor field missing in source data --
    confirm and add},
  abstract = {Recent breathtaking advances in machine learning beckon to their
    applications in a wide range of real-world autonomous systems. However,
    for safety-critical settings such as agile robotic control in hazardous
    environments, we must confront several key challenges before widespread
    deployment. Most importantly, the learning system must interact with the
    rest of the autonomous system (e.g., highly nonlinear and non-stationary
    dynamics) in a way that safeguards against catastrophic failures with
    formal guarantees. In addition, from both computational and statistical
    standpoints, the learning system must incorporate prior knowledge for
    efficiency and generalizability.

    This thesis presents progress towards establishing a unified framework
    that fundamentally connects learning and control. First, Part I motivates
    the benefit and necessity of such a unified framework by the
    Neural-Control Family, a family of nonlinear deep-learning-based control
    methods with not only stability and robustness guarantees but also new
    capabilities in agile robotic control. Then Part II discusses three
    unifying interfaces between learning and control: (1) online
    meta-adaptive control, (2) competitive online optimization and control,
    and (3) online learning perspectives on model predictive control. All
    interfaces yield settings that jointly admit both learning-theoretic and
    control-theoretic guarantees.},
}

@phdthesis{10.7907/5n5q-x203,
  author   = {Zhan, Eric},
  title    = {New Algorithms for Programmatic Deep Learning with Applications
    to Behavior Modeling},
  school   = {California Institute of Technology},
  year     = {2022},
  doi      = {10.7907/5n5q-x203},
  url      = {https://resolver.caltech.edu/CaltechTHESIS:11302021-224628633},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  advisor  = {Yue, Yisong},
  abstract = {Raw behavioral data is becoming increasingly more abundant and
    more easily obtainable in spatiotemporal domains such as sports, video
    games, navigation \& driving, motion capture, and animal science. How can
    we best use this data to advance their respective domains forward? For
    instance, researchers for self-driving vehicles would like to identify
    the key features of the environment state that impact decision-making the
    most; game developers would like to populate their games with characters
    that have unique and diverse behaviors to create a more immersive gaming
    experience; and behavioral neuroscientists would like to uncover the
    underlying mechanisms that drive learning in animals. Machine learning,
    the science of developing models and algorithms to identify and leverage
    patterns in data, is well-equipped to aid in these endeavors. But how do
    we integrate machine learning with these spatiotemporal domains in a
    principled way? In this dissertation, we develop and introduce new
    algorithms in programmatic deep learning that tackle some of the new
    challenges encountered in behavior modeling.

    Our work in programmatic deep learning comprises two main themes: in the
    first, we show how to use expert-written programs as sources of weak
    labels in domains where manually-annotated expert labels are scarce; in
    the second, we explore programs as a flexible function class with
    human-interpretable structure and show how to learn them via
    neurosymbolic program learning. Augmenting deep learning with
    programmatic structure allows domain experts to easily incorporate domain
    knowledge into machine learning models; we show that this results in
    significant improvements in many behavior modeling applications like
    imitation learning, controllable generation, counterfactual analysis, and
    unsupervised clustering.},
}

@phdthesis{10.7907/4mjd-ce53,
  author   = {Marino, Joseph Louis},
  title    = {Learned Feedback \& Feedforward Perception \& Control},
  school   = {California Institute of Technology},
  year     = {2021},
  doi      = {10.7907/4mjd-ce53},
  url      = {https://resolver.caltech.edu/CaltechTHESIS:05272021-042158260},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  internal-note = {NOTE(review): advisor field missing in source data --
    confirm and add},
  abstract = {The notions of feedback and feedforward information processing
    gained prominence under cybernetics, an early movement at the dawn of
    computer science and theoretical neuroscience. Negative feedback
    processing corrects errors, whereas feedforward processing makes
    predictions, thereby preemptively reducing errors. A key insight of
    cybernetics was that such processes can be applied to both perception, or
    state estimation, and control, or action selection. The remnants of this
    insight are found in many modern areas, including predictive coding in
    neuroscience and deep latent variable models in machine learning. This
    thesis draws on feedback and feedforward ideas developed within
    predictive coding, adapting them to improve machine learning techniques
    for perception (Part II) and control (Part III). Upon establishing these
    conceptual connections, in Part IV, we traverse this bridge, from machine
    learning back to neuroscience, arriving at new perspectives on the
    correspondences between these fields.},
}

@phdthesis{10.7907/7qaw-kd75,
  author   = {Song, Jialin},
  title    = {Learning to Optimize: from Theory to Practice},
  school   = {California Institute of Technology},
  year     = {2021},
  doi      = {10.7907/7qaw-kd75},
  url      = {https://resolver.caltech.edu/CaltechTHESIS:06022021-223508132},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  advisor  = {Yue, Yisong},
  abstract = {Optimization is at the heart of everyday applications, from
    finding the fastest route for navigation to designing efficient drugs for
    diseases. The study of optimization algorithms has focused on developing
    general approaches that do not adapt to specific problem instances. While
    they enjoy wide applicability, they forgo the potentially useful
    information embedded in the structure of an instance. Furthermore, as new
    optimization problems appear, the algorithm development process relies
    heavily on domain expertise to identify special properties and design
    methods to exploit them. Such design philosophy is labor-intensive and
    difficult to deploy efficiently to a broad range of domain-specific
    optimization problems, which are becoming ubiquitous in the pursuit of
    ever more personalized applications.

    In this dissertation, we consider different hybrid versions of classical
    optimization algorithms with data-driven techniques. We aim to equip
    classical algorithms with the ability to adapt their behaviors on the fly
    based on specific problem instances. A common theme in our approaches is
    to train the data-driven components on a pre-collected batch of
    representative problem instances to optimize some performance metrics,
    e.g., wall-clock time. Varying the integration details, we present
    several approaches to learning data-driven optimization modules for
    combinatorial optimization problems and study the corresponding
    fundamental research questions on policy learning. We provide multiple
    practical experimental results to showcase the practicality of our
    methods which lead to state-of-the-art performance on some classes of
    problems.},
}

@phdthesis{10.7907/gvtx-1586,
  author   = {Novoseller, Ellen Rachel},
  title    = {Online Learning from Human Feedback with Applications to
    Exoskeleton Gait Optimization},
  school   = {California Institute of Technology},
  year     = {2021},
  doi      = {10.7907/gvtx-1586},
  url      = {https://resolver.caltech.edu/CaltechTHESIS:12092020-162149429},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  advisor  = {Burdick, Joel W.},
  abstract = {Systems that intelligently interact with humans could improve
    people's lives in numerous ways and in numerous settings, such as
    households, hospitals, and workplaces. Yet, developing algorithms that
    reliably and efficiently personalize their interactions with people in
    real-world environments remains challenging. In particular, one major
    difficulty lies in adapting to human-in-the-loop feedback, in which an
    algorithm makes sequential decisions while receiving online feedback from
    humans; throughout this interaction, the algorithm seeks to optimize its
    decision-making quality, as measured by the utility of its performance to
    the human users. Such algorithms must balance between exploration and
    exploitation: on one hand, the algorithm must select uncertain strategies
    to fully explore the environment and the interacting human's preferences,
    while on the other hand, it must exploit the empirically-best-performing
    strategies to maximize its cumulative performance.

    Learning from human feedback can be difficult, as people are often
    unreliable in specifying numerical scores. In contrast, humans can often
    more accurately provide various types of qualitative feedback, for
    instance pairwise preferences. Yet, sample efficiency is a significant
    concern in human-in-the-loop settings, as qualitative feedback is less
    informative than absolute metrics, and algorithms can typically pose only
    limited queries to human users. Thus, there is a need to create
    theoretically-grounded online learning algorithms that efficiently,
    reliably, and robustly optimize their interactions with humans while
    learning from online qualitative feedback.

    This dissertation makes several contributions to algorithm design for
    human-in-the-loop learning. Firstly, this work develops the Dueling
    Posterior Sampling (DPS) algorithmic framework, a model-based, Bayesian
    approach for online learning in the settings of preference-based
    reinforcement learning and generalized linear dueling bandits. DPS is
    developed together with a theoretical regret analysis framework, and
    yields competitive empirical performance in a range of simulations.
    Additionally, this thesis presents the CoSpar and LineCoSpar algorithms
    for sample-efficient, mixed-initiative learning from pairwise preferences
    and coactive feedback. CoSpar and LineCoSpar are both deployed in human
    subject experiments with a lower-body exoskeleton to identify optimal,
    user-preferred exoskeleton walking gaits. This work presents the first
    demonstration of preference-based learning for optimizing dynamic
    crutchless exoskeleton walking for user comfort, and makes progress
    toward customizing exoskeletons and other assistive devices for
    individual users.},
}

@phdthesis{10.7907/rz4w-k233,
  author   = {Le, Hoang Minh},
  title    = {New Frameworks for Structured Policy Learning},
  school   = {California Institute of Technology},
  year     = {2020},
  doi      = {10.7907/rz4w-k233},
  url      = {https://resolver.caltech.edu/CaltechTHESIS:06092020-121556493},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  advisor  = {Yue, Yisong},
  abstract = {Sequential decision making applications are playing an
    increasingly important role in everyday life. Research interest in
    machine learning approaches to sequential decision making has surged
    thanks to recent empirical successes of reinforcement learning and
    imitation learning techniques, partly fueled by recent advances in deep
    learning-based function approximation. However in many real-world
    sequential decision making applications, relying purely on black box
    policy learning is often insufficient, due to practical requirements of
    data efficiency, interpretability, safety guarantees, etc. These
    challenges collectively make it difficult for many existing policy
    learning methods to find success in realistic applications.

    In this dissertation, we present recent advances in structured policy
    learning, which are new machine learning frameworks that integrate policy
    learning with principled notions of domain knowledge, which spans
    value-based, policy-based, and model-based structures. Our framework
    takes flexible reduction-style approaches that can integrate structure
    with reinforcement learning, imitation learning and robust control
    techniques. In addition to methodological advances, we demonstrate
    several successful applications of the new policy learning frameworks.},
}

@phdthesis{10.7907/4S2Y-CY80,
  author   = {Zheng, Stephan Tao},
  title    = {Exploiting Structure for Scalable and Robust Deep Learning},
  school   = {California Institute of Technology},
  year     = {2018},
  doi      = {10.7907/4S2Y-CY80},
  url      = {https://resolver.caltech.edu/CaltechThesis:05252018-092016207},
  address  = {1200 East California Boulevard, Pasadena, California 91125},
  advisor  = {Yue, Yisong},
  internal-note = {NOTE(review): url uses `CaltechThesis:' while every other
    entry uses `CaltechTHESIS:' -- left as-is, verify the link resolves},
  abstract = {Deep learning has seen great success training deep neural
    networks for complex prediction problems, such as large-scale image
    recognition, short-term time-series forecasting, and learning behavioral
    models for games with simple dynamics. However, neural networks have a
    number of weaknesses: 1) they are not sample-efficient and 2) they are
    often not robust against (adversarial) input perturbations. Hence, it is
    challenging to train neural networks for problems with exponential
    complexity, such as multi-agent games, complex long-term spatiotemporal
    dynamics, or noisy high-resolution image data.

    This thesis contributes methods to improve the sample efficiency,
    expressive power, and robustness of neural networks, by exploiting
    various forms of low-dimensional structure, such as spatiotemporal
    hierarchy and multi-agent coordination. We show the effectiveness of this
    approach in multiple learning paradigms: in both the supervised learning
    (e.g., imitation learning) and reinforcement learning settings.

    First, we introduce hierarchical neural networks that model both
    short-term actions and long-term goals from data, and can learn
    human-level behavioral models for spatiotemporal multi-agent games, such
    as basketball, using imitation learning.

    Second, in reinforcement learning, we show that behavioral policies with
    a hierarchical latent structure can efficiently learn forms of
    multi-agent coordination, which enables a form of structured exploration
    for faster learning.

    Third, we showcase tensor-train recurrent neural networks that can model
    high-order multiplicative structure in dynamical systems (e.g., Lorenz
    dynamics). We show that this model class gives state-of-the-art long-term
    forecasting performance with very long time horizons for both simulation
    and real-world traffic and climate data.

    Finally, we demonstrate two methods for neural network robustness: 1)
    stability training, a form of stochastic data augmentation to make neural
    networks more robust, and 2) neural fingerprinting, a method that detects
    adversarial examples by validating the network's behavior in the
    neighborhood of any given input.

    In sum, this thesis takes a step to enable machine learning for the next
    scale of problem complexity, such as rich spatiotemporal multi-agent
    games and large-scale robust predictions.},
}