[
    {
        "id": "authors:6amte-r7198",
        "collection": "authors",
        "collection_id": "6amte-r7198",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215502",
        "type": "book_section",
        "title": "Adaptive Control for Linearizable Systems Using On-Policy Reinforcement Learning",
        "book_title": "2020 59th IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Westenbroek",
                "given_name": "Tyler",
                "orcid": "0000-0003-1111-3118",
                "clpid": "Westenbroek-Tyler"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Fridovich-Keil",
                "given_name": "David",
                "orcid": "0000-0002-5866-6441",
                "clpid": "Fridovich-Keil-David"
            },
            {
                "family_name": "Prabhu",
                "given_name": "Valmik",
                "clpid": "Prabhu-Valmik"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire J.",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "The following topics are dealt with: control system synthesis; nonlinear control systems; linear systems; stability; optimisation; feedback; closed loop systems; Lyapunov methods; multi-agent systems; optimal control.",
        "doi": "10.1109/CDC42340.2020.9304242",
        "isbn": "978-1-7281-7447-1",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2020-12-14",
        "pages": "118-125"
    },
    {
        "id": "authors:8pqcg-xjy20",
        "collection": "authors",
        "collection_id": "8pqcg-xjy20",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215409",
        "type": "book_section",
        "title": "High Confidence Sets for Trajectories of Stochastic Time-Varying Nonlinear Systems",
        "book_title": "2020 59th IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Westenbroek",
                "given_name": "Tyler",
                "orcid": "0000-0003-1111-3118",
                "clpid": "Westenbroek-Tyler"
            },
            {
                "family_name": "Jordan",
                "given_name": "Michael I.",
                "orcid": "0000-0001-8935-817X",
                "clpid": "Jordan-Michael-I"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We analyze stochastic differential equations and their discretizations to derive novel high probability tracking bounds for exponentially stable time varying systems which are corrupted by process noise. The bounds have an explicit dependence on the rate of convergence for the unperturbed system and the dimension of the state space. The magnitude of the stochastic deviations have a simple intuitive form, and our perturbation bounds also allow us to derive tighter high probability bounds on the tracking of reference trajectories than the state of the art. The resulting bounds can be used in analyzing many tracking control schemes.",
        "doi": "10.1109/CDC42340.2020.9304491",
        "isbn": "978-1-7281-7447-1",
        "publisher": "IEEE",
        "publication_date": "2020-12-14",
        "pages": "4275-4280"
    },
    {
        "id": "authors:vddty-ay603",
        "collection": "authors",
        "collection_id": "vddty-ay603",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215578",
        "type": "book_section",
        "title": "Expert Selection in High-Dimensional Markov Decision Processes",
        "book_title": "2020 59th IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Rubies-Royo",
                "given_name": "Vicen\u00e7",
                "clpid": "Rubies-Royo-Vicen\u00e7"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Dong",
                "given_name": "Roy",
                "orcid": "0000-0001-8034-4329",
                "clpid": "Dong-Roy"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "In this work we present a multi-armed bandit framework for online expert selection in Markov decision processes and demonstrate its use in high-dimensional settings. Our method takes a set of candidate expert policies and switches between them to rapidly identify the best performing expert using a variant of the classical upper confidence bound algorithm, thus ensuring low regret in the overall performance of the system. This is useful in applications where several expert policies may be available, and one needs to be selected at run-time for the underlying environment.",
        "doi": "10.1109/CDC42340.2020.9303788",
        "isbn": "978-1-7281-7447-1",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2020-12",
        "pages": "3604-3610"
    },
    {
        "id": "authors:ev5c3-q2v72",
        "collection": "authors",
        "collection_id": "ev5c3-q2v72",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215650",
        "type": "book_section",
        "title": "Feedback Linearization for Uncertain Systems via Reinforcement Learning",
        "book_title": "2020 IEEE International Conference on Robotics and Automation (ICRA)",
        "author": [
            {
                "family_name": "Westenbroek",
                "given_name": "Tyler",
                "orcid": "0000-0003-1111-3118",
                "clpid": "Westenbroek-Tyler"
            },
            {
                "family_name": "Fridovich-Keil",
                "given_name": "David",
                "orcid": "0000-0002-5866-6441",
                "clpid": "Fridovich-Keil-David"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Arora",
                "given_name": "Shreyas",
                "clpid": "Arora-Shreyas"
            },
            {
                "family_name": "Prabhu",
                "given_name": "Valmik",
                "clpid": "Prabhu-Valmik"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire J.",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            }
        ],
        "abstract": "We present a novel approach to control design for nonlinear systems which leverages model-free policy optimization techniques to learn a linearizing controller for a physical plant with unknown dynamics. Feedback linearization is a technique from nonlinear control which renders the input-output dynamics of a nonlinear plant linear under application of an appropriate feedback controller. Once a linearizing controller has been constructed, desired output trajectories for the nonlinear plant can be tracked using a variety of linear control techniques. However, the calculation of a linearizing controller requires a precise dynamics model for the system. As a result, model-based approaches for learning exact linearizing controllers generally require a simple, highly structured model of the system with easily identifiable parameters. In contrast, the model-free approach presented in this paper is able to approximate the linearizing controller for the plant using general function approximation architectures. Specifically, we formulate a continuous-time optimization problem over the parameters of a learned linearizing controller whose optima are the set of parameters which best linearize the plant. We derive conditions under which the learning problem is (strongly) convex and provide guarantees which ensure the true linearizing controller for the plant is recovered. We then discuss how model-free policy optimization algorithms can be used to solve a discrete-time approximation to the problem using data collected from the real-world plant. The utility of the framework is demonstrated in simulation and on a real-world robotic platform.",
        "doi": "10.1109/ICRA40945.2020.9197158",
        "isbn": "978-1-7281-7395-5",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2020-08",
        "pages": "1364-1371"
    },
    {
        "id": "authors:0c7my-cex38",
        "collection": "authors",
        "collection_id": "0c7my-cex38",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215800",
        "type": "book_section",
        "title": "Local Nash Equilibria are Isolated, Strict Local Nash Equilibria in 'Almost All' Zero-Sum Continuous Games",
        "book_title": "2019 IEEE 58th Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            }
        ],
        "abstract": "We prove that differential Nash equilibria are generic amongst local Nash equilibria in continuous zero-sum games. That is, there exists an open-dense subset of zero-sum games for which local Nash equilibria are nondegenerate differential Nash equilibria. The result extends previous results to the zero-sum setting, where we obtain even stronger results; in particular, we show that local Nash equilibria are generically hyperbolic critical points. We further show that differential Nash equilibria of zero-sum games are structurally stable. The purpose for presenting these extensions is the recent renewed interest in zero-sum games within machine learning and optimization. Adversarial learning and generative adversarial network approaches are touted to be more robust than the alternative. Zero-sum games are at the heart of such approaches. Many works proceed under the assumption of hyperbolicity of critical points. Our results justify this assumption by showing `almost all' zero-sum games admit local Nash equilibria that are hyperbolic.",
        "doi": "10.1109/CDC40024.2019.9030203",
        "isbn": "978-1-7281-1398-2",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2019-12",
        "pages": "6899-6904"
    },
    {
        "id": "authors:2dbdw-9pw59",
        "collection": "authors",
        "collection_id": "2dbdw-9pw59",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215867",
        "type": "book_section",
        "title": "On the Analysis of Cyclic Drug Schedules for Cancer Treatment using Switched Dynamical Systems",
        "book_title": "2018 IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Chapman",
                "given_name": "Margaret P.",
                "clpid": "Chapman-Margaret-P"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric V.",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Langer",
                "given_name": "Ellen",
                "orcid": "0000-0003-0352-1859",
                "clpid": "Langer-Ellen"
            },
            {
                "family_name": "Sears",
                "given_name": "Rosalie",
                "orcid": "0000-0003-1558-2413",
                "clpid": "Sears-Rosalie"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire J.",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            }
        ],
        "abstract": "Motivated by our prior work on a Triple Negative breast cancer cell line, the focus of this paper is controller synthesis for cancer treatment, through the use of drug scheduling and a switched dynamical system model. Here we study a cyclic schedule of d drugs with maximal waiting times between drug inputs, where each drug is applied once per cycle in any order. We suppose that some of the d drugs are highly toxic to normal cells and that these drugs can shrink the live cancer cell population. The remaining drugs are less toxic to normal cells and can only reduce the growth rate of the live cancer cell population. Also, we assume that waiting time bounds related to toxicity, or to the onset of resistance, are available for each drug. A cancer cell population is said to be stable if the number of live cells tends to zero, as time becomes sufficiently large. In the absence of modeling error, we derive conditions for exponential stability. In the presence of modeling error, we prove exponential stability and derive a settling time, under certain mathematical conditions on the error. We conclude the paper with a numerical example that uses models which were identified on Triple Negative breast cancer cell line data.",
        "doi": "10.1109/CDC.2018.8619490",
        "isbn": "978-1-5386-1395-5",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2018-12",
        "pages": "3503-3509"
    },
    {
        "id": "authors:vcvee-qm253",
        "collection": "authors",
        "collection_id": "vcvee-qm253",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215940",
        "type": "book_section",
        "title": "Gradient-based inverse risk-sensitive reinforcement learning",
        "book_title": "2017 IEEE 56th Annual Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Fiez",
                "given_name": "Tanner",
                "clpid": "Fiez-Tanner"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We address the problem of inverse reinforcement learning in Markov decision processes where the agent is risk-sensitive. In particular, we model risk-sensitivity in a reinforcement learning framework by making use of models of human decision-making having their origins in behavioral psychology and economics. We propose a gradient-based inverse reinforcement learning algorithm that minimizes a loss function defined on the observed behavior. We demonstrate the performance of the proposed technique on two examples, the first of which is the canonical Grid World example and the second of which is an MDP modeling passengers' decisions regarding ride-sharing. In the latter, we use pricing and travel time data from a ride-sharing company to construct the transition probabilities and rewards of the MDP.",
        "doi": "10.1109/CDC.2017.8264535",
        "isbn": "978-1-5090-2873-3",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2017-12",
        "pages": "5796-5801"
    },
    {
        "id": "authors:tcken-s8w33",
        "collection": "authors",
        "collection_id": "tcken-s8w33",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222216008",
        "type": "book_section",
        "title": "Understanding the impact of parking on urban mobility via routing games on queue-flow networks",
        "author": [
            {
                "family_name": "Calderone",
                "given_name": "Daniel",
                "clpid": "Calderone-Daniel"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We derive a new routing game model for urban centers that takes into account parking-related traffic along with all other traffic. In particular, we combine a queuing game model for on-street parking with a classical routing game to create a queue-routing game where parking traffic selects a parking zone (block-face) in addition to their route through the network. We show that this game is a potential game. We construct practical examples using subsections of the Seattle downtown area to illustrate the usefulness of this modeling paradigm and to examine how parking-traffic can impact overall congestion and the route choices of other drivers. By varying the cost of parking in different parking zones, we demonstrate that parking-related traffic can be adjusted to satisfy a particular objective.",
        "doi": "10.1109/CDC.2016.7799444",
        "publisher": "IEEE",
        "publication_date": "2016-12"
    },
    {
        "id": "authors:5jg1y-yaw95",
        "collection": "authors",
        "collection_id": "5jg1y-yaw95",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215263",
        "type": "book_section",
        "title": "To observe or not to observe: Queuing game framework for urban parking",
        "book_title": "2016 IEEE 55th Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Dowling",
                "given_name": "Chase",
                "clpid": "Dowling-Chase"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Zhang",
                "given_name": "Baosen",
                "clpid": "Zhang-Baosen"
            }
        ],
        "abstract": "We model parking in urban centers as a set of parallel queues and overlay a game theoretic structure. We model arriving drivers as utility maximizers and consider two games: one in which it is free to observe the queue length and one in which it is not. Not only do we compare the Nash induced welfare to the socially optimal welfare, confirming the usual result that Nash is worse for society, we also show that by other performance metrics more commonly used in transportation — such as occupancy and time spent circling — the Nash solution is suboptimal. We find that gains to welfare do not require everyone to observe. Through simulation, we explore a more complex scenario where drivers decide based on the queueing game whether or not to enter a collection of queues over a network. Our simulated models use parameters informed by real-world data collected by the Seattle Department of Transportation.",
        "doi": "10.1109/CDC.2016.7799079",
        "isbn": "978-1-5090-1837-6",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2016-12",
        "pages": "5286-5291"
    }
]