[
    {
        "id": "authors:m8ygm-cy673",
        "collection": "authors",
        "collection_id": "m8ygm-cy673",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230316-204015123",
        "type": "monograph",
        "title": "A Finite-Sample Analysis of Payoff-Based Independent Learning in Zero-Sum Stochastic Games",
        "author": [
            {
                "family_name": "Chen",
                "given_name": "Zaiwei",
                "orcid": "0000-0001-9915-5595",
                "clpid": "Chen-Zaiwei"
            },
            {
                "family_name": "Zhang",
                "given_name": "Kaiqing",
                "clpid": "Zhang-Kaiqing"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ozdaglar",
                "given_name": "Asuman",
                "orcid": "0000-0002-1827-1285",
                "clpid": "Ozdaglar-Asuman-E"
            },
            {
                "family_name": "Wierman",
                "given_name": "Adam",
                "orcid": "0000-0002-5923-0199",
                "clpid": "Wierman-A"
            }
        ],
        "abstract": "We study two-player zero-sum stochastic games, and propose a form of independent learning dynamics called Doubly Smoothed Best-Response dynamics, which integrates a discrete and doubly smoothed variant of the best-response dynamics into temporal-difference (TD)-learning and minimax value iteration. The resulting dynamics are payoff-based, convergent, rational, and symmetric among players. Our main results provide finite-sample guarantees. In particular, we prove the first-known O\u0303(1/\u03f5\u00b2) sample complexity bound for payoff-based independent learning dynamics, up to a smoothing bias. In the special case where the stochastic game has only one state (i.e., matrix games), we provide a sharper O\u0303(1/\u03f5) sample complexity. Our analysis uses a novel coupled Lyapunov drift approach to capture the evolution of multiple sets of coupled and stochastic iterates, which might be of independent interest.",
        "publisher": "arXiv",
        "publication_date": "2023-03-03"
    },
    {
        "id": "authors:8fa1j-r0853",
        "collection": "authors",
        "collection_id": "8fa1j-r0853",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230316-204028845",
        "type": "monograph",
        "title": "Algorithmic Collective Action in Machine Learning",
        "author": [
            {
                "family_name": "Hardt",
                "given_name": "Moritz",
                "clpid": "Hardt-Moritz"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Mendler-D\u00fcnner",
                "given_name": "Celestine",
                "orcid": "0000-0002-9880-7173",
                "clpid": "Mendler-D\u00fcnner-Celestine"
            },
            {
                "family_name": "Zrnic",
                "given_name": "Tijana",
                "clpid": "Zrnic-Tijana"
            }
        ],
        "abstract": "We initiate a principled study of algorithmic collective action on digital platforms that deploy machine learning algorithms. We propose a simple theoretical model of a collective interacting with a firm's learning algorithm. The collective pools the data of participating individuals and executes an algorithmic strategy by instructing participants how to modify their own data to achieve a collective goal. We investigate the consequences of this model in three fundamental learning-theoretic settings: the case of a nonparametric optimal learning algorithm, a parametric risk minimizer, and gradient-based optimization. In each setting, we come up with coordinated algorithmic strategies and characterize natural success criteria as a function of the collective's size. Complementing our theory, we conduct systematic experiments on a skill classification task involving tens of thousands of resumes from a gig platform for freelancers. Through more than two thousand model training runs of a BERT-like language model, we see a striking correspondence emerge between our empirical observations and the predictions made by our theory. Taken together, our theory and experiments broadly support the conclusion that algorithmic collectives of exceedingly small fractional size can exert significant control over a platform's learning algorithm.",
        "publisher": "arXiv",
        "publication_date": "2023-02-08"
    },
    {
        "id": "authors:445x7-d7434",
        "collection": "authors",
        "collection_id": "445x7-d7434",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20230316-204025426",
        "type": "monograph",
        "title": "Convergent First-Order Methods for Bi-level Optimization and Stackelberg Games",
        "author": [
            {
                "family_name": "Maheshwari",
                "given_name": "Chinmay",
                "orcid": "0000-0003-3596-2851",
                "clpid": "Maheshwari-Chinmay"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            }
        ],
        "abstract": "We propose an algorithm to solve a class of bi-level optimization problems using only first-order information. In particular, we focus on a class where the inner minimization has unique solutions. Unlike contemporary algorithms, our algorithm does not require the use of an oracle estimator for the gradient of the bi-level objective or an approximate solver for the inner problem. Instead, we alternate between descending on the inner problem using na\u00efve optimization methods and descending on the upper-level objective function using specially constructed gradient estimators. We provide non-asymptotic convergence rates to stationary points of the bi-level objective in the absence of convexity of the closed-loop function and further show asymptotic convergence to only local minima of the bi-level problem. The approach is inspired by ideas from the literature on two-timescale stochastic approximation algorithms.",
        "publisher": "arXiv",
        "publication_date": "2023-02-02"
    },
    {
        "id": "authors:pnp9k-akm20",
        "collection": "authors",
        "collection_id": "pnp9k-akm20",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221219-234102223",
        "type": "monograph",
        "title": "Synthesizing Reactive Test Environments for Autonomous Systems: Testing Reach-Avoid Specifications with Multi-Commodity Flows",
        "author": [
            {
                "family_name": "Badithela",
                "given_name": "Apurva",
                "clpid": "Badithela-Apurva"
            },
            {
                "family_name": "Graebener",
                "given_name": "Josefine B.",
                "clpid": "Graebener-Josefine-B"
            },
            {
                "family_name": "Ubellacker",
                "given_name": "Wyatt",
                "orcid": "0000-0002-4732-6185",
                "clpid": "Ubellacker-Wyatt-L"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric V.",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ames",
                "given_name": "Aaron D.",
                "orcid": "0000-0003-0848-3177",
                "clpid": "Ames-A-D"
            },
            {
                "family_name": "Murray",
                "given_name": "Richard M.",
                "orcid": "0000-0002-5785-7481",
                "clpid": "Murray-R-M"
            }
        ],
        "abstract": "We study automated test generation for verifying discrete decision-making modules in autonomous systems. We utilize linear temporal logic to encode the requirements on the system under test in the system specification and the behavior that we want to observe during the test is given as the test specification which is unknown to the system. First, we use the specifications and their corresponding non-deterministic B\u00fcchi automata to generate the specification product automaton. Second, a virtual product graph representing the high-level interaction between the system and the test environment is constructed modeling the product automaton encoding the system, the test environment, and specifications. The main result of this paper is an optimization problem, framed as a multi-commodity network flow problem, that solves for constraints on the virtual product graph which can then be projected to the test environment. Therefore, the result of the optimization problem is reactive test synthesis that ensures that the system meets the test specifications along with satisfying the system specifications. This framework is illustrated in simulation on grid world examples, and demonstrated on hardware with the Unitree A1 quadruped, wherein dynamic locomotion behaviors are verified in the context of reactive test environments.",
        "doi": "10.48550/arXiv.2210.10304",
        "publisher": "arXiv",
        "publication_date": "2022-10-19"
    },
    {
        "id": "authors:e6mv5-zqt31",
        "collection": "authors",
        "collection_id": "e6mv5-zqt31",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20221220-221907545",
        "type": "monograph",
        "title": "A Note on Zeroth-Order Optimization on the Simplex",
        "author": [
            {
                "family_name": "Zrnic",
                "given_name": "Tijana",
                "clpid": "Zrnic-Tijana"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            }
        ],
        "abstract": "We construct a zeroth-order gradient estimator for a smooth function defined on the probability simplex. The proposed estimator queries the simplex only. We prove that projected gradient descent and the exponential weights algorithm, when run with this estimator instead of exact gradients, converge at a O(T^(-1/4)) rate.",
        "doi": "10.48550/arXiv.2208.01185",
        "publisher": "arXiv",
        "publication_date": "2022-08-02"
    },
    {
        "id": "authors:g79fh-yqz70",
        "collection": "authors",
        "collection_id": "g79fh-yqz70",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220714-212437915",
        "type": "article",
        "title": "Langevin Monte Carlo for Contextual Bandits",
        "author": [
            {
                "family_name": "Xu",
                "given_name": "Pan",
                "clpid": "Xu-Pan"
            },
            {
                "family_name": "Zheng",
                "given_name": "Hongkai",
                "clpid": "Zheng-Hongkai"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric V.",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Azizzadenesheli",
                "given_name": "Kamyar",
                "orcid": "0000-0001-8507-1868",
                "clpid": "Azizzadenesheli-Kamyar"
            },
            {
                "family_name": "Anandkumar",
                "given_name": "Anima",
                "orcid": "0000-0002-6974-6797",
                "clpid": "Anandkumar-A"
            }
        ],
        "abstract": "We study the efficiency of Thompson sampling for contextual bandits. Existing Thompson sampling-based algorithms need to construct a Laplace approximation (i.e., a Gaussian distribution) of the posterior distribution, which is inefficient to sample in high dimensional applications for general covariance matrices. Moreover, the Gaussian approximation may not be a good surrogate for the posterior distribution for general reward generating functions. We propose an efficient posterior sampling algorithm, viz., Langevin Monte Carlo Thompson Sampling (LMC-TS), that uses Markov Chain Monte Carlo (MCMC) methods to directly sample from the posterior distribution in contextual bandits. Our method is computationally efficient since it only needs to perform noisy gradient descent updates without constructing the Laplace approximation of the posterior distribution. We prove that the proposed algorithm achieves the same sublinear regret bound as the best Thompson sampling algorithms for a special case of contextual bandits, viz., linear contextual bandits. We conduct experiments on both synthetic data and real-world datasets on different contextual bandit models, which demonstrates that directly sampling from the posterior is both computationally efficient and competitive in performance.",
        "doi": "10.48550/arXiv.2206.11254",
        "issn": "2640-3498",
        "publisher": "ML Research Press",
        "publication": "Proceedings of Machine Learning Research",
        "publication_date": "2022-06-22",
        "volume": "162",
        "pages": "24830-24850"
    },
    {
        "id": "authors:89c54-60249",
        "collection": "authors",
        "collection_id": "89c54-60249",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20220715-171641949",
        "type": "monograph",
        "title": "Decentralized, Communication- and Coordination-free Learning in Structured Matching Markets",
        "author": [
            {
                "family_name": "Maheshwari",
                "given_name": "Chinmay",
                "clpid": "Maheshwari-Chinmay"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Sastry",
                "given_name": "Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We study the problem of online learning in competitive settings in the context of two-sided matching markets. In particular, one side of the market, the agents, must learn about their preferences over the other side, the firms, through repeated interaction while competing with other agents for successful matches. We propose a class of decentralized, communication- and coordination-free algorithms that agents can use to reach to their stable match in structured matching markets. In contrast to prior works, the proposed algorithms make decisions based solely on an agent's own history of play and requires no foreknowledge of the firms' preferences. Our algorithms are constructed by splitting up the statistical problem of learning one's preferences, from noisy observations, from the problem of competing for firms. We show that under realistic structural assumptions on the underlying preferences of the agents and firms, the proposed algorithms incur a regret which grows at most logarithmically in the time horizon. Our results show that, in the case of matching markets, competition need not drastically affect the performance of decentralized, communication and coordination free online learning algorithms.",
        "doi": "10.48550/arXiv.2206.02344",
        "publisher": "arXiv",
        "publication_date": "2022-06-06"
    },
    {
        "id": "authors:cstrb-vrf53",
        "collection": "authors",
        "collection_id": "cstrb-vrf53",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213717702",
        "type": "monograph",
        "title": "Who Leads and Who Follows in Strategic Classification?",
        "author": [
            {
                "family_name": "Zrnic",
                "given_name": "Tijana",
                "clpid": "Zrnic-Tijana"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            },
            {
                "family_name": "Jordan",
                "given_name": "Michael I.",
                "orcid": "0000-0001-8935-817X",
                "clpid": "Jordan-Michael-I"
            }
        ],
        "abstract": "As predictive models are deployed into the real world, they must increasingly contend with strategic behavior. A growing body of work on strategic classification treats this problem as a Stackelberg game: the decision-maker \"leads\" in the game by deploying a model, and the strategic agents \"follow\" by playing their best response to the deployed model. Importantly, in this framing, the burden of learning is placed solely on the decision-maker, while the agents' best responses are implicitly treated as instantaneous. In this work, we argue that the order of play in strategic classification is fundamentally determined by the relative frequencies at which the decision-maker and the agents adapt to each other's actions. In particular, by generalizing the standard model to allow both players to learn over time, we show that a decision-maker that makes updates faster than the agents can reverse the order of play, meaning that the agents lead and the decision-maker follows. We observe in standard learning settings that such a role reversal can be desirable for both the decision-maker and the strategic agents. Finally, we show that a decision-maker with the freedom to choose their update frequency can induce learning dynamics that converge to Stackelberg equilibria with either order of play.",
        "doi": "10.48550/arXiv.2106.12529",
        "publisher": "arXiv",
        "publication_date": "2021-06-23"
    },
    {
        "id": "authors:5jj0d-zdx15",
        "collection": "authors",
        "collection_id": "5jj0d-zdx15",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213714292",
        "type": "monograph",
        "title": "Zeroth-Order Methods for Convex-Concave Minmax Problems: Applications to Decision-Dependent Risk Minimization",
        "author": [
            {
                "family_name": "Maheshwari",
                "given_name": "Chinmay",
                "clpid": "Maheshwari-Chinmay"
            },
            {
                "family_name": "Chiu",
                "given_name": "Chih-Yuan",
                "clpid": "Chiu-Chih-Yuan"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            }
        ],
        "abstract": "Min-max optimization is emerging as a key framework for analyzing problems of robustness to strategically and adversarially generated data. We propose a random reshuffling-based gradient free Optimistic Gradient Descent-Ascent algorithm for solving convex-concave min-max problems with finite sum structure. We prove that the algorithm enjoys the same convergence rate as that of zeroth-order algorithms for convex minimization problems. We further specialize the algorithm to solve distributionally robust, decision-dependent learning problems, where gradient information is not readily available. Through illustrative simulations, we observe that our proposed approach learns models that are simultaneously robust against adversarial distribution shifts and strategic decisions from the data sources, and outperforms existing methods from the strategic classification literature.",
        "doi": "10.48550/arXiv.2106.09082",
        "publisher": "arXiv",
        "publication_date": "2021-06-16"
    },
    {
        "id": "authors:mjhq5-nd880",
        "collection": "authors",
        "collection_id": "mjhq5-nd880",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213710817",
        "type": "monograph",
        "title": "Fast Distributionally Robust Learning with Variance Reduced Min-Max Optimization",
        "author": [
            {
                "family_name": "Yu",
                "given_name": "Yaodong",
                "orcid": "0000-0003-0540-8526",
                "clpid": "Yu-Yaodong"
            },
            {
                "family_name": "Lin",
                "given_name": "Tianyi",
                "orcid": "0000-0002-5323-1852",
                "clpid": "Lin-Tianyi-Darren"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Jordan",
                "given_name": "Michael I.",
                "orcid": "0000-0001-8935-817X",
                "clpid": "Jordan-Michael-I"
            }
        ],
        "abstract": "Distributionally robust supervised learning (DRSL) is emerging as a key paradigm for building reliable machine learning systems for real-world applications -- reflecting the need for classifiers and predictive models that are robust to the distribution shifts that arise from phenomena such as selection bias or nonstationarity. Existing algorithms for solving Wasserstein DRSL -- one of the most popular DRSL frameworks based around robustness to perturbations in the Wasserstein distance -- involve solving complex subproblems or fail to make use of stochastic gradients, limiting their use in large-scale machine learning problems. We revisit Wasserstein DRSL through the lens of min-max optimization and derive scalable and efficiently implementable stochastic extra-gradient algorithms which provably achieve faster convergence rates than existing approaches. We demonstrate their effectiveness on synthetic and real data when compared to existing DRSL approaches. Key to our results is the use of variance reduction and random reshuffling to accelerate stochastic min-max optimization, the analysis of which may be of independent interest.",
        "doi": "10.48550/arXiv.2104.13326",
        "publisher": "arXiv",
        "publication_date": "2021-04-27"
    },
    {
        "id": "authors:8pqcg-xjy20",
        "collection": "authors",
        "collection_id": "8pqcg-xjy20",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215409",
        "type": "book_section",
        "title": "High Confidence Sets for Trajectories of Stochastic Time-Varying Nonlinear Systems",
        "book_title": "2020 59th IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Westenbroek",
                "given_name": "Tyler",
                "orcid": "0000-0003-1111-3118",
                "clpid": "Westenbroek-Tyler"
            },
            {
                "family_name": "Jordan",
                "given_name": "Michael I.",
                "orcid": "0000-0001-8935-817X",
                "clpid": "Jordan-Michael-I"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We analyze stochastic differential equations and their discretizations to derive novel high probability tracking bounds for exponentially stable time varying systems which are corrupted by process noise. The bounds have an explicit dependence on the rate of convergence for the unperturbed system and the dimension of the state space. The magnitude of the stochastic deviations have a simple intuitive form, and our perturbation bounds also allow us to derive tighter high probability bounds on the tracking of reference trajectories than the state of the art. The resulting bounds can be used in analyzing many tracking control schemes.",
        "doi": "10.1109/CDC42340.2020.9304491",
        "isbn": "978-1-7281-7447-1",
        "publisher": "IEEE",
        "publication_date": "2020-12-14",
        "pages": "4275-4280"
    },
    {
        "id": "authors:6amte-r7198",
        "collection": "authors",
        "collection_id": "6amte-r7198",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215502",
        "type": "book_section",
        "title": "Adaptive Control for Linearizable Systems Using On-Policy Reinforcement Learning",
        "book_title": "2020 59th IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Westenbroek",
                "given_name": "Tyler",
                "orcid": "0000-0003-1111-3118",
                "clpid": "Westenbroek-Tyler"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Fridovich-Keil",
                "given_name": "David",
                "orcid": "0000-0002-5866-6441",
                "clpid": "Fridovich-Keil-David"
            },
            {
                "family_name": "Prabhu",
                "given_name": "Valmik",
                "clpid": "Prabhu-Valmik"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire J.",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "The following topics are dealt with: control system synthesis; nonlinear control systems; linear systems; stability; optimisation; feedback; closed loop systems; Lyapunov methods; multi-agent systems; optimal control.",
        "doi": "10.1109/CDC42340.2020.9304242",
        "isbn": "978-1-7281-7447-1",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2020-12-14",
        "pages": "118-125"
    },
    {
        "id": "authors:vddty-ay603",
        "collection": "authors",
        "collection_id": "vddty-ay603",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215578",
        "type": "book_section",
        "title": "Expert Selection in High-Dimensional Markov Decision Processes",
        "book_title": "2020 59th IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Rubies-Royo",
                "given_name": "Vicen\u00e7",
                "clpid": "Rubies-Royo-Vicen\u00e7"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Dong",
                "given_name": "Roy",
                "orcid": "0000-0001-8034-4329",
                "clpid": "Dong-Roy"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "In this work we present a multi-armed bandit framework for online expert selection in Markov decision processes and demonstrate its use in high-dimensional settings. Our method takes a set of candidate expert policies and switches between them to rapidly identify the best performing expert using a variant of the classical upper confidence bound algorithm, thus ensuring low regret in the overall performance of the system. This is useful in applications where several expert policies may be available, and one needs to be selected at run-time for the underlying environment.",
        "doi": "10.1109/CDC42340.2020.9303788",
        "isbn": "978-1-7281-7447-1",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2020-12",
        "pages": "3604-3610"
    },
    {
        "id": "authors:n5mef-bvm32",
        "collection": "authors",
        "collection_id": "n5mef-bvm32",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210907-195235166",
        "type": "article",
        "title": "Convergence Analysis of Gradient-Based Learning in Continuous Games",
        "author": [
            {
                "family_name": "Chasnov",
                "given_name": "Benjamin",
                "orcid": "0000-0003-3484-2997",
                "clpid": "Chasnov-Benjamin"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Burden",
                "given_name": "Samuel",
                "clpid": "Burden-Samuel-A"
            }
        ],
        "abstract": "Considering a class of gradient-based multi-agent learning algorithms in non-cooperative settings, we provide convergence guarantees to a neighborhood of a stable Nash equilibrium. In particular, we consider continuous games where agents learn in 1) deterministic settings with oracle access to their gradient and 2) stochastic settings with an unbiased estimator of their gradient. We also study the effects of non-uniform learning rates, which causes a distortion of the vector field that can alter which equilibrium the agents converge to and the path they take. We support the analysis with numerical examples that provide insight into how one might synthesize games to achieve desired equilibria.",
        "issn": "2640-3498",
        "publisher": "ML Research Press",
        "publication": "Proceedings of Machine Learning Research",
        "publication_date": "2020-08-06",
        "volume": "115",
        "pages": "935-944"
    },
    {
        "id": "authors:ev5c3-q2v72",
        "collection": "authors",
        "collection_id": "ev5c3-q2v72",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215650",
        "type": "book_section",
        "title": "Feedback Linearization for Uncertain Systems via Reinforcement Learning",
        "book_title": "2020 IEEE International Conference on Robotics and Automation (ICRA)",
        "author": [
            {
                "family_name": "Westenbroek",
                "given_name": "Tyler",
                "orcid": "0000-0003-1111-3118",
                "clpid": "Westenbroek-Tyler"
            },
            {
                "family_name": "Fridovich-Keil",
                "given_name": "David",
                "orcid": "0000-0002-5866-6441",
                "clpid": "Fridovich-Keil-David"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Arora",
                "given_name": "Shreyas",
                "clpid": "Arorsa-Shreyas"
            },
            {
                "family_name": "Prabhu",
                "given_name": "Valmik",
                "clpid": "Prabhu-Valmik"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire J.",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            }
        ],
        "abstract": "We present a novel approach to control design for nonlinear systems which leverages model-free policy optimization techniques to learn a linearizing controller for a physical plant with unknown dynamics. Feedback linearization is a technique from nonlinear control which renders the input-output dynamics of a nonlinear plant linear under application of an appropriate feedback controller. Once a linearizing controller has been constructed, desired output trajectories for the nonlinear plant can be tracked using a variety of linear control techniques. However, the calculation of a linearizing controller requires a precise dynamics model for the system. As a result, model-based approaches for learning exact linearizing controllers generally require a simple, highly structured model of the system with easily identifiable parameters. In contrast, the model-free approach presented in this paper is able to approximate the linearizing controller for the plant using general function approximation architectures. Specifically, we formulate a continuous-time optimization problem over the parameters of a learned linearizing controller whose optima are the set of parameters which best linearize the plant. We derive conditions under which the learning problem is (strongly) convex and provide guarantees which ensure the true linearizing controller for the plant is recovered. We then discuss how model-free policy optimization algorithms can be used to solve a discrete-time approximation to the problem using data collected from the real-world plant. The utility of the framework is demonstrated in simulation and on a real-world robotic platform.",
        "doi": "10.1109/ICRA40945.2020.9197158",
        "isbn": "978-1-7281-7395-5",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2020-08",
        "pages": "1364-1371"
    },
    {
        "id": "authors:41arc-vy622",
        "collection": "authors",
        "collection_id": "41arc-vy622",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215724",
        "type": "article",
        "title": "Inverse Risk-Sensitive Reinforcement Learning",
        "author": [
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            }
        ],
        "abstract": "This work addresses the problem of inverse reinforcement learning in Markov decision processes where the decision-making agent is risk-sensitive. In particular, a risk-sensitive reinforcement learning algorithm with convergence guarantees that makes use of coherent risk metrics and models of human decision-making which have their origins in behavioral psychology and economics is presented. The risk-sensitive reinforcement learning algorithm provides the theoretical underpinning for a gradient-based inverse reinforcement learning algorithm that seeks to minimize a loss function defined on the observed behavior. It is shown that the gradient of the loss function with respect to the model parameters is well defined and computable via a contraction map argument. Evaluation of the proposed technique is performed on a Grid World example, a canonical benchmark problem.",
        "doi": "10.1109/TAC.2019.2926674",
        "issn": "0018-9286",
        "publisher": "IEEE",
        "publication": "IEEE Transactions on Automatic Control",
        "publication_date": "2020-03",
        "series_number": "3",
        "volume": "65",
        "issue": "3",
        "pages": "1256-1263"
    },
    {
        "id": "authors:mr6zn-1js36",
        "collection": "authors",
        "collection_id": "mr6zn-1js36",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-220351518",
        "type": "monograph",
        "title": "On Thompson Sampling with Langevin Algorithms",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Pacchiano",
                "given_name": "Aldo",
                "clpid": "Pacchiano-Aldo"
            },
            {
                "family_name": "Ma",
                "given_name": "Yi-an",
                "orcid": "0000-0001-6074-6638",
                "clpid": "Ma-Yi-an"
            },
            {
                "family_name": "Bartlett",
                "given_name": "Peter L.",
                "orcid": "0000-0002-8760-3140",
                "clpid": "Bartlett-Peter-L"
            },
            {
                "family_name": "Jordan",
                "given_name": "Michael I.",
                "orcid": "0000-0001-8935-817X",
                "clpid": "Jordan-Michael-I"
            }
        ],
        "abstract": "Thompson sampling for multi-armed bandit problems is known to enjoy favorable performance in both theory and practice. However, it suffers from a significant limitation computationally, arising from the need for samples from posterior distributions at every iteration. We propose two Markov Chain Monte Carlo (MCMC) methods tailored to Thompson sampling to address this issue. We construct quickly converging Langevin algorithms to generate approximate samples that have accuracy guarantees, and we leverage novel posterior concentration rates to analyze the regret of the resulting approximate Thompson sampling algorithm. Further, we specify the necessary hyperparameters for the MCMC procedure to guarantee optimal instance-dependent frequentist regret while having low computational complexity. In particular, our algorithms take advantage of both posterior concentration and a sample reuse mechanism to ensure that only a constant number of iterations and a constant amount of data is needed in each round. The resulting approximate Thompson sampling algorithm has logarithmic regret and its computational complexity does not scale with the time horizon of the algorithm.",
        "doi": "10.48550/arXiv.2002.10002",
        "publisher": "arXiv",
        "publication_date": "2020-02-23"
    },
    {
        "id": "authors:8a04w-etx35",
        "collection": "authors",
        "collection_id": "8a04w-etx35",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210907-200115513",
        "type": "article",
        "title": "On Gradient-Based Learning in Continuous Games",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We introduce a general framework for competitive gradient-based learning that encompasses a wide breadth of multiagent learning algorithms, and analyze the limiting behavior of competitive gradient-based learning algorithms using dynamical systems theory. For both general-sum and potential games, we characterize a nonnegligible subset of the local Nash equilibria that will be avoided if each agent employs a gradient-based learning algorithm. We also shed light on the issue of convergence to non-Nash strategies in general- and zero-sum games, which may have no relevance to the underlying game, and arise solely due to the choice of algorithm. The existence and frequency of such strategies may explain some of the difficulties encountered when using gradient descent in zero-sum games as, e.g., in the training of generative adversarial networks. To reinforce the theoretical contributions, we provide empirical results that highlight the frequency of linear quadratic dynamic games (a benchmark for multiagent reinforcement learning) that admit global Nash equilibria that are almost surely avoided by policy gradient.",
        "doi": "10.1137/18m1231298",
        "issn": "2577-0187",
        "publisher": "Society for Industrial & Applied Mathematics",
        "publication": "SIAM Journal on Mathematics of Data Science",
        "publication_date": "2020-02-18",
        "series_number": "1",
        "volume": "2",
        "issue": "1",
        "pages": "103-131"
    },
    {
        "id": "authors:0c7my-cex38",
        "collection": "authors",
        "collection_id": "0c7my-cex38",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215800",
        "type": "book_section",
        "title": "Local Nash Equilibria are Isolated, Strict Local Nash Equilibria in 'Almost All' Zero-Sum Continuous Games",
        "book_title": "2019 IEEE 58th Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            }
        ],
        "abstract": "We prove that differential Nash equilibria are generic amongst local Nash equilibria in continuous zero-sum games. That is, there exists an open-dense subset of zero-sum games for which local Nash equilibria are nondegenerate differential Nash equilibria. The result extends previous results to the zero-sum setting, where we obtain even stronger results; in particular, we show that local Nash equilibria are generically hyperbolic critical points. We further show that differential Nash equilibria of zero-sum games are structurally stable. The purpose for presenting these extensions is the recent renewed interest in zero-sum games within machine learning and optimization. Adversarial learning and generative adversarial network approaches are touted to be more robust than the alternative. Zero-sum games are at the heart of such approaches. Many works proceed under the assumption of hyperbolicity of critical points. Our results justify this assumption by showing `almost all' zero-sum games admit local Nash equilibria that are hyperbolic.",
        "doi": "10.1109/CDC40024.2019.9030203",
        "isbn": "978-1-7281-1398-2",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2019-12",
        "pages": "6899-6904"
    },
    {
        "id": "authors:8wfkd-9vp35",
        "collection": "authors",
        "collection_id": "8wfkd-9vp35",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213700306",
        "type": "monograph",
        "title": "Policy-Gradient Algorithms Have No Guarantees of Convergence in Linear Quadratic Games",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Jordan",
                "given_name": "Michael I.",
                "orcid": "0000-0001-8935-817X",
                "clpid": "Jordan-Michael-I"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We show by counterexample that policy-gradient algorithms have no guarantees of even local convergence to Nash equilibria in continuous action and state space multi-agent settings. To do so, we analyze gradient-play in N-player general-sum linear quadratic games, a classic game setting which is recently emerging as a benchmark in the field of multi-agent learning. In such games the state and action spaces are continuous and global Nash equilibria can be found be solving coupled Ricatti equations. Further, gradient-play in LQ games is equivalent to multi agent policy-gradient. We first show that these games are surprisingly not convex games. Despite this, we are still able to show that the only critical points of the gradient dynamics are global Nash equilibria. We then give sufficient conditions under which policy-gradient will avoid the Nash equilibria, and generate a large number of general-sum linear quadratic games that satisfy these conditions. In such games we empirically observe the players converging to limit cycles for which the time average does not coincide with a Nash equilibrium. The existence of such games indicates that one of the most popular approaches to solving reinforcement learning problems in the classic reinforcement learning setting has no local guarantee of convergence in multi-agent settings. Further, the ease with which we can generate these counterexamples suggests that such situations are not mere edge cases and are in fact quite common.",
        "doi": "10.48550/arXiv.1907.03712",
        "publisher": "arXiv",
        "publication_date": "2019-07-08"
    },
    {
        "id": "authors:snptr-yqq39",
        "collection": "authors",
        "collection_id": "snptr-yqq39",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213656891",
        "type": "monograph",
        "title": "Convergence Analysis of Gradient-Based Learning with Non-Uniform Learning Rates in Non-Cooperative Multi-Agent Settings",
        "author": [
            {
                "family_name": "Chasnov",
                "given_name": "Benjamin",
                "orcid": "0000-0003-3484-2997",
                "clpid": "Chasnov-Benjamin"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Burden",
                "given_name": "Samuel A.",
                "clpid": "Burden-Sameul-A"
            }
        ],
        "abstract": "Considering a class of gradient-based multi-agent learning algorithms in non-cooperative settings, we provide local convergence guarantees to a neighborhood of a stable local Nash equilibrium. In particular, we consider continuous games where agents learn in (i) deterministic settings with oracle access to their gradient and (ii) stochastic settings with an unbiased estimator of their gradient. Utilizing the minimum and maximum singular values of the game Jacobian, we provide finite-time convergence guarantees in the deterministic case. On the other hand, in the stochastic case, we provide concentration bounds guaranteeing that with high probability agents will converge to a neighborhood of a stable local Nash equilibrium in finite time. Different than other works in this vein, we also study the effects of non-uniform learning rates on the learning dynamics and convergence rates. We find that much like preconditioning in optimization, non-uniform learning rates cause a distortion in the vector field which can, in turn, change the rate of convergence and the shape of the region of attraction. The analysis is supported by numerical examples that illustrate different aspects of the theory. We conclude with discussion of the results and open questions.",
        "doi": "10.48550/arXiv.1906.00731",
        "publisher": "arXiv",
        "publication_date": "2019-05-30"
    },
    {
        "id": "authors:a8c6w-z2554",
        "collection": "authors",
        "collection_id": "a8c6w-z2554",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213653378",
        "type": "monograph",
        "title": "On Finding Local Nash Equilibria (and Only Local Nash Equilibria) in Zero-Sum Games",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Jordan",
                "given_name": "Michael I.",
                "clpid": "Jordan-M-I"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We propose local symplectic surgery, a two-timescale procedure for finding local Nash equilibria in two-player zero-sum games. We first show that previous gradient-based algorithms cannot guarantee convergence to local Nash equilibria due to the existence of non-Nash stationary points. By taking advantage of the differential structure of the game, we construct an algorithm for which the local Nash equilibria are the only attracting fixed points. We also show that the algorithm exhibits no oscillatory behaviors in neighborhoods of equilibria and show that it has the same per-iteration complexity as other recently proposed algorithms. We conclude by validating the algorithm on two numerical examples: a toy example with multiple Nash equilibria and a non-Nash equilibrium, and the training of a small generative adversarial network (GAN).",
        "doi": "10.48550/arXiv.1901.00838",
        "publisher": "arXiv",
        "publication_date": "2019-01-03"
    },
    {
        "id": "authors:2dbdw-9pw59",
        "collection": "authors",
        "collection_id": "2dbdw-9pw59",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215867",
        "type": "book_section",
        "title": "On the Analysis of Cyclic Drug Schedules for Cancer Treatment using Switched Dynamical Systems",
        "book_title": "2018 IEEE Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Chapman",
                "given_name": "Margaret P.",
                "clpid": "Chapman-Margaret-P"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric V.",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Langer",
                "given_name": "Ellen",
                "orcid": "0000-0003-0352-1859",
                "clpid": "Langer-Ellen"
            },
            {
                "family_name": "Sears",
                "given_name": "Rosalie",
                "orcid": "0000-0003-1558-2413",
                "clpid": "Sears-Rosalie"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire J.",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            }
        ],
        "abstract": "Motivated by our prior work on a Triple Negative breast cancer cell line, the focus of this paper is controller synthesis for cancer treatment, through the use of drug scheduling and a switched dynamical system model. Here we study a cyclic schedule of d drugs with maximal waiting times between drug inputs, where each drug is applied once per cycle in any order. We suppose that some of the d drugs are highly toxic to normal cells and that these drugs can shrink the live cancer cell population. The remaining drugs are less toxic to normal cells and can only reduce the growth rate of the live cancer cell population. Also, we assume that waiting time bounds related to toxicity, or to the onset of resistance, are available for each drug. A cancer cell population is said to be stable if the number of live cells tends to zero, as time becomes sufficiently large. In the absence of modeling error, we derive conditions for exponential stability. In the presence of modeling error, we prove exponential stability and derive a settling time, under certain mathematical conditions on the error. We conclude the paper with a numerical example that uses models which were identified on Triple Negative breast cancer cell line data.",
        "doi": "10.1109/CDC.2018.8619490",
        "isbn": "978-1-5386-1395-5",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2018-12",
        "pages": "3503-3509"
    },
    {
        "id": "authors:vcvee-qm253",
        "collection": "authors",
        "collection_id": "vcvee-qm253",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215940",
        "type": "book_section",
        "title": "Gradient-based inverse risk-sensitive reinforcement learning",
        "book_title": "2017 IEEE 56th Annual Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Fiez",
                "given_name": "Tanner",
                "clpid": "Fiez-Tanner"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We address the problem of inverse reinforcement learning in Markov decision processes where the agent is risksensitive. In particular, we model risk-sensitivity in a reinforcement learning framework by making use of models of human decision-making having their origins in behavioral psychology and economics. We propose a gradient-based inverse reinforcement learning algorithm that minimizes a loss function defined on the observed behavior. We demonstrate the performance of the proposed technique on two examples, the first of which is the canonical Grid World example and the second of which is an MDP modeling passengers' decisions regarding ride-sharing. In the latter, we use pricing and travel time data from a ride-sharing company to construct the transition probabilities and rewards of the MDP.",
        "doi": "10.1109/CDC.2017.8264535",
        "isbn": "978-1-5090-2873-3",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2017-12",
        "pages": "5796-5801"
    },
    {
        "id": "authors:nx17y-01p34",
        "collection": "authors",
        "collection_id": "nx17y-01p34",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213649911",
        "type": "monograph",
        "title": "A Multi-Armed Bandit Approach for Online Expert Selection in Markov Decision Processes",
        "author": [
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Dong",
                "given_name": "Roy",
                "orcid": "0000-0001-8034-4329",
                "clpid": "Dong-Roy"
            },
            {
                "family_name": "R\u00fabies Royo",
                "given_name": "Vicen\u00e7",
                "clpid": "R\u00fabies-Royo-Vicen\u00e7"
            },
            {
                "family_name": "Tomlin",
                "given_name": "Claire",
                "orcid": "0000-0003-3192-3185",
                "clpid": "Tomlin-Claire-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We formulate a multi-armed bandit (MAB) approach to choosing expert policies online in Markov decision processes (MDPs). Given a set of expert policies trained on a state and action space, the goal is to maximize the cumulative reward of our agent. The hope is to quickly find the best expert in our set. The MAB formulation allows us to quantify the performance of an algorithm in terms of the regret incurred from not choosing the best expert from the beginning. We first develop the theoretical framework for MABs in MDPs, and then present a basic regret decomposition identity. We then adapt the classical Upper Confidence Bounds algorithm to the problem of choosing experts in MDPs and prove that the expected regret grows at worst at a logarithmic rate. Lastly, we validate the theory on a small MDP.",
        "doi": "10.48550/arXiv.1707.05714",
        "publisher": "arXiv",
        "publication_date": "2017-07-18"
    },
    {
        "id": "authors:vedyk-y5k08",
        "collection": "authors",
        "collection_id": "vedyk-y5k08",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-213646411",
        "type": "monograph",
        "title": "Optimal Causal Imputation for Control",
        "author": [
            {
                "family_name": "Dong",
                "given_name": "Roy",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Dong-Roy"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "The widespread applicability of analytics in cyber-physical systems has motivated research into causal inference methods. Predictive estimators are not sufficient when analytics are used for decision making; rather, the flow of causal effects must be determined. Generally speaking, these methods focus on estimation of a causal structure from experimental data. In this paper, we consider the dual problem: we fix the causal structure and optimize over causal imputations to achieve desirable system behaviors for a minimal imputation cost. First, we present the optimal causal imputation problem, and then we analyze the problem in two special cases: 1) when the causal imputations can only impute to a fixed value, 2) when the causal structure has linear dynamics with additive Gaussian noise. This optimal causal imputation framework serves to bridge the gap between causal structures and control.",
        "doi": "10.48550/arXiv.1703.07049",
        "publisher": "arXiv",
        "publication_date": "2017-03-21"
    },
    {
        "id": "authors:5jg1y-yaw95",
        "collection": "authors",
        "collection_id": "5jg1y-yaw95",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222215263",
        "type": "book_section",
        "title": "To observe or not to observe: Queuing game framework for urban parking",
        "book_title": "2016 IEEE 55th Conference on Decision and Control (CDC)",
        "author": [
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Dowling",
                "given_name": "Chase",
                "clpid": "Dowling-Chase"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Zhang",
                "given_name": "Baosen",
                "clpid": "Zhang-Baosen"
            }
        ],
        "abstract": "We model parking in urban centers as a set of parallel queues and overlay a game theoretic structure. We model arriving drivers as utility maximizers and consider two games: one in which it is free to observe the queue length and one in which it is not. Not only do we compare the Nash induced welfare to the socially optimal welfare, confirming the usual result that Nash is worse for society, we also show that by other performance metrics more commonly used in transportation- such as occupancy and time spent circling-the Nash solution is suboptimal. We find that gains to welfare do not require everyone to observe. Through simulation, we explore a more complex scenario where drivers decide based the queueing game whether or not to enter a collection of queues over a network. Our simulated models use parameters informed by real-world data collected by the Seattle Department of Transportation.",
        "doi": "10.1109/CDC.2016.7799079",
        "isbn": "978-1-5090-1837-6",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2016-12",
        "pages": "5286-5291"
    },
    {
        "id": "authors:tcken-s8w33",
        "collection": "authors",
        "collection_id": "tcken-s8w33",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20210903-222216008",
        "type": "book_section",
        "title": "Understanding the impact of parking on urban mobility via routing games on queue-flow networks",
        "author": [
            {
                "family_name": "Calderone",
                "given_name": "Daniel",
                "clpid": "Calderone-Daniel"
            },
            {
                "family_name": "Mazumdar",
                "given_name": "Eric",
                "orcid": "0000-0002-1815-269X",
                "clpid": "Mazumdar-Eric"
            },
            {
                "family_name": "Ratliff",
                "given_name": "Lillian J.",
                "orcid": "0000-0001-8936-0229",
                "clpid": "Ratliff-Lillian-J"
            },
            {
                "family_name": "Sastry",
                "given_name": "S. Shankar",
                "clpid": "Sastry-S-Shankar"
            }
        ],
        "abstract": "We derive a new routing game model for urban centers that takes into account parking-related traffic along with all other traffic. In particular, we combine a queuing game model for on-street parking with a classical routing game to create a queue-routing game where parking traffic selects a parking zone (block-face) in addition to their route through the network. We show that this game is a potential game. We construct practical examples using subsections of the Seattle downtown area to illustrate the usefulness of this modeling paradigm and to examine how parking-traffic can impact overall congestion and the route choices of other drivers. By varying the cost of parking in different parking zones, we demonstrate that parking-related traffic can be adjusted to satisfy a particular objective.",
        "doi": "10.1109/CDC.2016.7799444",
        "publisher": "IEEE",
        "publication_date": "2016-12"
    }
]