@phdthesis{10.7907/Z9BG2KZG, author = {Cai, Wuhan Desmond}, title = {Electricity Markets for the Smart Grid: Networks, Timescales, and Integration with Control}, school = {California Institute of Technology}, year = {2016}, doi = {10.7907/Z9BG2KZG}, url = {https://resolver.caltech.edu/CaltechTHESIS:05262016-112813537}, abstract = {
We are at the dawn of a significant transformation in the electric industry. Renewable generation and customer participation in grid operations and markets have been growing at tremendous rates in recent years, and these trends are expected to continue. They are likely to be accompanied by both engineering and market integration challenges. Therefore, to incorporate these resources efficiently into the grid, it is important to address the inefficiencies in existing markets. The goal of this thesis is to contribute new insights towards improving the design of electricity markets.
This thesis makes three main contributions. First, we provide insights into how the economic dispatch mechanism could be designed to account for price-anticipating participants. We study this problem in the context of a networked Cournot competition with a market maker, and we give an algorithm to find improved market clearing designs. Our findings illustrate the potential inefficiencies in existing markets and provide a framework for improving the design of the markets. Second, we provide insights into the strategic interactions between generation flexibility and forward markets. Our key insight is that spot market capacity constraints can significantly impact the efficiency and existence of equilibrium in forward markets, as they give producers incentives to strategically withhold offers from the markets. Third, we provide insights into how optimization decomposition theory can guide optimal design of the architecture of power systems control. In particular, we illustrate a context where decomposition theory enables us to jointly design market and control mechanisms to allocate resources efficiently across both the economic dispatch and frequency regulation timescales.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Low, Steven H.}, } @mastersthesis{10.7907/Z9RB72J4, author = {Agarwal, Anish}, title = {A Model For Residential Adoption of Photovoltaic Systems}, school = {California Institute of Technology}, year = {2015}, doi = {10.7907/Z9RB72J4}, url = {https://resolver.caltech.edu/CaltechTHESIS:03202015-082016718}, abstract = {The rapid rise in residential photovoltaic (PV) adoption in the past half decade has created a need in the electricity industry for a widely accessible model that estimates PV adoption based on a combination of different business and policy decisions. This work analyzes historical adoption patterns and finds fiscal savings to be the single most important factor in PV adoption, with significantly greater predictive power compared to all other socioeconomic factors including income and education. Based on our findings, we create an application, available on Google App Engine (GAE), that allows all stakeholders including policymakers, power system researchers and regulators to study the complex and coupled relationship between PV adoption, utility economics and grid sustainability. The application allows users to experiment with different customer demographics, tier structures and subsidies, hence allowing them to tailor the application to the geographic region they are studying. This study then demonstrates the different types of analyses possible with the application by studying the relative impact of different policies regarding tier structures, fixed charges and PV prices on PV adoption.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/FRGW-AF26, author = {Bose, Subhonmesh}, title = {An Integrated Design Approach to Power Systems: From Power Flows to Electricity Markets}, school = {California Institute of Technology}, year = {2014}, doi = {10.7907/FRGW-AF26}, url = {https://resolver.caltech.edu/CaltechTHESIS:06012014-040224456}, abstract = {The power system is on the brink of change. Engineering needs, economic forces and environmental factors are the main drivers of this change. The vision is to build a smart electrical grid and a smarter market mechanism around it to fulfill mandates on clean energy. Looking at engineering and economic issues in isolation is no longer an option today; an integrated design approach is needed. In this thesis, I shall revisit some of the classical questions on the engineering operation of power systems that deal with the nonconvexity of power flow equations. Then I shall explore some issues of the interaction of these power flow equations with electricity markets to address the fundamental issue of market power in a deregulated market environment. Finally, motivated by the emergence of new storage technologies, I present an interesting result on the investment decision problem of placing storage over a power network. The goal of this study is to demonstrate that modern optimization and game theory can provide unique insights into this complex system.
Some of the ideas carry over to applications beyond power systems.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Low, Steven H.}, } @phdthesis{10.7907/5D60-FG88, author = {Olson, Michael James}, title = {Cloud Computing Services for Seismic Networks}, school = {California Institute of Technology}, year = {2014}, doi = {10.7907/5D60-FG88}, url = {https://resolver.caltech.edu/CaltechTHESIS:08242013-182604077}, abstract = {This thesis describes a compositional framework for developing situation awareness applications: applications that provide ongoing information about a user’s changing environment. The thesis describes how the framework is used to develop a situation awareness application for earthquakes. The applications are implemented as Cloud computing services connected to sensors and actuators. The architecture and design of the Cloud services are described and measurements of performance metrics are provided. The thesis includes results of experiments on earthquake monitoring conducted over a year. The applications developed by the framework are (1) the CSN — the Community Seismic Network — which uses relatively low-cost sensors deployed by members of the community, and (2) SAF — the Situation Awareness Framework — which integrates data from multiple sources, including the CSN, CISN — the California Integrated Seismic Network, a network consisting of high-quality seismometers deployed carefully by professionals in the CISN organization and spread across Southern California — and prototypes of multi-sensor platforms that include carbon monoxide, methane, dust and radiation sensors.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/QFM5-FH06, author = {Faulkner, Matthew Nicholas}, title = {Community Sense and Response Systems}, school = {California Institute of Technology}, year = {2014}, doi = {10.7907/QFM5-FH06}, url = {https://resolver.caltech.edu/CaltechTHESIS:04152014-111007328}, abstract = {The proliferation of smartphones and other internet-enabled, sensor-equipped consumer devices enables us to sense and act upon the physical environment in unprecedented ways. This thesis considers Community Sense-and-Response (CSR) systems, a new class of web application for acting on sensory data gathered from participants’ personal smart devices. The thesis describes how rare events can be reliably detected using a decentralized anomaly detection architecture that performs client-side anomaly detection and server-side event detection. After analyzing this decentralized anomaly detection approach, the thesis describes how weak but spatially structured events can be detected, despite significant noise, when the events have a sparse representation in an alternative basis. Finally, the thesis describes how the statistical models needed for client-side anomaly detection may be learned efficiently, using limited space, via coresets.
The Caltech Community Seismic Network (CSN) is a prototypical example of a CSR system that harnesses accelerometers in volunteers’ smartphones and consumer electronics. Using CSN, this thesis presents the systems and algorithmic techniques to design, build and evaluate a scalable network for real-time awareness of spatial phenomena such as dangerous earthquakes.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Krause, R. Andreas and Chandy, K. Mani and Heaton, Thomas H.}, } @mastersthesis{10.7907/NBQ4-6Q72, author = {Faulkner, Matthew Nicholas}, title = {Selective Data Gathering in Community Sensor Networks}, school = {California Institute of Technology}, year = {2014}, doi = {10.7907/NBQ4-6Q72}, url = {https://resolver.caltech.edu/CaltechTHESIS:04102014-131741107}, abstract = {Smartphones and other powerful sensor-equipped consumer devices make it possible to sense the physical world at an unprecedented scale. Nearly 2 million Android and iOS devices are activated every day, each carrying numerous sensors and a high-speed internet connection. Whereas traditional sensor networks have typically deployed a fixed number of devices to sense a particular phenomenon, community networks can grow as additional participants choose to install apps and join the network. In principle, this allows networks of thousands or millions of sensors to be created quickly and at low cost. However, making reliable inferences about the world using so many community sensors involves several challenges, including scalability, data quality, mobility, and user privacy.
This thesis focuses on how learning at both the sensor- and network-level can provide scalable techniques for data collection and event detection. First, this thesis considers the abstract problem of distributed algorithms for data collection, and proposes a distributed, online approach to selecting which set of sensors should be queried. In addition to providing theoretical guarantees for submodular objective functions, the approach is also compatible with local rules or heuristics for detecting and transmitting potentially valuable observations. Next, the thesis presents a decentralized algorithm for spatial event detection, and describes its use in detecting strong earthquakes within the Caltech Community Seismic Network. Despite the fact that strong earthquakes are rare and complex events, and that community sensors can be very noisy, our decentralized anomaly detection approach obtains theoretical guarantees for event detection performance while simultaneously limiting the rate of false alarms.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Heaton, Thomas H.}, } @phdthesis{10.7907/MZWJ-T222, author = {Liu, Annie Hsin-Wen}, title = {Sensor Networks for Geospatial Event Detection - Theory and Applications}, school = {California Institute of Technology}, year = {2013}, doi = {10.7907/MZWJ-T222}, url = {https://resolver.caltech.edu/CaltechTHESIS:06062013-224746692}, abstract = {This thesis presents theories, analyses, and algorithms for detecting and estimating parameters of geospatial events with today’s large, noisy sensor networks. A geospatial event is initiated by a significant change in the state of points in a region in a 3-D space over an interval of time. After the event is initiated it may change the state of points over larger regions and longer periods of time.
Networked sensing is a typical approach for geospatial event detection. In contrast to traditional sensor networks composed of a small number of high-quality (and expensive) sensors, trends in personal computing devices and consumer electronics have made it possible to build large, dense networks at a low cost. The changes in sensor capability, network composition, and system constraints call for new models and algorithms suited to the opportunities and challenges of the new generation of sensor networks.
This thesis offers a single unifying model and a Bayesian framework for analyzing different types of geospatial events in such noisy sensor networks. It presents algorithms and theories for estimating the speed and accuracy of detecting geospatial events as a function of parameters from both the underlying geospatial system and the sensor network. Furthermore, the thesis addresses network scalability issues by presenting rigorous scalable algorithms for data aggregation for detection. These studies provide insights to the design of networked sensing systems for detecting geospatial events.
In addition to providing an overarching framework, this thesis presents theories and experimental results for two very different geospatial problems: detecting earthquakes and hazardous radiation. The general framework is applied to these specific problems, and predictions based on the theories are validated against measurements of systems in the laboratory and in the field.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/CH76-YW89, author = {Mou, Judy}, title = {Situation Awareness Application}, school = {California Institute of Technology}, year = {2013}, doi = {10.7907/CH76-YW89}, url = {https://resolver.caltech.edu/CaltechTHESIS:06272013-211013400}, abstract = {This thesis describes the design and implementation of a situation awareness application. The application gathers data from sensors including accelerometers for monitoring earthquakes, carbon monoxide sensors for monitoring fires, radiation detectors, and dust sensors. The application also gathers Internet data sources including data about traffic congestion on daily commute routes, information about hazards, news relevant to the user of the application, and weather. The application sends the data to a Cloud computing service which aggregates data streams from multiple sites and detects anomalies. Information from the Cloud service is then displayed by the application on a tablet, computer monitor, or television screen. The situation awareness application enables almost all members of a community to remain aware of critical changes in their environments.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani and Bunn, Julian J.}, } @mastersthesis{10.7907/8S5K-FX17, author = {Olson, Michael James}, title = {Cloud Computing for Citizen Science}, school = {California Institute of Technology}, year = {2012}, doi = {10.7907/8S5K-FX17}, url = {https://resolver.caltech.edu/CaltechTHESIS:08232011-122341638}, abstract = {My thesis describes the design and implementation of systems that empower individuals to help their communities respond to critical situations and to participate in research that helps them understand and improve their environments. People want to help their communities respond to threats such as earthquakes, wildfires, mudslides and hurricanes, and they want to participate in research that helps them understand and improve their environment. “Citizen Science” projects that facilitate this interaction include projects that monitor climate change, water quality and animal habitats. My thesis explores the design and analysis of community-based sense and response systems that enable individuals to participate in critical community activities and scientific research that monitors their environments.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/8FRW-ZF17, author = {White, Jerome S.}, title = {Applying Formal Methods to Distributed Algorithms Using Local-Global Relations}, school = {California Institute of Technology}, year = {2011}, doi = {10.7907/8FRW-ZF17}, url = {https://resolver.caltech.edu/CaltechTHESIS:05312011-123940546}, abstract = {This thesis deals with the design and analysis of distributed systems in which homogeneous, autonomous agents collaborate to achieve a common goal. The class of problems studied includes consensus algorithms in which all agents eventually come to an agreement about a specific action. The thesis proposes a framework, called local-global, for analyzing these systems. A local interaction is an interaction among subsets of agents, while a global interaction is one among all agents in the system. Global interactions, in practice, are rare, yet they are the basis by which correctness of a system is measured. 
For example, if the problem is to compute the average of a measurement made separately by each agent, and all the agents in the system could exchange values in a single action, then the solution is straightforward: each agent gets the values of all others and computes the average independently. However, if the system consists of a large number of agents with unreliable communication, this scenario is highly unlikely. Thus, the design challenge is to ensure that sequences of local interactions lead, or converge, to the same state as a global interaction.
The local-global framework addresses this challenge by describing each local interaction as if it were a global one, encompassing all agents within the system. This thesis outlines the concept in detail, using it to design algorithms, prove their correctness, and ultimately develop executable implementations that are reliable. To this end, the tools of formal methods are employed: algorithms are modeled, and mechanically checked, within the PVS theorem prover; programs are also verified using the Spin model checker; and interface specification languages are used to ensure local-global properties are still maintained within Java and C# implementations. The thesis presents example applications of the framework and discusses a class of problems to which the framework can be applied.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/SCQF-VP66, author = {Pilotto, Concetta}, title = {Systematic Design and Formal Verification of Multi-Agent Systems}, school = {California Institute of Technology}, year = {2011}, doi = {10.7907/SCQF-VP66}, url = {https://resolver.caltech.edu/CaltechTHESIS:05232011-013046516}, abstract = {This thesis presents methodologies for verifying the correctness of multi-agent systems operating in hostile environments. Verification of these systems is challenging because of their inherent concurrency and unreliable communication medium. The problem is exacerbated if the model representing the multi-agent system includes infinite or uncountable data types.
We first consider message-passing multi-agent systems operating over an unreliable communication medium. We assume that messages in transit may be lost, delayed or received out-of-order. We present conditions on the system that reduce the design and verification of a message-passing system to the design and verification of the corresponding shared-state system operating in a friendly environment. Our conditions can be applied both to discrete and continuous agent trajectories.
We apply our results to verify a general class of multi-agent systems whose goal is to solve a system of linear equations. We discuss this class in detail and show that mobile robot linear pattern-formation schemes are instances of this class. In these protocols, the goal of the team of robots is to reach a given pattern formation.
We present a framework that allows verification of message-passing systems operating over an unreliable communication medium. This framework is implemented as a library of PVS theorem prover meta-theories and is built on top of the timed automata framework. We discuss the applicability of this tool. As an example, we automatically check correctness of the mobile robot linear pattern formation protocols.
We conclude with an analysis of the verification of multi-agent systems operating in hostile environments. Under these more general assumptions, we derive conditions on the agents’ protocols and properties of the environment that ensure bounded steady-state system error. We apply these results to message-passing multi-agent systems that allow for lost, delayed, received out-of-order or forged messages, and to multi-agent systems whose goal is tracking time-varying quantities. We show that pattern formation schemes are robust to leader dynamics, i.e., in these schemes, followers eventually form the pattern defined by the new positions of the leaders.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/KXWA-7Y81, author = {Florian, Mihai}, title = {SCALE: Source Code Analyzer for Locating Errors}, school = {California Institute of Technology}, year = {2010}, doi = {10.7907/KXWA-7Y81}, url = {https://resolver.caltech.edu/CaltechTHESIS:04142010-122136677}, abstract = {This thesis presents the design and implementation of SCALE, a tool for systematic software testing of multi-threaded C applications that use the pthread library. SCALE exhaustively explores the nondeterminism introduced by thread schedulings and tries to find violations of safety properties. We have designed SCALE to be flexible so that it is easy to add and combine different exploration and state space reduction algorithms. In this thesis we describe the currently implemented reduction algorithms, of which the most important ones are local execution cycle detection and super-step partial order reduction. To exemplify how SCALE can be used, we have applied it to a few multi-threaded applications, measured its performance and compared the results to those obtained by other tools. While checking the implementation of a non-blocking queuing algorithm, we were able to find a previously unknown bug that appears only in some unexpected thread interleavings.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Holzmann, Gerard J.}, } @mastersthesis{10.7907/XF1E-XW61, author = {Liu, Annie Hsin-Wen}, title = {Simulation and Implementation of Distributed Sensor Network for Radiation Detection}, school = {California Institute of Technology}, year = {2010}, doi = {10.7907/XF1E-XW61}, url = {https://resolver.caltech.edu/CaltechTHESIS:07072010-160100413}, abstract = {The problem of monitoring and searching for threats that involve radiological weapons is extremely challenging because of the high variance in background radiation, the presence of benign sources and possible shielding on harmful sources. We present in this thesis a collection of algorithms and analyses that center around the problem of radiation detection with a distributed sensor network. We studied the basic characteristics of a radiation sensor network and focused on the tradeoffs between false positive rate, true positive rate, and time to detect one or more radiation sources in a large area. Three major results came out of this thesis work. First, we developed a simulation platform, modified from a multiplayer game engine, that is capable of simulating realistic data in highly dynamic environments. Second, we provided mathematical and simulation analyses regarding critical system parameters such as the number of sensors and sensor placement. We also introduced a robust data fusion and parameter estimation method based on a Bayesian framework. Lastly, we described initial work to construct a ground mobile sensor for indoor search and surveillance purposes.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, }
@mastersthesis{10.7907/JY2K-6194, author = {Pilotto, Concetta}, title = {Local-to-Global in Multi-Agent Systems}, school = {California Institute of Technology}, year = {2007}, doi = {10.7907/JY2K-6194}, url = {https://resolver.caltech.edu/CaltechETD:etd-05232007-084106}, abstract = {The thesis presents performance analysis and simulation results for algorithms that compute global functions from local interactions in multi-agent systems. We focus on optimization problems because many problems can be formulated in terms of designing algorithms that optimize some global function subject to local constraints.
We model the environment as an adversary of the system. The environment is able to attack the system, modifying it in arbitrary ways: some agents and/or communication links can be disabled.
Computations proceed by opportunistically employing the resources available at each point, progressing rapidly when more resources are available and slowing down when resources become unavailable.
We investigate and compare two techniques. In the first one, each sub-system (which we call a group) behaves like a centralized system, i.e., it solves its specific optimization sub-problem by applying a central algorithm. We investigate this technique, called self-similarity, showing examples where it works and where it fails, and carry out performance analysis on some problems. Then, we introduce a second technique, which is completely decentralized but synchronous: agents simultaneously make local updates to their current estimates of some true parameter using the estimates of adjacent agents. We prove the correctness of this technique on some specific problems by applying tools from distributed systems (variant functions) and control theory (equilibrium points of a dynamical system).
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/YZF9-ZN10, author = {Capponi, Agostino}, title = {Estimation Problems in Sense and Respond Systems}, school = {California Institute of Technology}, year = {2006}, doi = {10.7907/YZF9-ZN10}, url = {https://resolver.caltech.edu/CaltechETD:etd-05222006-142127}, abstract = {In this thesis we study problems arising in the design of sense and respond systems and present analytical solutions to them as well as results from experiments dealing with real systems. Sense and respond systems employ sensors and other sources of data to sense what is happening in their environments, process the obtained information, and respond appropriately. A goal of the processing stage is to reconstruct the best possible estimate of the state of the environment using messages received from sensors. Due to the large number of messages that need to be processed, it is desirable to have algorithms that can incrementally process the received measurements and recover the state. The state estimation process becomes more problematic if measurements obtained from the sensors are noisy or they are sent at unpredictable times. First, we study models of state estimation and present algorithms that can incrementally compute accurate linear state estimates of the surrounding environment. Second, we define a framework called predicate signaling that allows us to make tradeoffs between message generation rates and the quality of the state estimate through specification of suitable predicates. We show how predicate signaling generalizes commonly used signaling schemes and present a detailed analysis based on stochastic processes to evaluate schemes based on predicate signaling.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/35Y5-H853, author = {Tian, Lu}, title = {Resource Allocation in Streaming Environments}, school = {California Institute of Technology}, year = {2006}, doi = {10.7907/35Y5-H853}, url = {https://resolver.caltech.edu/CaltechETD:etd-05262006-165801}, abstract = {The proliferation of the Internet and sensor networks has fueled the development of applications that process, analyze, and react to continuous data streams in a near-real-time manner. Examples of such stream applications include network traffic monitoring, intrusion detection, financial services, large-scale reconnaissance, and surveillance.
Unlike tasks in traditional scheduling problems, these stream processing applications are interacting repeating tasks, where iterations of computation are triggered by the arrival of new inputs. Furthermore, these repeated tasks are elastic in the quality of service, and the economic value of a computation depends on the time taken to execute it; for example, an arbitrage opportunity can disappear in seconds. Given limited resources, it is not possible to process all streams without delay. The more resources available to a computation, the less time it takes to process the input, and thus the more value it generates. Therefore, efficiently utilizing a network of limited distributed resources to optimize the net economic value of computations forms a new paradigm in the well-studied field of resource allocation.
We propose using a new performance model and resource reservation system as the solution space, and present two scheduling/resource allocation heuristics for processing streams in a distributed heterogeneous computing environment to optimize economic value. Both heuristics are based on market mechanisms; one uses a centralized market and the other decentralized markets. We prove bounds on performance and present measurements to show that the performance of these two heuristics is near-optimal and significantly better than straightforward load-balancing heuristics.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/4MH9-9104, author = {Khorlin, Andrey}, title = {Scheduling in Distributed Stream Processing Systems}, school = {California Institute of Technology}, year = {2006}, doi = {10.7907/4MH9-9104}, url = {https://resolver.caltech.edu/CaltechETD:etd-05242006-175006}, abstract = {Stream processing systems receive continuous streams of messages with relatively raw information and produce streams of messages with processed information. The utility of a stream-processing system depends, in part, on the accuracy and timeliness of the output. Streams in complex event processing systems are processed on distributed systems; several steps are taken on different processors to process each incoming message, and messages may be enqueued between steps. This work explores the problem of distributed dynamic control of streams to optimize the total utility provided by the system. A system can be controlled using central control or distributed control. In the former case, a single central controller maintains the state of the entire system and controls the operation of all processors. In distributed control systems, each processor controls itself based on its state and information from other processors. A challenge of distributed control is that timeliness of output depends only on the total end-to-end time and is otherwise independent of the delays at each separate processor, whereas the controller for each processor takes action to control only the steps on that processor and cannot directly control the entire network. In this work, we discuss a framework for design and analysis of control-based scheduling algorithms for a distributed stream processing system and illustrate our framework with two concrete scheduling algorithms.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/9GXT-BD03, author = {Ginis, Roman}, title = {Automating Resource Management for Distributed Business Processes}, school = {California Institute of Technology}, year = {2002}, doi = {10.7907/9GXT-BD03}, url = {https://resolver.caltech.edu/CaltechETD:etd-11012005-093745}, abstract = {A distributed business process is a set of related activities performed by independent resources offering services for lease. For instance, constructing an office building involves hundreds of activities such as excavating, plumbing and carpentry performed by machines and subcontractors, whose activities are related in time, space, cost and other dimensions. In the last decade, Internet-based middleware has linked consumers with resources and services, enabling consumers to more efficiently locate, select and reserve the resources for use in business processes. This recent capability creates an opportunity for a new automation of resource management that can assign the optimal resources to the activities of a business process to maximize its utility to the consumer and yield substantial gains in operational efficiency. This thesis explores two basic problems towards automating the management of distributed business processes: 1. How to choose the best resources for the activities of a process (the Activity Resource Assignment - ARA - optimization problem); and 2. How to reserve the resources chosen for a process as an atomic operation when time has value, i.e., commit all resources or no resources (the Distributed Service Commit problem - DSC).
I believe these will become the typical optimization and agreement problems between consumers and producers in a networked service economy. I propose a solution to the ARA optimization problem by modeling it as a special type of integer program, and I give a method for solving it efficiently for a large class of practical cases. Given a problem instance, the method extracts the structure of the problem and, using a new concept of variable independence, recursively simplifies it while retaining at least one optimal solution. The reduction operation is guided by a novel procedure that makes use of recent advances in tree-decomposition of graphs from graph complexity theory. The solution to the DSC problem is an algorithm based on financial instruments and the two-phase commit protocol adapted for services. The method achieves an economically sensible atomic reservation agreement between multiple distributed resources and consumers in a free market environment. I expect the automation of resource management addressed in my thesis and elsewhere will pave the way for more efficient business operations in the networked economy.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/AC6E-WE21, author = {Zimmerman, Daniel Marc}, title = {Dynamic UNITY}, school = {California Institute of Technology}, year = {2002}, doi = {10.7907/AC6E-WE21}, url = {https://resolver.caltech.edu/CaltechETD:etd-12072001-160019}, abstract = {Dynamic distributed systems, where a changing set of communicating processes must interoperate to accomplish particular computational tasks, are becoming extremely important. Designing and implementing these systems, and verifying the correctness of the designs and implementations, are difficult tasks. The goal of this thesis is to make these tasks easier. This thesis presents a specification language for dynamic distributed systems, based on Chandy and Misra’s UNITY language. It extends the UNITY language to enable process creation, process deletion, and dynamic communication patterns. The thesis defines an execution model for systems specified in this language, which leads to a proof logic similar to that of UNITY. While extending UNITY logic to correctly handle systems with dynamic behavior, this logic retains the familiar UNITY operators and most of the proof rules associated with them. The thesis presents specifications for three example dynamic distributed systems to demonstrate the use of the specification language, and full correctness proofs for two of these systems and a partial correctness proof for the third to demonstrate the use of the proof logic. The thesis details a method for determining whether a system in the specification language can be transformed into an implementation in a standard programming language, as well as a method for performing this transformation on those specifications that can. This guarantees a correct implementation for any specification that can be so transformed.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/TVTD-E826, author = {Kiniry, Joseph Roland}, title = {Kind Theory}, school = {California Institute of Technology}, year = {2002}, doi = {10.7907/TVTD-E826}, url = {https://resolver.caltech.edu/CaltechETD:etd-06062002-164914}, abstract = {My contribution, described in this thesis, is a theory that is meant to assist in the construction of complex software systems. I propose a notion of structure that is independent of language, formalism, or problem domain. I call this new abstraction a kind, and its related formal system, kind theory. I define a type system that models the structural aspects of kind theory. I also define an algebra that models this type system and provides a logic in which one can specify and execute computations.
A reflective definition of kind theory is reviewed. This reflective specification depends upon a basic ontology for mathematics. By specifying the theory in itself, I provide an example of how one can use kind theory to reason about reuse in general formal systems.
I provide examples of the use of kind theory in reasoning about software constructs in several domains of software engineering. I also discuss a set of software tools that I have constructed that realize or use kind theory.
A logical framework is used to specify a type-theoretic and algebraic model for the theory. Using this basic theorem prover, one can reason about software systems using kind theory. Also, I have constructed a reuse repository that supports online collaboration, houses software assets, helps search for components that match specifications, and more. This repository is designed to use kind theory (via the logical framework) for the representation of, and reasoning about, software assets.
Finally, I propose a set of language-independent specification constructs called semantic properties which have a semantics specified in kind theory. I show several uses of these constructs, all of which center on reasoning about reusable component-based software, by giving examples of how these constructs are applied to programming and specification languages. I discuss how the availability of these constructs and the associated theory impact the software development process.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Hickey, Jason J. and Chandy, K. Mani}, } @phdthesis{10.7907/44QZ-R465, author = {Schooler, Eve Meryl}, title = {Why multicast protocols (don’t) scale: an analysis of multipoint algorithms for scalable group communication}, school = {California Institute of Technology}, year = {2001}, doi = {10.7907/44QZ-R465}, url = {https://resolver.caltech.edu/CaltechETD:etd-08272001-155016}, abstract = {With the exponential growth of the Internet, there is a critical need to design efficient, scalable and robust protocols to support the network infrastructure. A new class of protocols has emerged to address these challenges, and these protocols rely on a few key techniques, or micro-algorithms, to achieve scalability. By scalability, we mean the ability of groups of communicating processes to grow very large in size. We study the behavior of several of these fundamental techniques that appear in many deployed and emerging Internet standards: Suppression, Announce-Listen, and Leader Election. These algorithms are based on the principle of efficient multipoint communication, often in combination with periodic messaging. We assume a loosely-coupled communication model, where acknowledged messaging among groups of processes is not required. Thus, processes infer information from the periodic receipt or loss of messages from other processes. We present an analysis, validated by simulation, of the performance tradeoffs of each of these techniques. Toward this end, we derive a series of performance metrics that help us to evaluate these algorithms under lossy conditions: expected response time, network usage, memory overhead, consistency attainable, and convergence time. In addition, we study the impact of both correlated and uncorrelated loss on groups of communicating processes. As a result, this thesis provides insights into the scalability of multicast protocols that rely upon these techniques. We provide a systematic framework for calibrating as well as predicting protocol behavior over a range of operating conditions. In the process, we establish a general methodology for the analysis of these and other scalability techniques. Finally, we explore a theory of composition; if we understand the behavior of these micro-algorithms, then we can bound analytically the performance of the more complex algorithms that rely upon them.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/z89g-gm27, author = {Sivilotti, Paolo A. G.}, title = {A method for the specification, composition, and testing of distributed object systems}, school = {California Institute of Technology}, year = {1998}, doi = {10.7907/z89g-gm27}, url = {https://resolver.caltech.edu/CaltechETD:etd-01252008-095244}, abstract = {The formation of a distributed system from a collection of individual components requires the ability for components to exchange syntactically well-formed messages. Several technologies exist that provide this fundamental functionality, as well as the ability to locate components dynamically based on syntactic requirements. The formation of a correct distributed system requires, in addition, that these interactions between components be semantically well-formed. The method presented in this thesis is intended to assist in the development of correct distributed systems.
We present a specification methodology based on three fundamental operators from temporal logic: initially, next, and transient. From these operators we derive a collection of higher-level operators that are used for component specification. The novel aspect of our specification methodology is that we require that these operators be used in the following restricted manner:
• A specification statement can refer only to properties that are local to a single component.
• A single component must be able to guarantee unilaterally the validity of the specification statement for any distributed system of which it is a part.
Specification statements that conform to these two restrictions we call certificates.
The first restriction is motivated by our desire for these component specifications to be testable in a relatively efficient manner. In fact, we describe a set of simplified certificates that can be translated into a testing harness by a simple parser with very little programmer intervention. The second restriction is motivated by our desire for a simple theory of composition: If a certificate is a property of a component, that certificate is also a property of any system containing that component.
Another novel aspect of our methodology is the introduction of a new temporal operator that combines both safety and progress properties. The concept underlying this operator has been used implicitly before, but by extracting this concept into a first-class operator, we are able to prove several new theorems about such properties. We demonstrate the utility of this operator and of our theorems by using them to simplify several proofs.
The restrictions imposed on certificates are severe. Although they have pleasing consequences as described above, they can also lead to lengthy proofs of system properties that are not simple conjunctions. To compensate for this difficulty, we introduce collections of certificates that we call services. Services facilitate proof reuse by encapsulating common component interactions used to establish various system properties.
We experiment with our methodology by applying it to several extended examples. These experiments illustrate the utility of our approach and convince us of the practicality of component-based distributed system development. This thesis addresses three parts of the development cycle for distributed object systems: (i) the specification of systems and components, (ii) the compositional reasoning used to verify that a collection of components satisfies a system specification, and (iii) the validation of component implementations.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/5ma9-h225, author = {Massingill, Berna Linda}, title = {A structured approach to parallel programming}, school = {California Institute of Technology}, year = {1998}, doi = {10.7907/5ma9-h225}, url = {https://resolver.caltech.edu/CaltechETD:etd-01242008-074143}, abstract = {Parallel programs are more difficult to develop and reason about than sequential programs. There are two broad classes of parallel programs: (1) programs whose specifications describe ongoing behavior and interaction with an environment, and (2) programs whose specifications describe the relation between initial and final states. This thesis presents a simple, structured approach to developing parallel programs of the latter class that allows much of the work of development and reasoning to be done using the same techniques and tools used for sequential programs. In this approach, programs are initially developed in a primary programming model that combines the standard sequential model with a restricted form of parallel composition that is semantically equivalent to sequential composition. Such programs can be reasoned about using sequential techniques and executed sequentially for testing. They are then transformed for execution on typical parallel architectures via a sequence of semantics-preserving transformations, making use of two secondary programming models, both based on parallel composition with barrier synchronization and one incorporating data partitioning. The transformation process for a particular program is typically guided and assisted by a parallel programming archetype, an abstraction that captures the commonality of a class of programs with similar computational features and provides a class-specific strategy for producing efficient parallel programs. Transformations may be applied manually or via a parallelizing compiler. Correctness of transformations within the primary programming model is proved using standard sequential techniques. Correctness of transformations between the programming models and between the models and practical programming languages is proved using a state-transition-based operational model.
This thesis presents: (1) the primary and secondary programming models, (2) an operational model that provides a common framework for reasoning about programs in all three models, (3) a collection of example program transformations with arguments for their correctness, and (4) two groups of experiments in which our overall approach was used to develop example applications. The specific contribution of this work is to present a unified theory/practice framework for this approach to parallel program development, tying together the underlying theory, the program transformations, and the program-development methodology.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/mytw-er77, author = {Thornley, John William}, title = {A parallel programming model with sequential semantics}, school = {California Institute of Technology}, year = {1996}, doi = {10.7907/mytw-er77}, url = {https://resolver.caltech.edu/CaltechETD:etd-01042008-085720}, abstract = {Parallel programming is more difficult than sequential programming in part because of the complexity of reasoning, testing, and debugging in the context of concurrency. In this thesis, we present and investigate a parallel programming model that provides direct control of parallelism in a notation with sequential semantics. Our model consists of a standard sequential imperative programming notation extended with the following three pragmas:
The parallelizable sequence of statements pragma indicates that a sequence of statements can be executed as parallel threads.
The parallelizable for-loop statement pragma indicates that the iterations of a for-loop statement can be executed as parallel threads.
The single-assignment type pragma indicates that variables of a given type are assigned at most once and that ordinary assignment and evaluation operations can be used as implicit communication and synchronization operations between parallel threads.
In our model, a parallel program is simply an equivalent sequential program with added pragmas. The placement of the pragmas is subject to a small set of restrictions that ensure the equivalence of the parallel and sequential semantics. We prove that if standard sequential execution of a program (by ignoring the pragmas) satisfies a given specification and the pragmas are used correctly, parallel execution of the program (as directed by the pragmas) is guaranteed to satisfy the same specification.
Our model allows parallel programs to be developed using sequential reasoning, testing, and debugging techniques, prior to parallel execution for performance. Since parallelism is specified directly, sophisticated analysis and compilation techniques are not required to extract parallelism from programs. However, it is important that parallel performance issues such as granularity, load balancing, and locality be considered throughout algorithm and program development.
We describe a series of programming experiments performed on up to 32 processors of a shared-memory multiprocessor system. These experiments indicate that for a wide range of problems:
Our model can express sophisticated parallel algorithms with significantly less complication than traditional explicit parallel programming models.
Parallel programs in our model execute as efficiently as sequential programs on one processor and deliver good speedups on multiple processors.
Program development with our model is less difficult than with traditional explicit parallel programming models because reasoning, testing, and debugging are performed using sequential methods.
We believe that our model provides the basis of the method of choice for a large number of moderate-scale, medium-grained parallel programming applications.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/g8mm-x344, author = {Kryukova, Svetlana A.}, title = {Parallel Programming Archetypes in Combinatorics and Optimization}, school = {California Institute of Technology}, year = {1995}, doi = {10.7907/g8mm-x344}, url = {https://resolver.caltech.edu/CaltechTHESIS:04112012-083734292}, abstract = {A Parallel Programming Archetype is a language-independent program design strategy. We describe two archetypes in combinatorics and optimization, their components, implementations, and example applications developed using an archetype.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @phdthesis{10.7907/G620-GG65, author = {Hofstee, H. Peter}, title = {Synchronizing processes}, school = {California Institute of Technology}, year = {1995}, doi = {10.7907/G620-GG65}, url = {https://resolver.caltech.edu/CaltechETD:etd-10112007-083903}, abstract = {In this monograph we develop a mathematical theory for a concurrent language based on angelic and demonic nondeterminism. An underlying model is defined with sets of sets of sequences of synchronization actions. A refinement relation is defined for the model, and equivalence classes under this relation are identified with processes. Processes, together with the refinement relation, form a complete distributive lattice. We define a language with parallel composition, sequential composition, angelic and demonic nondeterminism, and an operator that connects pairs of synchronization actions into synchronization statements and hides these actions from observation. Also, angelic and demonic iteration are defined. All operators are monotonic with respect to the refinement ordering. Many algebraic properties are proven from these definitions. We study duals of processes and prove that they can be related to the most demonic environment in which a process will not deadlock. We give a simple example to illustrate the use of duals. We study classes of programs for which angelic choice can be implemented by probing the environment for its next action. To this end specifications of processes are extended with simple conditions on the environment. We give a more elaborate example to illustrate the use of these conditions and the compositionality of the method. Finally we briefly introduce an operational model that describes implementable processes only. This model mentions probes explicitly. Such a model may form a basis for a language that is less restrictive than ours, but that will also have less attractive algebraic properties.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Van de Snepscheut, Jan L. A. and Chandy, K. Mani}, } @phdthesis{10.7907/ynt2-nn65, author = {Leino, K. Rustan M.}, title = {Toward reliable modular programs}, school = {California Institute of Technology}, year = {1995}, doi = {10.7907/ynt2-nn65}, url = {https://resolver.caltech.edu/CaltechETD:etd-10162007-111256}, abstract = {Software is being applied in an ever-increasing number of areas. Computer programs and systems are becoming more complex and consist of more delicately interconnected components. Errors surfacing in programs are still a conspicuous and costly problem. It’s about time we employed some techniques that guide us toward higher reliability of practical programs. The goal of this thesis is just that.
This thesis presents a theory for verifying programs based on Dijkstra’s weakest-precondition calculus. A variety of program paradigms used in practice, such as exceptions, procedures, object orientation, and modularity, are dealt with.
The thesis sheds new light on the theory behind programs with exceptions. It develops an elegant algebra, and shows it to be the foundation on which the semantics of exceptions rests. It develops a trace semantics for programs with exceptions, from which the weakest-precondition semantics is derived. It also proves a theorem on programming methodology relating to exceptions, and applies this theorem in the novel derivation of a simple program.
The thesis presents a simple model for object-oriented data types, in which concerns have been separated, resulting in the simplicity of the model.
To deal with large programs, this thesis takes a practical look at modularity and abstraction. It reveals a problem that arises in writing specifications for modular programs where previous techniques fail. The thesis introduces a new specification construct that solves that problem, and gives a formal proof of soundness for modular verification using that construct. The model is a generalization of Hoare’s classical data refinement. However, there are more problems to be solved. The thesis reports on some of these problems and suggests some future directions toward more reliable modular programs.
}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Van de Snepscheut, Jan L. A. and Chandy, K. Mani and Nelson, Greg}, } @mastersthesis{10.7907/ync0-mn20, author = {Carlin, Peter}, title = {Distributed linear algebra on networks of workstations}, school = {California Institute of Technology}, year = {1994}, doi = {10.7907/ync0-mn20}, url = {https://resolver.caltech.edu/CaltechTHESIS:03272012-094222255}, abstract = {This thesis describes the development of a portion of a distributed linear algebra library for use on networks of workstations. The library was designed with special consideration towards three characteristics of networks of workstations: small numbers of processes, availability of multithreading, and high communication latency. Two aspects of the library are highlighted. First, modifications to message passing primitives to permit their use in a multithreaded environment. Second, modifications to basic linear algebra algorithms to improve their performance on networks of workstations. A model of distributed linear algebra on networks of workstations is developed, and used to predict the performance of the modified algorithms. These predictions are compared to experimental results on several networks of workstations.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/q2v9-sq59, author = {Manohar, Rajit}, title = {Mach-Based Channel Library}, school = {California Institute of Technology}, year = {1994}, doi = {10.7907/q2v9-sq59}, url = {https://resolver.caltech.edu/CaltechTHESIS:04122012-094328813}, abstract = {No abstract.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, } @mastersthesis{10.7907/ngrt-q826, author = {Sivilotti, Paul}, title = {A Verified Integration of Imperative Parallel Programming Paradigms in an Object-Oriented Language}, school = {California Institute of Technology}, year = {1993}, doi = {10.7907/ngrt-q826}, url = {https://resolver.caltech.edu/CaltechTHESIS:04122012-114004175}, abstract = {CC++ is a parallel object-oriented programming language that uses parallel composition, atomic functions, and single-assignment variables to express concurrency. We show that this programming paradigm is equivalent to several traditional imperative communication and synchronization models, namely: semaphores, monitors, and asynchronous channels. A collection of libraries which integrates these traditional models with CC++ is specified, implemented, and formally verified.}, address = {1200 East California Boulevard, Pasadena, California 91125}, } @mastersthesis{10.7907/a7ga-s950, author = {Massingill, Berna Linda}, title = {Integrating Task and Data Parallelism}, school = {California Institute of Technology}, year = {1993}, doi = {10.7907/a7ga-s950}, url = {https://resolver.caltech.edu/CaltechTHESIS:04122012-130550435}, abstract = {Many models of concurrency and concurrent programming have been proposed; most can be categorized as either task-parallel (based on functional decomposition) or data-parallel (based on data decomposition). Task-parallel models are most effective for expressing irregular computations; data-parallel models are most effective for expressing regular computations. Some computations, however, exhibit both regular and irregular aspects. For such computations, a better programming model is one that integrates task and data parallelism. 
This report describes one model of integrating task and data parallelism, some problem classes for which it is effective, and a prototype implementation.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Chandy, K. Mani}, }