@phdthesis{10.7907/fpmm-a552, author = {Hirokawa, Soichi}, title = {Dynamics of Protein-Mediated Polymer Coupling and their Implications in Antibody Production and Emergent Patterning}, school = {California Institute of Technology}, year = {2023}, doi = {10.7907/fpmm-a552}, url = {https://resolver.caltech.edu/CaltechTHESIS:08252022-153300158}, abstract = {

Proteins serve a wide range of functions in and out of the cell, from signaling and gene regulation to transport and structural reinforcement. These functions are usually carried out through interactions with other molecules in the surrounding medium such as other proteins, small molecules, or DNA. One such class of proteins is what I will call polymer-coupling proteins: these proteins intentionally link identical polymers or two regions of the same polymer together so that their coupled interactions critically affect the state of the biological system. A vast array of such proteins exists in nature with roles such as the looping of DNA to physically inhibit the expression of a gene or the formation of the cytoskeleton which provides a cell with its shape. In this thesis, I use in vitro experimental methods to explore two cases of coupling proteins and understand their roles not only in reorganizing their complementary polymers but also in influencing the final state of their respective systems.

In Chapter 2, I examine the starting process for the assembly of an antibody-encoding gene in developing immune cells. Motivated by data suggesting that some antibodies are less likely to be made than others, I explore how the early steps of constructing an antibody-encoding gene affect this uneven frequency of assembly. To initiate recombination, the recombination-activating gene (RAG) protein complex simultaneously binds and cuts two well-recognized sequences neighboring two antibody-encoding gene segments in order to allow other proteins to combine these exposed segments together. The sequences to which the RAG protein performs its binding and cutting functions have certain identifiable sequence patterns but can still vary. Through a single-molecule experimental method known as tethered particle motion (TPM) I show how changes to the binding site sequence can enhance or diminish the propensity of the RAG protein to bind and cut the DNA and thus explore the consequences of these altered interactions in the unequal selection for certain antibody gene segments over others.

In Chapter 3, I turn to questions of the emergence of order from self-organization in biological systems. From the molecular to the population scale, biology constantly demonstrates that with an injection of energy, systems can be driven out of equilibrium and allow for the organization of their constituents. A case of such organization in cells is the coupling of microtubules by motor proteins to create and maintain the mitotic spindle, a critical biological architecture for ensuring that each cell obtains a copy of the genome during division. In vitro experiments that exploit similar motor-microtubule interactions have become a convenient way to identify the effects of perturbing a key player such as motor properties or boundary conditions of the system on the spatiotemporal extent of organization. However, in many instances, the dynamics under which such cytoskeletal systems reduce their entropy over the course of creating order have not been carefully examined in experimental systems. Here, I use engineered light-dimerizable motors that can give rise to the formation of a highly connected network that compacts to form a dense, organized structure, and through the use of a noninvasive imaging technique observe how the polymers that make up the network continually reorganize in the bulk during a global contraction of the network.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/5ee6-j454, author = {Banks, Rachel A.}, title = {Experimental and Theoretical Studies of Non-Equilibrium Systems: Motor-Microtubule Assemblies and the Human-Earth System}, school = {California Institute of Technology}, year = {2022}, doi = {10.7907/5ee6-j454}, url = {https://resolver.caltech.edu/CaltechTHESIS:11282021-042001335}, abstract = {

Systems out of equilibrium are pervasive around us. In fact, being out of equilibrium is a key property of life, as described by Erwin Schrödinger in his series of essays “What is life?”. Through the consumption of energy, i.e. food, living organisms achieve ordered states that would be very unlikely to occur at equilibrium, such as the mitotic spindle during cell division, swarms of bacteria, or flocks of starlings. The Earth system is another example of a non-equilibrium system. The state of the Earth has been evolving for billions of years, often under the influence of life. Today, humanity is a dominant influence forcing the Earth system to new states. Understanding these non-equilibrium systems has posed many challenges; in this thesis, we work towards quantitatively dissecting and gaining an intuition for the functioning of both a molecular-scale and a planetary-scale non-equilibrium system.

Underlying many cellular functions such as cell division and transportation of organelles is the cytoskeleton composed of motor proteins and their constituent filaments. Among the key components are kinesin motors, which consume chemical energy to walk along and reorganize microtubules. Collections of these motors and microtubules are able to form organized structures. Understanding how these structures are formed has remained an open question. In Chapter 2, we develop a system of kinesin motors and microtubules wherein motor activity is controlled by light, thereby gaining spatiotemporal control over the formation of motor-microtubule assemblies. We demonstrate the creation of a variety of structures of different sizes and geometry, and measure how length and time scales of these assemblies depend on the activated region.

A remaining question was how the microscopic details of the interaction between motors and microtubules affect the dynamics and steady-state structure formed. In Chapter 3, with our scheme for light-control in hand, we extended the system to a variety of motor proteins that have different speeds, processivities (how many steps they take before unbinding from the microtubule), directionalities (which end of the microtubule they walk towards), and forces they are able to exert. We found that the size of steady-state structures, distribution of motors within assemblies, and rate of contraction of networks depend on motor properties. Further, we demonstrate that various structures can be formed by combining different motors. This work begins to build a connection between the detailed microscopic interactions of cytoskeletal components and the larger-scale structures they form.

Chapter 4 begins our work on understanding the state of the human-Earth system. A major hurdle to quantitatively understanding this system is the difficulty of finding and parsing the relevant data, which is often within long, complicated reports. In order to facilitate access to this data, we created the Human Impacts Database, which houses a collection of > 300 carefully curated values related to human impacts on the Earth, introduced in Chapter 4. In this chapter, we describe the format of the database as well as demonstrate how it can be harnessed to gain a more holistic perspective on humanity’s influence on the Earth.

Having this data is only a starting point towards deciphering the ways that humans are altering the state of the Earth, though. In Chapter 5, we combine these quantitative measurements with simple order-of-magnitude estimates to gain an intuition for the magnitude of several of the values. In this way, we show that many of the ways humanity is affecting the Earth can be tied back to how much land, water, and power we use. We further contextualize the magnitude of human influence by comparing human activities to natural analogs, finding that humans currently rival natural processes in influencing the state of the Earth system.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, month = {July}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/p3rg-m937, author = {Beeler, Suzannah Michelle}, title = {Deciphering Regulation in Escherichia coli: From Genes to Genomes}, school = {California Institute of Technology}, year = {2022}, doi = {10.7907/p3rg-m937}, url = {https://resolver.caltech.edu/CaltechTHESIS:08182021-053622635}, abstract = {

Advances in DNA sequencing have revolutionized our ability to read genomes. However, even in the most well-studied of organisms, the bacterium Escherichia coli, for ≈ 65\% of promoters we remain ignorant of their regulation. Until we crack this regulatory Rosetta Stone, efforts to read and write genomes will remain haphazard. We introduce a new method, Reg-Seq, that links massively-parallel reporter assays with mass spectrometry to produce a base pair resolution dissection of more than 100 E. coli promoters in 12 growth conditions. We demonstrate that the method recapitulates known regulatory information. Then, we examine regulatory architectures for more than 80 promoters which previously had no known regulatory information. In many cases, we also identify which transcription factors mediate their regulation. This method clears a path for highly multiplexed investigations of the regulatory genome of model organisms, with the potential of moving to an array of microbes of ecological and medical relevance.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/kpc2-b345, author = {Razo-Mejia, Manuel}, title = {Physical Biology of Cellular Information Processing}, school = {California Institute of Technology}, year = {2022}, doi = {10.7907/kpc2-b345}, url = {https://resolver.caltech.edu/CaltechTHESIS:08242021-212609828}, abstract = {

The state of matter that we define as life is different from anything else we have encountered so far in the universe. Living systems not only perpetuate their existence out of equilibrium against the will of the second law of thermodynamics, but they do so while keeping up with an ever-changing environment. A key part of this capacity to adapt to environmental changes is the ability of organisms to gather information from their surroundings to put together an adequate response to the challenges presented to them. This thesis presents an effort to understand, from first principles, this fundamental feature of information gathering that all life on Earth shares. We dig into the physics behind one of the most pervasive mechanisms through which living systems sense and respond to the environment—the ability to turn on and off genes. In doing so, we hope to uncover general principles of how organisms deal with the problem of collecting information about the world that surrounds them.

In Chapter 1, we develop the theoretical and conceptual tools to navigate the rest of the thesis. I introduce the idea of gene regulation, as well as different theoretical models of this pervasive biological phenomenon. We also delve into the realm of information theory and learn how the plastic concept of information can be mathematically defined and quantified.

The second stop in our exploration (Chapter 2) asks the following question: can we understand, from first principles, how it is that proteins allow cells to regulate their genes on-demand upon sensing environmental cues? For this, we explore the physics behind transcriptional control due to allosteric transcription factors. Using simple quasi-equilibrium models of the two processes involved in this type of regulation—the regulation of the gene by the binding and unbinding of the transcription factor, and the regulation of the activity of the transcription factor itself by the binding and unbinding of an effector molecule—we are able to predict the input-output function of a simple genetic circuit, and compare such predictions with experimental determinations of the mean response of a population of bacterial cells.

We then expand on these insights to ask questions about the inescapable cell-to-cell variability that isogenic cells encounter. For this, we have to leave behind the pure thermodynamic framework and work in the language of chemical kinetics. This allows us to make predictions beyond the mean input-output gene expression response of cells by reconstructing full gene expression distributions. With these probabilistic input-output functions, in Chapter 3 we formalize the question of the amount of information that cells can gather from the environment. For this, we turn to information-theoretic concepts of maximal mutual information (otherwise known as channel capacity) between the state of the environment and the gene expression response from bacterial cells. Finally, we compare our predictions of the maximum amount of information—measured in bits—that cells can gather with single-cell inferences of this quantity.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/1fzr-1240, author = {Galstyan, Vahe}, title = {Studies in Physical Biology: Exploring Allosteric Regulation, Enzymatic Error Correction, and Cytoskeletal Self-Organization Using Theory and Modeling}, school = {California Institute of Technology}, year = {2022}, doi = {10.7907/1fzr-1240}, url = {https://resolver.caltech.edu/CaltechTHESIS:08242021-212959886}, abstract = {

Physical biology offers powerful tools for quantitatively dissecting the various aspects of cellular life that one cannot attribute to inanimate matter. Signature examples of living matter include adaptation, self-organization, and division. In this thesis, we explore different interconnected facets of these processes using statistical mechanics, nonequilibrium thermodynamics, and biophysical modeling.

One of the key mechanisms underlying physiological and evolutionary adaptation is allosteric regulation. It allows cells to dynamically respond to changes in the state of the environment often expressed through altered levels of different environmental cues. The first thread of our work is dedicated to exploring the combinatorial diversity of responses available to allosteric proteins that are subject to multi-ligand regulation. We demonstrate that proteins characterized through the Monod-Wyman-Changeux model of allostery and operating at thermodynamic equilibrium are capable of eliciting a wide range of response behaviors which include the kinds known from the field of digital circuits (e.g., NAND logic response), as well as more sophisticated computations such as ratiometric sensing.

Despite the fact that biomolecules at thermodynamic equilibrium are able to orchestrate a variety of fascinating behaviors, the cell is ultimately ‘alive’ because it constantly metabolizes nutrients and generates energy to drive functions that cannot be sustained in the absence of energy consumption. One prominent example of such a function is nonequilibrium error correction present in high-fidelity processes such as protein synthesis, DNA replication, or pathogen recognition. We begin the second thread of our work by providing a conceptual understanding of the prevailing mechanism used in explaining this high-fidelity behavior, namely that of kinetic proofreading. Specifically, we develop an allostery-based mechanochemical model of a kinetic proofreader where chemical driving is replaced with a mechanical engine with tunable knobs which allow modulating the amount of dissipation in a transparent way. We demonstrate how varying levels of error correction can be attained at different regimes of dissipation and offer intuitive interpretations for the conditions required for efficient biological proofreading.

We then extend the notion of error correction to equilibrium enzymes not endowed with structural features typically required for proofreading. We show that, under physiological conditions, purely diffusing enzymes can take advantage of the existing nonequilibrium organization of their substrates in space and enhance the fidelity of catalysis. Our proposed mechanism called spatial proofreading offers a novel perspective on spatial structures and compartmentalization in cells as a route to specificity.

In the last thread of the thesis, we make a transition from molecular-scale studies to the mesoscopic scale, and explore the principles of self-organization in nonequilibrium structures formed in reconstituted microtubule-motor mixtures. In particular, we develop a theoretical framework that predicts the spatial distribution of kinesin motors in radially symmetric microtubule asters formed under various conditions using optogenetic control. The model manages to accurately recapitulate the experimentally measured motor profiles through effective parameters that are specific for each kind of kinesin motor used. Our theoretical work of rigorously assessing the motor distribution therefore offers an avenue for understanding the link between the microscopic motor properties (e.g., processivity or binding affinity) and the large-scale structures they create.

In all, the thesis encompasses a series of case studies with shared themes of allostery and nonequilibrium, highlighting the capacity of living matter to perform remarkable tasks inaccessible to nonliving materials.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/d042-rp26, author = {Morrison, Muir}, title = {Statistical Mechanics of Problems in Transcription Regulation}, school = {California Institute of Technology}, year = {2021}, doi = {10.7907/d042-rp26}, url = {https://resolver.caltech.edu/CaltechTHESIS:06082021-005042886}, abstract = {

As the quantity of sequenced genome data continues to multiply, our understanding of the transcriptional regulation of genomes has lagged behind. This deficit impinges on research throughout biology, from fundamental questions of how evolution proceeds to eminently practical questions such as how antibiotic resistance arises.

In this thesis we present three threads that address the question of transcriptional regulation from distinct perspectives. The first thread focuses on the simplest nontrivial regulation motif common in bacteria. We analyze in turn a sampling of the myriad mathematical models previously proposed in the literature for this system. We attempt to shine light on the similarities and differences of the models’ predictions, clarify their microscopic interpretations, and offer guidance as to situations when one model or another should be preferred or even distinguishable.

The second thread considers a substantially more complicated genetic circuit, for which we build a minimal phenomenological model that retains intuitive microscopic meaning for all its parameters. The model neatly explains recent experimental observations of bistability in the circuit, and suggests natural generalizations to other metabolically important gene circuits with qualitatively similar architectures.

Motivation for the third thread comes from even more complicated transcriptional regulation problems with a multitude of regulatory proteins and binding sites, where even enumerating all possible DNA-protein complexes manually is a formidable challenge. Here we propose a method to tackle this complexity that uses ideas from quantum field theory to encode assembly rules for macromolecular complexes. By specifying a small set of rules, we avoid manual enumeration of the much larger set of complexes, allowing the formalism to automatically generate this set for us.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/0sk3-hd69, author = {Ireland, William Thornton}, title = {A Quantitative and High-Throughput Approach to Gene Regulation in Escherichia coli}, school = {California Institute of Technology}, year = {2020}, doi = {10.7907/0sk3-hd69}, url = {https://resolver.caltech.edu/CaltechTHESIS:03262020-092455420}, abstract = {

Measurements in biology have reached a level of precision that demands quantitative modeling. This is particularly true in the field of gene regulation, where concepts from physics such as thermodynamics have allowed for accurate models to be made.

Many issues remain. DNA sequencing is routine enough to sequence new genomes in days and cheap enough to use deep sequencing to perform precision measurements, but our ability to interpret the wealth of genomic data is lagging behind, especially in the realm of gene regulation. The primary reason is that we lack any information whatsoever as to the basic regulatory details of approximately 65 percent of operons even in E. coli, the best understood organism in biology. As a result we cannot use our hard-won modeling efforts to understand any of these operons.

This work takes steps to address these issues. First we use 30 LacI mutants as a test case to prove that we can make quantitatively accurate models of gene expression and sequence-dependent binding energies of transcription factors and RNA polymerase.

Next we note that much of the quantitative insight available on transcriptional regulation relies on work on only a few model regulatory systems such as LacI as was considered above. We develop an approach, through a combination of massively parallel reporter assays, mass spectrometry, and information-theoretic modeling that can be used to dissect bacterial promoters in a systematic and scalable way. We demonstrate that we can uncover a qualitative list of transcription factor binding sites as well as their associated quantitative details from both well-studied and previously uncharacterized promoters in E. coli.

Finally we extend the above method to over 100 E. coli promoters using over 12 growth conditions. We show the method recapitulates known regulatory information. Then, we examine regulatory architectures for more than 80 promoters which previously had no known regulation. In many cases, we identify which transcription factors mediate their regulation. The method introduced clears a path for fully characterizing the regulatory genome of E. coli and advances towards the goal of using this method on a wide variety of other organisms including other prokaryotes and eukaryotes such as Drosophila melanogaster.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/q8h6-xr92, author = {Chure, Griffin Daniel}, title = {The Molecular Biophysics of Evolutionary and Physiological Adaptation}, school = {California Institute of Technology}, year = {2020}, doi = {10.7907/q8h6-xr92}, url = {https://resolver.caltech.edu/CaltechTHESIS:06022020-102020436}, abstract = {

Central to any definition of Life is the ability to sense changes in one’s environment and respond in kind. Adaptive phenomena can be found across the biological scales ranging from the nanosecond-scale conformational changes of proteins, to temporary rewiring of metabolic networks, to the 3.5 billion years of evolution that produced the enormous biodiversity we see today. This thesis presents a body of work which attempts to examine the overlap between these three scales of adaptation through the quantitative lens of statistical physics. Namely, we examine how molecular, physiological, and evolutionary adaptation governs a feature common to all life – the regulation of gene expression.

We begin by examining the phenomenon of molecular adaptation in the context of allostery, specifically in the context of allosteric transcriptional repressors. Using simple tools of quasi-equilibrium thermodynamics, we derive and experimentally dissect a quantitative model of how such a repressor adapts to different concentrations of an extracellular inducer molecule, modulating the repressor’s activity and thereby gene expression. While the model is relatively simple, it is remarkably powerful in its ability to draw concrete, quantitative predictions about not only the level of gene expression at a given concentration of inducer, but details of how the repressor responds to changes in the inducer concentration. With a few lines of simple mathematics, we are able to use this model to derive a state variable of the simple repression motif which we term the free energy of the regulatory architecture. This permits us to collapse nearly 500 distinct measurements of the level of gene expression onto a master curve defined by this free energy.

We leverage this feature of the model and use data collapse as a method to identify the effects of mutation, a strong evolutionary force responsible for much of the genetic diversity in bacteria. In Chapter 3, we examine how mutations within the allosteric repressor itself can be mapped to changes in the free energy. The precise value of these free energy shifts and their dependence on the inducer concentration reveal different classes of mutations with one class affecting only the DNA-repressor interaction and another class governing the allosteric nature of the repressor. We test these pen-and-paper predictions experimentally and illustrate that given sufficient knowledge of how single mutants behave, the complete phenotypic response of pairwise double mutants can be predicted with quantitative accuracy.

With this framework in hand, we turn to exploring how changes in the physiological state of the cell influence the molecular biophysics of the regulatory architecture. We hypothesize that changes in the source of carbon in the growth medium or changes in the growth temperature can be accounted for by the existing model without any additional parameters. We experimentally show that the parameter values determined in one physiological state are inherited when the available carbon source is varied, but changes in the growth temperature require some additional considerations. Chapter 4 as a whole reveals that, while there remains work to be done both theoretically and experimentally when it comes to temperature variation, thermodynamic models can remain powerful tools to draw predictions of gene expression in different physiological contexts.

Finally, in Chapter 5, we explore physiological adaptation and cellular decision making where it counts – in the survival of cells to environmental insults. We turn our focus beyond transcriptional regulation and consider the relationship between osmotic shocks, the abundance of mechanosensitive channels, and cellular survival with single cell resolution. Using a combination of quantitative microscopy and tricks of statistical inference, we infer how the probability of a cell surviving an osmotic shock scales as a function of the cell’s number of mechanosensitive channels.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/S4CV-T162, author = {Einav, Tal}, title = {Taming the Molecular Dance: Harnessing Statistical Mechanics to Quantitatively Characterize Allosteric Systems}, school = {California Institute of Technology}, year = {2019}, doi = {10.7907/S4CV-T162}, url = {https://resolver.caltech.edu/CaltechTHESIS:06082019-034706928}, abstract = {

The pace of biological research continues to grow at a staggering rate as high-throughput experimental techniques rapidly increase our ability to sequence DNA, quantify cell behavior, and image molecules of all types within the cellular milieu. Given this surge in experimental prowess, the time is ripe to examine how well our conceptual cartoons of biological phenomena can not only recapitulate the data but also successfully predict the outcomes of future experiments.

One of the fundamental challenges in biology is that the space of possible molecules is overwhelmingly large. The number of variants of a moderately-sized protein ($20^{300}$) is larger than the number of atoms in the universe, as is the space of possible bacterial genomes, protein interaction networks, and effector functions; progress on any of these fronts requires a theory-experiment dialogue that can extrapolate our small drop of data to explain large swaths of parameter space.

My thesis strives towards this goal by analyzing a number of central molecular players in biology including enzymes (biological catalysts that accelerate chemical reactions), transcription factors (proteins that bind to DNA and regulate its expression), and ion channels (signaling proteins that regulate ion transport). I develop a quantitative description in each context by harnessing the statistical mechanical Monod-Wyman-Changeux model of allostery which coarse-grains the behavior of a multi-state system into two effective states, demonstrating that these seemingly diverse molecules are all governed by the same fundamental equation.

Writ large, there are two overarching goals encompassed by these projects. The first is to translate our biological knowledge into concrete physical models, enabling us to quantitatively describe how the key molecular components in each system interact to carry out their function. The second goal is to analyze how mutations can be mapped into the fundamental biophysical parameters governing each system. In my opinion, predicting the effects of mutations remains one of the great unsolved problems in biology, and it has been incredibly exciting to make progress on this front.

Looking back at my amazing graduate school experience, one of the most surprising aspects of my PhD was how closely each of my projects revolved around experiments. I entered graduate school as a theoretical physicist expecting to work on esoteric mathematical models, yet the direct connection with data provided a window into the exhilarating world of biology. While I have never physically manipulated these biological systems in the lab, my models allow me to push and prod and examine their behavior from the most mundane to the utterly extreme limits. Through modeling, I test our assumptions of how these systems work and tease out insights into their underlying biophysical mechanism. Most importantly, these models enable me to harness the incredible wealth of hard-won data to weave a few more threads of understanding into our tapestry of how these incredible living systems operate.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/D13T-7868, author = {Barnes, Stephanie Loos}, title = {Decoding the Regulatory Genome: Quantitative Analysis of Transcriptional Regulation in Escherichia coli}, school = {California Institute of Technology}, year = {2018}, doi = {10.7907/D13T-7868}, url = {https://resolver.caltech.edu/CaltechTHESIS:05292018-133205686}, abstract = {

Over the past decades DNA sequencing has become significantly cheaper and faster, which has enabled the accumulation of a huge amount of genomic data. However, much of this genomic data is illegible to us. For noncoding regions of the genome in particular, it is difficult to determine what role is played by specific DNA sequences. Here we focus on regions of DNA that play a role in transcriptional regulation. We develop models and techniques that allow us to discover new regulatory sequences and better understand how DNA sequence determines regulatory output.

We start by considering how quantitative models serve as a powerful tool for testing our understanding of biological systems. We apply a statistical mechanical framework that incorporates the Monod-Wyman-Changeux model to analyze the effects of allostery in simple repression, using the lac operon as a test case. By fitting our model to experimental data, we are able to determine the values of the unknown parameters in our model. We then show that we can use the model to accurately predict the induction responses of an array of simple repression constructs with a variety of repressor copy numbers and repressor binding energies.

Next, we consider how the DNA sequence of a promoter region can provide details about how the promoter is regulated. We begin by describing an approach for discovering regulatory architectures for promoters whose regulation has not previously been studied. We focus on six promoters from E. coli including three well-studied promoters (rel, mar, and lac) to serve as test cases. We use the massively parallel reporter assay Sort-Seq to identify transcription factor binding sites with base-pair resolution, determine the regulatory role of each binding site, and infer energy matrices for each binding site. Then, we use DNA affinity chromatography and mass spectrometry to identify each transcription factor.

We conclude with an in vivo approach for analyzing the sequence-dependence of transcription factor binding energies. Again using Sort-Seq, we show that we can represent transcription factor binding sites using energy matrices in absolute energy units. We then show that these energy matrices can be used to accurately predict the binding energies of mutated binding sites. We provide several examples of how understanding the relationship between DNA sequence and transcription factor binding provides us with a foundation for addressing additional scientific topics, such as the co-evolution of transcription factors and their binding sites.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/Z9DN438T, author = {Belliveau, Nathan Maurice}, title = {Quantitative Dissection of the Allosteric and Sequence-Dependent Regulatory Genome in E. coli}, school = {California Institute of Technology}, year = {2018}, doi = {10.7907/Z9DN438T}, url = {https://resolver.caltech.edu/CaltechTHESIS:01052018-221609680}, abstract = {

Transcriptional regulation of gene expression is one of the most ubiquitous processes in biology. But while the catalog of bacterial genomes continues to expand rapidly, we remain ignorant about how almost all of the genes in these genomes are regulated. One of the ways genes are regulated is through external signals. To that end, we begin by presenting a general theory of allosteric transcriptional regulation using a statistical formulation of the Monod-Wyman-Changeux model, which we rigorously test using the ubiquitous simple repression motif in Escherichia coli. We then move to consider the consequence of the regulatory sequences themselves on gene expression. Here we apply a massively parallel reporter assay, Sort-Seq, to build models that describe the sequence-dependent binding energies of transcription factors and RNA polymerase to DNA. By coupling such models to our thermodynamic models of regulation, we construct a genotype to phenotype mapping that predicts gene expression as a function of regulatory sequence. We first demonstrate this approach in the context of the allosteric simple repression motif, and then show how it can be applied broadly across a bacterial genome, in conjunction with mass spectrometry, to uncover how genes are regulated.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/Z9Q81B91, author = {Mahmoudabadi, Gita}, title = {Virology By The Numbers: A Quantitative Exploration of Viral Energetics, Genomics, and Ecology}, school = {California Institute of Technology}, year = {2018}, doi = {10.7907/Z9Q81B91}, url = {https://resolver.caltech.edu/CaltechTHESIS:04172018-174233725}, abstract = {

Over the past couple of decades, technological advancements in sequencing and imaging have unequivocally proven that the world of viruses is far bigger and more consequential than previously imagined. There are $10^{31}$ viruses estimated to inhabit our planet, outnumbering even bacteria. Despite their astronomical numbers and staggering sequence diversity, environmental viruses are poorly characterized. In this thesis we will demonstrate our three-pronged exploration of viruses through the lenses of energetics (Chapters 2 and 3), genomics (Chapter 4) and ecology (Chapter 5). We will first focus on one of the defining features of viruses, namely their reliance on their host for energy, and demonstrate the energetic cost of building a virus and mounting an infection. In our second study, we present one of the largest surveys of complete viral genomes, providing a comprehensive and quantitative snapshot of viral genomic trends for thousands of viruses. In our third study, we shift our focus towards ecological questions surrounding the large number of commensal phages inhabiting the human body. We discovered that phage community composition could serve as a fingerprint, or a “phageprint” – highly personal and stable over time. To our knowledge, this study is one of the largest studies of human phages and the first to demonstrate the feasibility of human identification based on phage sequences.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/Z9PC308X, author = {Chen, Yi-Ju}, title = {The Mechanical Genome in Regulation and Infection}, school = {California Institute of Technology}, year = {2015}, doi = {10.7907/Z9PC308X}, url = {https://resolver.caltech.edu/CaltechTHESIS:05072015-162723691}, abstract = {Biological information storage and retrieval is a dynamic process that requires the genome to undergo dramatic structural rearrangements. Recent advances in single-molecule techniques have allowed precise quantification of the nano-mechanical properties of DNA [1, 2], and direct in vivo observation of molecules in action [3]. In this work, we will examine elasticity in protein-mediated DNA looping, whose structural rearrangement is essential for transcriptional regulation in both prokaryotes and eukaryotes. We will look at hydrodynamics in the process of viral DNA ejection, which mediates information transfer and exchange and has prominent implications in evolution. As in the case of Kepler’s laws of planetary motion leading to Newton’s gravitational theory, and the allometric scaling laws in biology revealing the organizing principles of complex networks [4], experimental data collapse in these biological phenomena has guided much of our studies and urged us to find the underlying physical principles.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/Z9W9573H, author = {Lovely, Geoffrey A.}, title = {Biophysics of V(D)J Recombination and Genome Packaging: In Singulo Studies on RAG, HMGB1, and TFAM}, school = {California Institute of Technology}, year = {2014}, doi = {10.7907/Z9W9573H}, url = {https://resolver.caltech.edu/CaltechTHESIS:06072014-140700155}, abstract = {

The recombination-activating gene products, RAG1 and RAG2, initiate V(D)J recombination during lymphocyte development by cleaving DNA adjacent to conserved recombination signal sequences (RSSs). The reaction involves DNA binding, synapsis, and cleavage at two RSSs located on the same DNA molecule and results in the assembly of antigen receptor genes. Since their discovery, full-length RAG1 and RAG2 have been difficult to purify, and core derivatives are shown to be most active when purified from adherent 293-T cells. However, the protein yield from adherent 293-T cells is limited. Here we develop a human suspension cell purification and change the expression vector to boost RAG production 6-fold. We use these purified RAG proteins to investigate V(D)J recombination on a mechanistic single molecule level. As a result, we are able to measure the binding statistics (dwell times and binding energies) of the initial RAG binding events with or without its co-factor high mobility group box protein 1 (HMGB1), and to characterize synapse formation at the single-molecule level yielding insights into the distribution of dwell times in the paired complex and the propensity for cleavage upon forming the synapse. We then go on to investigate HMGB1 further by measuring how it compacts single DNA molecules. We observed concentration dependent DNA compaction, differential DNA compaction depending on the divalent cation type, and found that at a particular HMGB1 concentration the percentage of DNA compacted is conserved across DNA lengths. Lastly, we investigate another HMGB protein called TFAM, which is essential for packaging the mitochondrial genome. We present crystal structures of TFAM bound to the heavy strand promoter 1 (HSP1) and to nonspecific DNA. We show TFAM dimerization is dispensable for DNA bending and transcriptional activation, but is required for mtDNA compaction. We propose that TFAM dimerization enhances mtDNA compaction by promoting looping of mtDNA.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, } @phdthesis{10.7907/4J7V-WD59, author = {Jones, Daniel Lawson III}, title = {Sequence-Function Relationships in E. coli Transcriptional Regulation}, school = {California Institute of Technology}, year = {2014}, doi = {10.7907/4J7V-WD59}, url = {https://resolver.caltech.edu/CaltechTHESIS:06022014-165211576}, abstract = {

Understanding how transcriptional regulatory sequence maps to regulatory function remains a difficult problem in regulatory biology. Given a particular DNA sequence for a bacterial promoter region, we would like to be able to say which transcription factors bind there, how strongly they bind, and whether they interact with each other and/or RNA polymerase, with the ultimate objective of integrating knowledge of these parameters into a prediction of gene expression levels. The theoretical framework of statistical thermodynamics provides a useful framework for doing so, enabling us to predict how gene expression levels depend on transcription factor binding energies and concentrations. We used thermodynamic models, coupled with models of the sequence-dependent binding energies of transcription factors and RNAP, to construct a genotype to phenotype map for the level of repression exhibited by the lac promoter, and tested it experimentally using a set of promoter variants from E. coli strains isolated from different natural environments. For this work, we sought to ``reverse engineer'' naturally occurring promoter sequences to understand how variations in promoter sequence affect gene expression. The natural inverse of this approach is to ``forward engineer'' promoter sequences to obtain targeted levels of gene expression. We used a high precision model of RNAP-DNA sequence dependent binding energy, coupled with a thermodynamic model relating binding energy to gene expression, to predictively design and verify a suite of synthetic E. coli promoters whose expression varied over nearly three orders of magnitude.

However, although thermodynamic models enable predictions of mean levels of gene expression, it has become evident that cell-to-cell variability or ``noise'' in gene expression can also play a biologically important role. In order to address this aspect of gene regulation, we developed models based on the chemical master equation framework and used them to explore the noise properties of a number of common E. coli regulatory motifs; these properties included the dependence of the noise on parameters such as transcription factor binding strength and copy number. We then performed experiments in which these parameters were systematically varied and measured the level of variability using mRNA FISH. The results showed a clear dependence of the noise on these parameters, in accord with model predictions.

Finally, one shortcoming of the preceding modeling frameworks is that their applicability is largely limited to systems that are already well-characterized, such as the lac promoter. Motivated by this fact, we used a high throughput promoter mutagenesis assay called Sort-Seq to explore the completely uncharacterized transcriptional regulatory DNA of the E. coli mechanosensitive channel of large conductance (MscL). We identified several candidate transcription factor binding sites, and work is continuing to identify the associated proteins.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/PNX5-Y638, author = {Rydenfelt, Mattias Sven Olof}, title = {The Combinatorics of Transcriptional Regulation}, school = {California Institute of Technology}, year = {2014}, doi = {10.7907/PNX5-Y638}, url = {https://resolver.caltech.edu/CaltechTHESIS:05162014-140751409}, abstract = {The ability to regulate gene expression is of central importance for the adaptability of living organisms to changes in their internal and external environment. At the transcriptional level, binding of transcription factors (TFs) in the vicinity of promoters can modulate the rate at which transcripts are produced, and as such play an important role in gene regulation. TFs with regulatory action at multiple promoters is the rule rather than the exception, with examples ranging from TFs like the cAMP receptor protein (CRP) in E. coli that regulates hundreds of different genes, to situations involving multiple copies of the same gene, such as on plasmids, or viral DNA. When the number of TFs heavily exceeds the number of binding sites, TF binding to each promoter can be regarded as independent. However, when the number of TF molecules is comparable to the number of binding sites, TF titration will result in coupling (“entanglement”) between transcription of different genes. The last few decades have seen rapid advances in our ability to quantitatively measure such effects, which calls for biophysical models to explain these data. Here we develop a statistical mechanical model which takes the TF titration effect into account and use it to predict both the level of gene expression and the resulting correlation in transcription rates for a general set of promoters. To test these predictions experimentally, we create genetic constructs with known TF copy number, binding site affinities, and gene copy number; hence avoiding the need to use free fit parameters. 
Our results clearly prove the TF titration effect and that the statistical mechanical model can accurately predict the fold change in gene expression for the studied cases. We also generalize these experimental efforts to cover systems with multiple different genes, using the method of mRNA fluorescence in situ hybridization (FISH). Interestingly, we can use the TF titration effect as a tool to measure the plasmid copy number at different points in the cell cycle, as well as the plasmid copy number variance. Finally, we investigate the strategies of transcriptional regulation used in a real organism by analyzing the thousands of known regulatory interactions in E. coli. We introduce a “random promoter architecture model” to identify overrepresented regulatory strategies, such as TF pairs which coregulate the same genes more frequently than would be expected by chance, indicating a related biological function. Furthermore, we investigate whether promoter architecture has a systematic effect on gene expression by linking the regulatory data of E. coli to genome-wide expression censuses.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/7SRD-WS94, author = {Bialecka-Fornal, Maja I.}, title = {Single-Cell Analysis of the Physiology of Mechanosensation in Bacteria}, school = {California Institute of Technology}, year = {2013}, doi = {10.7907/7SRD-WS94}, url = {https://resolver.caltech.edu/CaltechTHESIS:05142013-213725125}, abstract = {Escherichia coli is one of the best studied living organisms and a model system for many biophysical investigations. Despite countless discoveries of the details of its physiology, we still lack a holistic understanding of how these bacteria react to changes in their environment. One of the most important examples is their response to osmotic shock. 
One of the mechanistic elements protecting cell integrity upon exposure to sudden changes of osmolarity is the presence of mechanosensitive channels in the cell membrane. These channels are believed to act as tension release valves protecting the inner membrane from rupturing. This thesis presents an experimental study of various aspects of mechanosensation in bacteria. We examine cell survival after osmotic shock and how the number of MscL (Mechanosensitive channel of Large conductance) channels expressed in a cell influences its physiology. We developed an assay that allows real-time monitoring of the rate of the osmotic challenge and direct observation of cell morphology during and after the exposure to osmolarity change. The work described in this thesis introduces tools that can be used to quantitatively determine at the single-cell level the number of expressed proteins (in this case MscL channels) as a function of, e.g., growth conditions. The improvement in our quantitative description of mechanosensation in bacteria allows us to address many, so far unsolved, problems, like the minimal number of channels needed for survival, and can begin to paint a clearer picture of why there are so many distinct types of mechanosensitive channels.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/W40T-PD39, author = {Johnson, Stephanie Lynn}, title = {DNA Mechanics and Transcriptional Regulation in the E. coli lac Operon}, school = {California Institute of Technology}, year = {2012}, doi = {10.7907/W40T-PD39}, url = {https://resolver.caltech.edu/CaltechTHESIS:05112012-140027276}, abstract = {Many gene regulatory motifs in both prokaryotes and eukaryotes involve physical manipulations of the genetic material, often on length scales short enough that the mechanical properties of the DNA significantly impact gene expression. 
One class of such manipulations, called “action at a distance”, includes transcription factor-mediated DNA looping, in which a binding site some distance away on the DNA is brought into close proximity with the transcription machinery at the promoter. DNA looping is a key component of several important regulatory systems in bacteria, and is crucial to the combinatorial control that is common at eukaryotic promoters regulated by more transcription factors than can physically bind adjacent to the promoter. Here we use a prototypical DNA looping protein, the Lac repressor from E. coli, to explore questions regarding the role of DNA mechanics in DNA looping and combinatorial control, particularly concerning the role of sequence flexibility in short-length-scale looping. We combine a statistical mechanical model of looping by the Lac repressor with a single-molecule technique called tethered particle motion that allows us to quantify this looping, and the systematic tuning of four biologically relevant and experimentally tractable parameters: loop length, loop sequence, repressor-DNA affinity, and repressor concentration. We show that this combination is a powerful approach to measuring repressor-DNA binding affinities and sequence-dependent DNA flexibilities in a way that is orthogonal, and therefore complementary, to conventional ensemble assays. Our results show that the sequence dependence to looping is more complicated than has been observed in other contexts, suggesting that “sequence flexibility” as a general term is misleading, and, we argue, that the measurement of sequence flexibilities depends more strongly than previously appreciated on the shape of the deformation used to make the measurement. 
Finally, we present preliminary results with a more complicated system that is a case study for broader issues in combinatorial control, and a new hidden Markov model approach, based on variational Bayesian inference, to analyze these more complicated systems, which we hope will allow more precise dissections of, and more robust extraction of kinetic parameters from, tethered particle motion assays.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/FJW3-G615, author = {Van Valen, David Ashley}, title = {Case Studies in Quantitative Biology: Biochemistry on a Leash and a Single-Molecule Hershey-Chase Experiment}, school = {California Institute of Technology}, year = {2011}, doi = {10.7907/FJW3-G615}, url = {https://resolver.caltech.edu/CaltechTHESIS:05112011-144451576}, abstract = {

The last 50 years of biological research has seen a marked increase in the amount of quantitative data that describes living systems. This wealth of data provides a unique opportunity to recast the pictorial level descriptions of biological processes in the language of mathematics, with the hope that such an undertaking will lead to deeper insights into the behavior of living systems. To achieve this end, we have undertaken three case studies in physical biology. In the first case study, we used statistical mechanics and polymer physics to construct a simple model that describes how flexible chains of amino acids, referred to as tethers, influence the information processing properties of signaling proteins. In the second case study, we studied the DNA ejection process of phage lambda in vitro. In particular, we used bulk and single-molecule methods to study the control parameters that govern the force and kinematics of the ejection process in vitro. In the last case study, we studied the DNA ejection process of phage lambda in vivo. We developed an assay that allows real-time monitoring of DNA ejection in vivo at the single-molecule level. We also developed a parallel system that allows the simultaneous visualization of both phage capsids and phage DNA at the single-cell level, constituting a true single-molecule Hershey-Chase experiment. The work described in this thesis outlines new tools, both in theory and experiment, that can be used to study biological systems as well as a paradigm that can be employed to mathematicize the cartoons of biology.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/68Q5-D532, author = {Tadmor, Arbel David}, title = {Phage-Host Interaction in Nature}, school = {California Institute of Technology}, year = {2011}, doi = {10.7907/68Q5-D532}, url = {https://resolver.caltech.edu/CaltechTHESIS:05252011-233927917}, abstract = {Though viruses may be the most abundant biological entities on the planet, very little is known about phage-host interaction in the wild due to the absence of proper experimental tools. In the present work we report a method to pair environmental phages with their bacterial hosts at the single-cell level without having to culture either host or virus. The method utilizes microfluidic digital PCR in conjunction with a metagenome data mining tool that was developed to find a viral marker gene in an unknown environment. We implemented this technique on the microbial community residing in the hindgut of termites. Consequently, we discovered genus-wide infection patterns displaying remarkable intra-genus selectivity, with viral alleles displaying limited lateral gene transfer and/or host switching despite host proximity. To try and explain phage-host interactions from a theoretical perspective, we formulated a simple biophysical model describing the interaction of bacteria and viruses in aqueous environments. We predict that the radius r of a bacterium is the most critical parameter determining its fixed point concentration, which scales as $r^{-4}$. Given the hypothesis that there is no selection pressure on bacterial radii, our model predicts that the size spectrum of marine bacteria follows a power law with slope -1, close to the observed average spectrum. 
Moreover, given the total concentration of bacteria in the ocean, our model enables us to estimate the total number of bacterial “species” per volume of water providing a lower and upper bound on the total number of species in the oceans. To elucidate the concept of a “species”, we consider a bacterial-viral co-speciation model, which is consistent with the observed narrow host range of phages. Our model hints that the bacterial-viral “arms race” may be a critical component in the process of co-speciation. We suggest further experiments to test both models. Finally, we consider a recent high resolution measurement of the force as a function of time generated by stress fibers within a single fibroblast cell and suggest a stochastic model that is capable of accounting for the observed data.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/63RG-8P84, author = {Garcia, Hernan G.}, title = {Transcriptional Regulation by the Numbers}, school = {California Institute of Technology}, year = {2011}, doi = {10.7907/63RG-8P84}, url = {https://resolver.caltech.edu/CaltechTHESIS:12162010-123122193}, abstract = {

Recent decades have seen dramatic advances in our ability to make quantitative measurements of the level of gene expression in organisms of all types. The data resulting from these experiments has raised the need for quantitative models that go beyond the verbal and cartoon-level descriptions that have been so useful in developing a qualitative picture of the nature of gene expression. The improvement in our quantitative description of regulatory networks and our corresponding ability to rewire these networks at will has led many to argue for an analogy between biological regulatory networks and their electronic counterparts. In the electronic setting, we can predict the output current given knowledge of the input voltage and the parameters characterizing the circuit. However, this has so far been nothing more than a hopeful analogy since the input-output functions of most quantitative models of transcriptional regulation are based on phenomenological fits with little-to-no connection to the microscopic parameters of the system. This thesis sharpens this analogy by presenting an integrated approach to understanding transcriptional regulation in bacteria in terms of the microscopic parameters involved in the decision-making processes. This is achieved by a three-pronged approach consisting of theoretical models, in vivo measurements and single-molecule experiments in vitro.

The theoretical analysis is based upon two different families of models aimed at describing the output of several regulatory architectures as a function of their input parameters. Thermodynamic models of transcriptional regulation are used to predict the mean level of gene expression of several bacterial promoter architectures as a function of the concentration of the intervening regulatory proteins and their binding energies to DNA and to the associated transcriptional machinery. In recent years, however, an increasing body of work has been performed where levels of gene expression are quantified in single cells and sometimes even at the single molecule level. These measurements have revealed that “noise” in gene expression can play a significant role in decision-making processes in systems ranging from bacteria to mammalian cells. Stochastic models of transcriptional regulation predict this variability in gene expression as a function of the microscopic parameter of the system. Unlike thermodynamic models, however, the predictions from stochastic models are dependent on the rate constants describing the regulatory circuit of interest. A complete set of models that predict input-output functions of regulatory systems in bacteria as a function of not only equilibrium parameters, but also probabilities of transition between different regulatory states is presented.

The second half of the thesis complements the theoretical analyses by presenting several experiments aimed at testing the various predictions generated by these models. One of the experiments is carried out in vivo and aims to test the theoretical predictions for the input-output function of simple repression in terms of its microscopic parameters such as the concentration of repressor inside the cell and its binding energy to DNA. By quantifying the output level of gene expression as a function of the intracellular absolute concentration of repressor it is shown that our models can account for the level of gene expression as a function of the input parameters over several orders of magnitude. The simple repression motif is also explored experimentally using a second method based upon evaluating fluctuations in the partitioning of regulatory proteins during the cell division process. A third set of experiments performed at the single-molecule level in vitro show how a particular repressor protein binds to DNA at two different sites and loops the intervening DNA.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/X454-MV52, author = {Wu, David Dah-wei}, title = {Biomechanical Information Transfer: Maximum Caliber, λ Genome Ejection Dynamics, and the Formation of Otoliths in Zebrafish}, school = {California Institute of Technology}, year = {2010}, doi = {10.7907/X454-MV52}, url = {https://resolver.caltech.edu/CaltechTHESIS:05182010-213110299}, abstract = {

Biology is inherently a non-equilibrium process - constantly battling or co-opting the entropic enemy in order to achieve its sine qua non: growth, development, and reproduction. Involved in these processes is the formation of temporal and spatial order from disorder, and the transfer of information in genomic content but also in the form of physical principles.

First, I will describe a different way to analyze dynamics called Maximum Caliber, a model-free application of the Principle of Maximum Entropy to trajectories or processes, based purely on information theory. We apply Maximum Caliber to analyze particles diffusing on a dual-minima energy landscape and attempt to describe and predict the probability distribution of trajectories. Next, I will discuss our efforts in analyzing the literal transfer of information from one organism to the next: specifically, how phage lambda’s DNA gets inside Escherichia coli. The use of counterions as a parameter to tune the force of in vitro ejections is clarified before performing suggestive in vivo injection experiments. Last, the process of building and shaping the otolith of the zebrafish (the accelerometer and hearing organ) is analyzed, with the conclusion that it is possible to generate said organ through purely physical mechanisms, suggesting that biological information transfer is not exclusively genetic.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, } @phdthesis{10.7907/Q0R5-K353, author = {Ursell, Tristan Scott}, title = {Stretching the Definition of a Lipid Bilayer: Elasticity’s Role in Protein and Lipid Organization}, school = {California Institute of Technology}, year = {2009}, doi = {10.7907/Q0R5-K353}, url = {https://resolver.caltech.edu/CaltechETD:etd-06062009-131454}, abstract = {

The Central Dogma forms the foundation of molecular biology couched in polymer language; all the key players are there — DNA, RNA, protein — or so it would seem. Yet one class of biologically synthesized molecules, crucial for life, is often overlooked: lipids. These amphiphilic molecules exhibit a number of strange properties, integral to the cell's ability to separate self from non-self in a chemically diverse environment. Lipids self-assemble into two-dimensional bi-layered fluids with aspect ratios of a thousand to one or more, capable of self-healing and bending into extraordinarily complex shapes. Within the cell, membranes allow for numerous chemically-distinct compartments, essential for metabolism, protein assembly, genome management, and cell division. With literally hundreds of different kinds of lipids and proteins interacting on a given membrane, we have much to learn about how membranes regulate the flow of materials into and out of cells. Clearly, molecular level detail is integral to our understanding of these systems; however, on the mesoscopic level membranes exhibit certain mechanical effects that serve to organize lipids and proteins, the study of which forms the bulk of this dissertation. We start by building an elastic model of bilayers, where embedded proteins deform the surrounding membrane and incur a free energy cost. This allows the mechanical attributes of the bilayer to influence the conformation of embedded proteins. We explore this connection in the context of mechanosensation in bacteria, as well as developing methods that allow bilayer mechanics to comment on the structure of classically voltage-gated ion channels. In addition to affecting conformational preferences, these same deformations have a finite length-scale that results in interactions between embedded proteins. 
Depending on the protein shape, these interactions can be attractive or repulsive, may exert torques on proteins, provide for a mechanism of shape-specific oligomerization, and importantly allow proteins to utilize the bilayer as a generic communicator of conformational information. The effects of these elastic interactions are discussed in the context of mean protein spacing, dimerization, conformational cooperativity, and likely pathways to multi-mer protein assembly, with the bacterial mechanosensitive channel MscL as a structural example. In subsequent chapters, bilayer elasticity is used to shed light on the large-scale organization of lipids themselves. Biological membranes likely have multiple fluid, lipid phases, where sequestration of saturated lipids and cholesterol forms lipid domains. We found that formation of domains above a certain critical size induces morphological transitions to a ‘dimpled’ phase which turns on repulsive, elastic interactions that serve to spatially organize domains as well as severely inhibit domain coalescence. This provides a mechanism for the maintenance of lipid lateral heterogeneity on relatively short length-scales and long time scales. We further observed discrete transitions to a ‘budded’ domain morphology and developed a set of interpretive energetic transition rules between flat, dimpled and budded domains. We demonstrate that these morphologies and their attendant transitions lead to a unique form of domain-size-dependent transport in membranes. Further, we employ the mechanics of vesicles to model osmoregulation via channel proteins, and in the setting of conserved surface area and volume to develop a theoretical and experimental framework to study membrane adhesion in the context of homophilic protein binding.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/X00X-VC27, author = {Peterson, Eric Lee}, title = {A Random Walk in Physical Biology}, school = {California Institute of Technology}, year = {2008}, doi = {10.7907/X00X-VC27}, url = {https://resolver.caltech.edu/CaltechETD:etd-05282008-152952}, abstract = {

Biology as a scientific discipline is becoming ever more quantitative as tools become available to probe living systems on every scale from the macro to the micro and now even to the nanoscale. In quantitative biology the challenge is to understand the living world in an in vivo context, where it is often difficult for simple theoretical models to connect with the full richness and complexity of the observed data. Computational models and simulations offer a way to bridge the gap between simple theoretical models and real biological systems; towards that aspiration are presented in this thesis three case studies in applying computational models that may give insight into native biological structures.

The first is concerned with soluble proteins; proteins, like DNA, are linear polymers written in a twenty-letter “language” of amino acids. Despite the astronomical number of possible protein sequences, a great amount of similarity is observed among the folded structures of globular proteins. One useful way of discovering similar sequences is to align their sequences, as done e.g. by the popular BLAST program. By clustering together amino acids and reducing the alphabet that proteins are written in to fewer than twenty letters, we find that pairwise sequence alignments are actually more sensitive to proteins with similar structures.

The second case study is concerned with the measurement of forces applied to a membrane. We demonstrate a general method for extracting the forces applied to a fluid lipid bilayer of arbitrary shape and show that the subpiconewton forces applied by optical tweezers to vesicles can be accurately measured in this way.

In the third and final case study we examine the forces between proteins in a lipid bilayer membrane. Due to the bending of the membrane surrounding them, such proteins feel mutually attractive forces which can help them to self-organize and act in concert. These findings are relevant at the areal densities estimated for membrane proteins such as the MscL mechanosensitive channel. The findings of the analytical studies were confirmed by a Monte Carlo Markov Chain simulation using the fully two-dimensional potentials between two model proteins in a membrane.

Living systems present us with beautiful and intricate structures, from the helices and sheets of a folded protein to the dynamic morphology of cellular organelles and the self-organization of proteins in a biomembrane, and a synergy of theoretical and in silico approaches should enable us to build and refine models of in vivo biological data.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/7573-A922, author = {Han, Lin}, title = {In vitro DNA Mechanics in Gene Regulation: One Molecule at a Time}, school = {California Institute of Technology}, year = {2008}, doi = {10.7907/7573-A922}, url = {https://resolver.caltech.edu/CaltechETD:etd-01282008-150852}, abstract = {The biological significance of DNA is primarily attributed to its sequence information. On the other hand, the mechanical properties of DNA can play a critical role in a wide variety of biological processes. One prime example is DNA looping in the context of transcriptional regulation. The emergence of single molecule tracking techniques in the last two decades presents an unprecedented opportunity for studying looping kinetics. One such powerful technique, tethered particle motion (TPM), harnesses the Brownian motion of a microsphere as a means of reporting on the excursion of its tethered molecule, such as DNA. The present work focuses on a looping system found in Escherichia coli, which is mediated by the Lac repressor (LacI) protein. TPM is used to measure individual, real-time looping/unlooping events in DNA of various length and sequence characteristics. By monitoring the magnitude, frequency, and time interval of these features while tuning different parameters, such as LacI concentration, DNA length and DNA sequence, one can survey a host of important information about looping kinetics. A measurement of the LacI concentration dependence of looping probability was found to be in quantitative agreement with a simple thermodynamic model, which also led to the measurement of free energy of LacI-mediated looping, the first such measurement in a single molecule, in vitro setting. 
A quantitative characterization of free energy was obtained under conditions of different inter-operator spacing, systematically varied from 300 to 310 base pairs in one-base-pair increments. An important conclusion from this study is that free energy is modulated by DNA’s helical structure, yet the energy difference between the aligned and unaligned operator configurations is small compared to expectation from simple polymer physics. TPM measurements also revealed an additional looped state, lending support to the hypothesis that two distinct conformations of LacI, the closed and open forms, can coexist. This study also confirmed that the presence of certain DNA sequences, particularly TA pairs in the minor groove of the nucleosomal positioning sequence, makes DNA substantially softer than a corresponding random sequence. This provides direct support for the notion of sequence-dependent DNA elasticity. Finally, a surprising result is that loops as short as 100 base pairs — only two-thirds the persistence length of DNA — can form by LacI-DNA binding. Classical elasticity theory almost forbids this, suggesting that LacI itself plays a more direct role in the bending process, or classical understanding of DNA elasticity breaks down at length scales comparable to its persistence length.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/X12R-DA46, author = {Seitaridou, Effrosyni}, title = {Non-Equilibrium Dynamics: Diffusion in Small Numbers and Ribosomal Self-Assembly}, school = {California Institute of Technology}, year = {2008}, doi = {10.7907/X12R-DA46}, url = {https://resolver.caltech.edu/CaltechETD:etd-05142008-113003}, abstract = {

Biological systems are encountered in states that are far from equilibrium. A change in the cell’s condition triggers the flow of energy and matter that causes the cell’s transition from that non-equilibrium state to a different state. Our interest is in non-equilibrium systems and the way these relate to the cell’s “small numbers” limit as well as to the mechanisms of self-assembly.

Cells contain proteins and nucleotides in numbers smaller than Avogadro’s. In addition, advances in single-molecule experiments, which are, by definition, a case of the “small numbers” problem, have emphasized the importance of fluctuations. Does the result we get from a single-molecule measurement agree with what we would get from a bulk measurement? Is it a fluctuation from the mean? It is, thus, of biological interest to see the behavior of non-equilibrium systems at the “small numbers” limit where fluctuations become important. Using microfluidics, we concentrate on the diffusion of a small number of submicron particles in a system that is away from equilibrium. Therefore, we study the “small numbers” limit of Fick’s Law, with special reference to the fluctuations that attend diffusive dynamics in order to experimentally test the theoretical predictions obtained via the use of E. T. Jaynes’ “principle of maximum caliber.”

The process of macromolecular self-assembly is also highly dynamical. The system’s components come together, defeating in this way entropic effects, to form the system. In the case of the ribosome, whose importance lies in its ability to synthesize proteins, understanding the mechanism of the highly dimensional process of self-assembly becomes relevant when designing, for example, new antibiotics. The second part of this thesis concentrates on the RNA-protein interactions which, in the case of the ribosome, determine the mechanism of self-assembly. With the use of microfluidic technology and a fluorescence assay we determine the thermodynamics and kinetics of RNA folding and RNA-protein binding for a fragment of the bacterial 30S ribosomal subunit, paving the way for the study of the complete assembly of the 30S subunit.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/A7ED-4044, author = {Grayson, Paul Daniel}, title = {The DNA Ejection Process in Bacteriophage λ}, school = {California Institute of Technology}, year = {2007}, doi = {10.7907/A7ED-4044}, url = {https://resolver.caltech.edu/CaltechETD:etd-05252007-103551}, abstract = {Bacteriophages have long served as model systems through which the nature of life may be explored. From a physical or mechanical point of view, phages are excellent examples of natural nanotechnology: they are nanometer-scale systems which depend critically on forces, pressures, velocities, and other fundamentally physical quantities for their biological functions. The study of the physical properties of phages has therefore provided an arena for application of physics to biology. In particular, recent studies of the motor responsible for packaging a phage genome into a capsid showed a buildup of pressure within the capsid of tens of atmospheres. This thesis reports a combined theoretical and experimental study on various aspects of the genome ejection process, so that a comparison may be drawn with the packaging experiments. In particular, we examine various theoretical models of the forces within a phage capsid, deriving formulas both for the force driving genome ejection and for the velocity at which the genome is translocated into a host cell. We describe an experiment in which the force was measured as a function of the amount of genome within the phage capsid, and another where the genome ejection velocity was measured for single phages under the microscope. 
We make direct quantitative comparisons between the theory and experiments, stringently testing the extent to which we are able to model the genome ejection process.}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/P5K9-BD68, author = {Inamdar, Mandar Mukund}, title = {Dissipative Nanomechanics}, school = {California Institute of Technology}, year = {2006}, doi = {10.7907/P5K9-BD68}, url = {https://resolver.caltech.edu/CaltechETD:etd-03272006-142956}, abstract = {

Due to thermal fluctuations, systems at small length scales are remarkably different than their large length scale counterparts. For example, bacterial viruses (phages) have thousands of nanometers of DNA packed inside a hollow capsid of tens of nanometers. This tight compaction leads to large forces on the phage DNA (tens of piconewtons). These forces can be subsequently utilized to instigate the DNA ejection during the infection phase. Developments in optics, biochemistry, microfluidics, etc., have enabled the experimental quantification of these forces, and the rate of DNA packing and ejection. Similarly, the eukaryotic genome is compacted into nanometer size structures called nucleosomes. The conformational changes in the nucleosome due to the thermal fluctuations of the DNA are instrumental in making the DNA accessible for key genomic processes. Developments in FRET, gel electrophoresis, spectroscopy, etc., have made it possible to quantify the equilibrium constant and the rates of these fluctuations. The first part of the thesis involves formulation of simple models for the phage and nucleosome to respond to the existing experimental data and predict results to stimulate further experimentation.

One of the next frontiers in biology is to understand the “small numbers” problem: how does a biological cell function given that most of its proteins and nucleotide polymers are present in numbers much smaller than Avogadro’s number? For example, one of the most important molecules, a cell’s DNA, occurs in only a single copy. Also, it is the flow of matter and energy through cells that makes it possible for organisms to maintain a relatively stable form. Hence, cells must be in this stable state far from equilibrium to function. Many problems of current interest thus involve small systems that are out of equilibrium. Unfortunately, there is no general theoretical framework to model these dissipative systems. E. T. Jaynes suggested the use of dynamical microtrajectories to write down the trajectory entropy, or caliber, for such systems. Maximization of this trajectory entropy, subject to the external constraints, provides one with the probabilities of the underlying microtrajectories. Jaynes calls this the “principle of maximum caliber.” Advances in optics, video-microscopy, etc. have made it possible to experimentally measure these microtrajectories for various systems. In the second part of the thesis we develop simple microtrajectory models for small systems like molecular motors, ion-channels, etc., and apply the maximum caliber principle to obtain the probabilities of the underlying microtrajectories. Our goal is to respond to these experiments and make new predictions.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, } @phdthesis{10.7907/015B-JA11, author = {Wiggins, Paul Alexander}, title = {Biology Beyond Biochemistry: The Mechanics of Life}, school = {California Institute of Technology}, year = {2005}, doi = {10.7907/015B-JA11}, url = {https://resolver.caltech.edu/CaltechETD:etd-05262005-134552}, abstract = {

In the last half century, biologists have made great strides towards understanding the intricate structure of the cell and the relation between this structure and cellular function. Single-molecule techniques and advances in microscopy have also significantly changed the way in which biologists ask and answer questions. As biological measurements and techniques have become increasingly quantitative, they have allowed biologists to ask ever more quantitative questions: How do the molecular machines which comprise the cell function microscopically? Can we understand the design principles that govern the structure and function of biological systems on a microscopic scale? One outcome of this new generation of quantitative biological questions is the need to greet quantitative experiments with models at a higher level of abstraction than the traditional cartoons of molecular biology. In this thesis, I present two such quantitative models.

In the first half of this thesis, I present a physical model for mechanotransduction. Mechanosensitive channels are the central agents employed by cells to transduce mechanical stimuli. Our senses of hearing and touch are both examples of this functional motif. The Mechanosensitive Channel of Large conductance (MscL) is arguably the simplest and best studied mechanosensitive channel. I present analytic estimates for the forces and free energy generated by bilayer deformation which reveal a compelling and intuitive model for the function of the MscL channel, analogous to the nucleation of a second phase. The competition between hydrophobic mismatch of the protein with the surrounding membrane and tension results in a surprisingly rich story which can provide both a quantitative comparison to measurements of the opening tension for MscL when reconstituted in bilayers of different thickness and qualitative insights into the function of the MscL channel and other transmembrane proteins.

In the second half of this thesis, I examine models for the mechanics of DNA. DNA bending, on length scales shorter than a persistence length, plays a central role in the translation of genetic information from DNA to cellular function. Quantitative experimental studies of these biological systems have led to a renewed interest in the short-contour-length polymer statistics relevant for describing the conformational free energy of DNA bending induced by protein-DNA complexes. The recent DNA cyclization studies of Cloutier and Widom have questioned the applicability of the canonical semiflexible polymer theory, the wormlike chain model, to DNA bending on biological length scales. We describe a new class of polymer models that can resolve the proposed discrepancy between short and long-contour-length bending. These models explain the spectacular success of the wormlike chain model in describing many traditional DNA mechanics experiments, as well as its failure to describe the short-contour-length mechanics of DNA. In particular, I present two toy models for DNA bending which capture the short-contour-length behavior observed by Cloutier and Widom. These toy models make quantitative predictions for chain statistics of DNA, observable in DNA mechanics experiments and of central importance to the qualitative description of cellular function, from chromosomal DNA packaging to transcription and gene regulation to viral packaging.

}, address = {1200 East California Boulevard, Pasadena, California 91125}, advisor = {Phillips, Robert B.}, }