[
    {
        "id": "authors:rasf5-gzm48",
        "collection": "authors",
        "collection_id": "rasf5-gzm48",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20200302-105109375",
        "type": "book_section",
        "title": "Multi-View, Generative, Transfer Learning for Distributed Time Series Classification",
        "book_title": "2019 IEEE International Conference on Big Data (Big Data)",
        "author": [
            {
                "family_name": "Das Bhattacharjee",
                "given_name": "Sreyasee",
                "clpid": "Das-Bhattacharjee-S"
            },
            {
                "family_name": "Tolone",
                "given_name": "William J.",
                "clpid": "Tolone-W-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Elshambakey",
                "given_name": "Mohammed",
                "clpid": "Elshambakey-M"
            },
            {
                "family_name": "Cho",
                "given_name": "Isaac",
                "clpid": "Cho-Isaac"
            },
            {
                "family_name": "Nayeem",
                "given_name": "Abdullah al-Raihan",
                "clpid": "Nayeem-A-A-R"
            },
            {
                "family_name": "Yuan",
                "given_name": "Junsong",
                "clpid": "Yuan-Junsong"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "George",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            }
        ],
        "abstract": "In this paper, we propose an effective, multi-view, generative, transfer learning framework for multivariate time-series data. While generative models are demonstrated effective for several machine learning tasks, their application to time-series classification problems is underexplored. The need for additional exploration is motivated when data are large, annotations are unbalanced or scarce, or data are distributed and fragmented. Recent advances in computer vision attempt to use synthesized samples with system generated annotations to overcome the lack or imbalance of annotated data. However, in multi-view problem settings, view mismatches between the synthetic data and real data pose additional challenges against harnessing new annotated data collections. The proposed method offers important contributions to facilitate knowledge sharing, while simultaneously ensuring an effective solution for domain-specific, fine-level categorizations. We propose a principled way to perform view adaptation in a cross-view learning environment, wherein pairwise view similarity is identified by a smaller subset of source samples that closely resemble the target data patterns. This approach integrates generative models within a deep classification framework to minimize the gap between source and target data. More precisely, we design category specific conditional, generative models to update the source generator in order for transforming source features so that they appear as target features and simultaneously tune the associated discriminative model to distinguish these features. During each learning iteration, the source generator is conditioned by a source training set represented as some target-like features. This transformation in appearance was performed via a target generator specifically learned for target-specific customization per category. Afterward, a smaller source training set, indicating close target pattern resemblance in terms of the corresponding generative and discriminative loss, is used to fine-tune the source classification model parameters. Experiments show that compared to existing approaches, our proposed multi-view, generative, transfer learning framework improves time-series classification performance by around 4% in the UCI multi-view activity recognition dataset, while also showing a robust, generalized representation capacity in classifying several large-scale multi-view light curve collections.",
        "doi": "10.1109/bigdata47090.2019.9005452",
        "isbn": "9781728108582",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2019-12",
        "pages": "5585-5594"
    },
    {
        "id": "authors:7an0w-s2324",
        "collection": "authors",
        "collection_id": "7an0w-s2324",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20191004-134753975",
        "type": "book_section",
        "title": "OVS+Tumor: a tool for enhanced lung tumor annotation in VR for machine learning training and analysis",
        "book_title": "ACM SIGGRAPH 2019 Virtual, Augmented, and Mixed Reality",
        "author": [
            {
                "family_name": "Lombeyda",
                "given_name": "Santiago",
                "clpid": "Lombeyda-S-V"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Crichton",
                "given_name": "Daniel",
                "clpid": "Crichton-D-J"
            },
            {
                "family_name": "Kincaid",
                "given_name": "Heather",
                "clpid": "Kincaid-H"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Patriotis",
                "given_name": "Christos",
                "clpid": "Patriotis-C"
            },
            {
                "family_name": "Srivastava",
                "given_name": "Sudhir",
                "clpid": "Srivastava-S"
            }
        ],
        "abstract": "OVS+Tumor creates a seamless VR environment designed for intuitive interaction aiding in the complex task of parsing through 3D CT-scans and annotating candidate tumors. Through interactive subsetting and on-the-fly iso-cloud generation, a wider range of users beyond just domain experts (radiologists/surgeons) can generate a viable machine-learning training dataset.",
        "doi": "10.1145/3306449.3328825",
        "isbn": "9781450363204",
        "publisher": "ACM Press",
        "place_of_publication": "New York, NY",
        "publication_date": "2019-08",
        "pages": "Art. No. 26"
    },
    {
        "id": "authors:8wd6a-2rv47",
        "collection": "authors",
        "collection_id": "8wd6a-2rv47",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190711-134821741",
        "type": "book_section",
        "title": "View-Adaptive Weighted Deep Transfer Learning for Distributed Time-Series Classification",
        "book_title": "2019 IEEE 43rd Annual Computer Software and Applications Conference (COMPSAC)",
        "author": [
            {
                "family_name": "Das Bhattacharjee",
                "given_name": "Sreyasee",
                "clpid": "Das-Bhattacharjee-S"
            },
            {
                "family_name": "Tolone",
                "given_name": "William J.",
                "clpid": "Tolone-W-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Elshambakey",
                "given_name": "Mohammed",
                "clpid": "Elshambakey-M"
            },
            {
                "family_name": "Cho",
                "given_name": "Isaac",
                "clpid": "Cho-Isaac"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "George",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            }
        ],
        "abstract": "In this paper, we propose an effective, multi-view, deep, transfer learning framework for multivariate time-series data. Though widely used for tasks such as computer vision, the application of transfer learning to time-series classification problems (e.g., classification of light curves) is underexplored. The proposed framework makes several important contributions to facilitate knowledge sharing, while simultaneously ensuring an effective solution for domain specific fine-level categorizations. First, in contrast to the traditional approaches, the proposed framework describes pairwise view similarity by identifying a smaller subset of source-view samples that closely resemble the target data patterns. Second, by means of two-phase learning, a generic baseline model is learned on a larger source data collection and later fine-tuned on a smaller target data collection, precisely approximating the target data patterns. Third, an effective view-adaptive timestamp weighting scheme evaluates the relative importance of each timestamp in a more data-driven manner, which enables a more flexible yet discriminative feature representation scheme in the presence of evolving data characteristics. As shown by experiments, compared to the existing approaches, our proposed deep transfer learning framework improves classification performance by around 2-3% in the UCI multi-view activity recognition dataset, while also showing a robust, generalized representation capacity in classifying several large-scale multi-view light curve collections.",
        "doi": "10.1109/compsac.2019.00061",
        "isbn": "9781728126074",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2019-07",
        "pages": "373-381"
    },
    {
        "id": "authors:jad8k-c1950",
        "collection": "authors",
        "collection_id": "jad8k-c1950",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190325-133557161",
        "type": "book_section",
        "title": "Bigger Buffer k-d Trees on Multi-Many-Core Systems",
        "author": [
            {
                "family_name": "Gieseke",
                "given_name": "Fabian",
                "clpid": "Gieseke-F"
            },
            {
                "family_name": "Oancea",
                "given_name": "Cosmin Eugen",
                "clpid": "Oancea-C-E"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Igel",
                "given_name": "Christian",
                "clpid": "Igel-C"
            },
            {
                "family_name": "Heskes",
                "given_name": "Tom",
                "clpid": "Heskes-T"
            }
        ],
        "contributor": [
            {
                "family_name": "Senger",
                "given_name": "Hermes",
                "clpid": "Senger-H"
            },
            {
                "family_name": "Marques",
                "given_name": "Osni",
                "clpid": "Marques-O"
            },
            {
                "family_name": "Garcia",
                "given_name": "Rogerio",
                "clpid": "Garcia-R"
            },
            {
                "family_name": "Pinheiro de Brito",
                "given_name": "Tatiana",
                "clpid": "Pinheiro-de-Brito-T"
            },
            {
                "family_name": "Iope",
                "given_name": "Rog\u00e9rio",
                "clpid": "Iope-R"
            },
            {
                "family_name": "Stanzani",
                "given_name": "Silvio",
                "clpid": "Stanzani-S"
            },
            {
                "family_name": "Gil-Costa",
                "given_name": "Veronica",
                "clpid": "Gil-Costa-V"
            }
        ],
        "abstract": "A buffer k-d tree is a k-d tree variant for massively-parallel nearest neighbor search. While providing valuable speed-ups on modern many-core devices in case both a large number of reference and query points are given, buffer k-d trees are limited by the amount of points that can fit on a single device. In this work, we show how to modify the original data structure and the associated workflow to make the overall approach capable of dealing with massive data sets. We further provide a simple yet efficient way of using multiple devices given in a single workstation. The applicability of the modified framework is demonstrated in the context of astronomy, a field that is faced with huge amounts of data.",
        "doi": "10.1007/978-3-030-15996-2_15",
        "publisher": "Springer",
        "publication_date": "2019-03-26"
    },
    {
        "id": "authors:00yf9-21365",
        "collection": "authors",
        "collection_id": "00yf9-21365",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190131-131445274",
        "type": "book_section",
        "title": "Context-Aware Deep Sequence Learning with Multi-View Factor Pooling for Time Series Classification",
        "book_title": "2018 IEEE International Conference on Big Data (Big Data)",
        "author": [
            {
                "family_name": "Das Bhattacharjee",
                "given_name": "Sreyasee",
                "clpid": "Das-Bhattacharjee-S"
            },
            {
                "family_name": "Tolone",
                "given_name": "William J.",
                "clpid": "Tolone-W-J"
            },
            {
                "family_name": "Elshambakey",
                "given_name": "Mohammed",
                "clpid": "Elshambakey-M"
            },
            {
                "family_name": "Cho",
                "given_name": "Isaac",
                "clpid": "Cho-Isaac"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "George",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            }
        ],
        "abstract": "In this paper, we propose an effective, multi-view, multivariate deep classification model for time-series data. Multi-view methods show promise in their ability to learn correlation and exclusivity properties across different independent information resources. However, most current multi-view integration schemes employ only a linear model and, therefore, do not extensively utilize the relationships observed across different view-specific representations. Moreover, the majority of these methods rely exclusively on sophisticated, handcrafted features to capture local data patterns and, thus, depend heavily on large collections of labeled data. The multi-view, multivariate deep classification model for time-series data proposed in this paper makes important contributions to address these limitations. The proposed model derives a LSTM-based, deep feature descriptor to model both the view-specific data characteristics and cross-view interaction in an integrated deep architecture while driving the learning phase in a data-driven manner. The proposed model employs a compact context descriptor to exploit view-specific affinity information to design a more insightful context representation. Finally, the model uses a multi-view factor-pooling scheme for a context-driven attention learning strategy to weigh the most relevant feature dimensions while eliminating noise from the resulting fused descriptor. As shown by experiments, compared to the existing multi-view methods, the proposed multi-view deep sequential learning approach improves classification performance by roughly 4% in the UCI multi-view activity recognition dataset, while also showing significantly robust generalized representation capacity against its single-view counterparts, in classifying several large-scale multi-view light curve collections.",
        "doi": "10.1109/bigdata.2018.8622253",
        "isbn": "9781538650356",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2018-12",
        "pages": "959-966"
    },
    {
        "id": "authors:64c8s-edy94",
        "collection": "authors",
        "collection_id": "64c8s-edy94",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180208-145104828",
        "type": "book_section",
        "title": "Deep-learnt classification of light curves",
        "book_title": "2017 IEEE Symposium Series on Computational Intelligence (SSCI)",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Sheth",
                "given_name": "K.",
                "orcid": "0000-0002-5496-4118",
                "clpid": "Sheth-K"
            },
            {
                "family_name": "Gieseke",
                "given_name": "F.",
                "clpid": "Gieseke-F"
            },
            {
                "family_name": "Pai",
                "given_name": "A.",
                "clpid": "Pai-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            }
        ],
        "abstract": "Astronomy light curves are sparse, gappy, and heteroscedastic. As a result standard time series methods regularly used for financial and similar datasets are of little help and astronomers are usually left to their own instruments and techniques to classify light curves. A common approach is to derive statistical features from the time series and to use machine learning methods, generally supervised, to separate objects into a few of the standard classes. In this work, we transform the time series to two-dimensional light curve representations in order to classify them using modern deep learning techniques. In particular, we show that convolutional neural networks based classifiers work well for broad characterization and classification. We use labeled datasets of periodic variables from CRTS survey and show how this opens doors for a quick classification of diverse classes with several possible exciting extensions.",
        "doi": "10.1109/SSCI.2017.8280984",
        "isbn": "978-1-5386-2727-3",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2017-12",
        "pages": "1-8"
    },
    {
        "id": "authors:hghsm-3st44",
        "collection": "authors",
        "collection_id": "hghsm-3st44",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180216-161024980",
        "type": "book_section",
        "title": "Massively-parallel best subset selection for ordinary least-squares regression",
        "book_title": "2017 IEEE Symposium Series on Computational Intelligence (SSCI)",
        "author": [
            {
                "family_name": "Gieseke",
                "given_name": "Fabian",
                "clpid": "Gieseke-F"
            },
            {
                "family_name": "Polsterer",
                "given_name": "Kai Lars",
                "clpid": "Polsterer-K-L"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Igel",
                "given_name": "Christian",
                "clpid": "Igel-C"
            },
            {
                "family_name": "Heskes",
                "given_name": "Tom",
                "clpid": "Heskes-T"
            }
        ],
        "abstract": "Selecting an optimal subset of k out of d features for linear regression models given n training instances is often considered intractable for feature spaces with hundreds or thousands of dimensions. We propose an efficient massively-parallel implementation for selecting such optimal feature subsets in a brute-force fashion for small k. By exploiting the enormous compute power provided by modern parallel devices such as graphics processing units, it can deal with thousands of input dimensions even using standard commodity hardware only. We evaluate the practical runtime using artificial datasets and sketch the applicability of our framework in the context of astronomy.",
        "doi": "10.1109/SSCI.2017.8285225",
        "isbn": "978-1-5386-2726-6",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2017-12",
        "pages": "1-8"
    },
    {
        "id": "authors:qhb32-ttw68",
        "collection": "authors",
        "collection_id": "qhb32-ttw68",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180220-071833166",
        "type": "book_section",
        "title": "Photometric redshift estimation: An active learning approach",
        "book_title": "2017 IEEE Symposium Series on Computational Intelligence (SSCI)",
        "author": [
            {
                "family_name": "Vilalta",
                "given_name": "R.",
                "orcid": "0000-0001-8165-8805",
                "clpid": "Vilalta-R"
            },
            {
                "family_name": "Ishida",
                "given_name": "E. E. O.",
                "orcid": "0000-0002-0406-076X",
                "clpid": "Ishida-E-E-O"
            },
            {
                "family_name": "Beck",
                "given_name": "R.",
                "clpid": "Beck-R"
            },
            {
                "family_name": "Sutrisno",
                "given_name": "R.",
                "clpid": "Sutrisno-R"
            },
            {
                "family_name": "de Souza",
                "given_name": "R. S.",
                "clpid": "de-Souza-R-S"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            }
        ],
        "abstract": "A long-lasting problem in astronomy is the accurate estimation of galaxy distances based solely on the information contained in photometric filters. Due to observational selection effects, the spectroscopic (source) sample lacks coverage throughout the feature space (e.g. colors and magnitudes) compared to the photometric (target) sample; this results in a clear mismatch in terms of photometric measurement distributions. We propose a solution to this problem based on active learning, a machine learning technique where a sampling strategy enables us to select the most informative instances to build a predictive model; specifically, we use active learning following a Query by Committee approach. We show that by making wisely selected queries in the target domain, we are able to increase our predictive performance significantly. We also show how a relatively small number of queries (spectroscopic follow-up measurements) suffices to improve the performance of photometric redshift estimators significantly.",
        "doi": "10.1109/SSCI.2017.8285192",
        "isbn": "978-1-5386-2727-3",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2017-11",
        "pages": "1-8"
    },
    {
        "id": "authors:kdr17-qph67",
        "collection": "authors",
        "collection_id": "kdr17-qph67",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190828-141822439",
        "type": "book_section",
        "title": "The EDRN knowledge environment: an open source, scalable informatics platform for biological sciences research",
        "book_title": "Micro- and Nanotechnology Sensors, Systems, and Applications IX",
        "author": [
            {
                "family_name": "Crichton",
                "given_name": "Daniel",
                "clpid": "Crichton-D-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Anton",
                "given_name": "Kristen",
                "clpid": "Anton-K"
            },
            {
                "family_name": "Cinquini",
                "given_name": "Luca",
                "clpid": "Cinquini-L"
            },
            {
                "family_name": "Colbert",
                "given_name": "Maureen",
                "clpid": "Colbert-M"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. George",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Kincaid",
                "given_name": "Heather",
                "clpid": "Kincaid-H"
            },
            {
                "family_name": "Kelly",
                "given_name": "Sean",
                "clpid": "Kelly-S-C"
            },
            {
                "family_name": "Liu",
                "given_name": "David",
                "clpid": "Liu-David"
            }
        ],
        "contributor": [
            {
                "family_name": "George",
                "given_name": "Thomas",
                "clpid": "George-T"
            },
            {
                "family_name": "Dutta",
                "given_name": "Achyut K.",
                "clpid": "Dutta-A-K"
            },
            {
                "family_name": "Islam",
                "given_name": "M. Saif",
                "clpid": "Islam-M-S"
            }
        ],
        "abstract": "We describe here the Early Detection Research Network (EDRN) for Cancer's knowledge environment. It is an open source platform built by NASA's Jet Propulsion Laboratory with contributions from the California Institute of Technology, and Geisel School of Medicine at Dartmouth. It uses tools like Apache OODT, Plone, and Solr, and borrows heavily from JPL's Planetary Data System's ontological infrastructure. It has accumulated data on hundreds of thousands of biospecimens and serves over 1300 registered users across the National Cancer Institute (NCI). The scalable computing infrastructure is built such that we are being able to reach out to other agencies, provide homogeneous access, and provide seamless analytics support and bioinformatics tools through community engagement.",
        "doi": "10.1117/12.2263842",
        "isbn": "9781510608894",
        "publisher": "Society of Photo-Optical Instrumentation Engineers (SPIE)",
        "place_of_publication": "Bellingham, WA",
        "publication_date": "2017-05-18",
        "pages": "Art. No. 101942A"
    },
    {
        "id": "authors:jegwy-f7v51",
        "collection": "authors",
        "collection_id": "jegwy-f7v51",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20160105-071616841",
        "type": "book_section",
        "title": "From stars to patients: Lessons from space science and astrophysics for health care informatics",
        "book_title": "2015 IEEE International Conference on Big Data (Big Data)",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Crichton",
                "given_name": "D. J.",
                "clpid": "Crichton-D-J"
            },
            {
                "family_name": "Chaudhry",
                "given_name": "B.",
                "clpid": "Chaudhry-B"
            }
        ],
        "abstract": "Big Data are revolutionizing nearly every aspect of the modern society. One area where this can have a profound positive societal impact is the field of Health Care Informatics (HCI), which faces many challenges. The key idea behind this study is: can we use some of the experience and technical and methodological solutions from the fields that have successfully adapted to the Big Data era, namely astronomy and space science, to help accelerate the progress of HCI? We illustrate this with examples from the Virtual Observatory framework, and the NCI EDRN project. An effective sharing and reuse of tools, methods, and experiences from different fields can save a lot of effort, time, and expense. HCI can thus benefit from the proven solutions to big data challenges from other domains.",
        "doi": "10.1109/BigData.2015.7364135",
        "isbn": "978-1-4799-9925-5",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2015-10",
        "pages": "2957-2959"
    },
    {
        "id": "authors:nqxhj-ze554",
        "collection": "authors",
        "collection_id": "nqxhj-ze554",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20151113-151109081",
        "type": "book_section",
        "title": "Star Classification Under Data Variability: An Emerging Challenge in Astroinformatics",
        "book_title": "Machine Learning and Knowledge Discovery in Databases",
        "author": [
            {
                "family_name": "Vilalta",
                "given_name": "Ricardo",
                "orcid": "0000-0001-8165-8805",
                "clpid": "Vilalta-R"
            },
            {
                "family_name": "Gupta",
                "given_name": "Kinjal Dhar",
                "clpid": "Gupta-K-D"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            }
        ],
        "contributor": [
            {
                "family_name": "Bifet",
                "given_name": "Albert",
                "clpid": "Bifet-A"
            },
            {
                "family_name": "May",
                "given_name": "Michael",
                "clpid": "May-Michael"
            },
            {
                "family_name": "Zadrozny",
                "given_name": "Bianca",
                "clpid": "Zadrozny-B"
            },
            {
                "family_name": "Gavalda",
                "given_name": "Ricard",
                "clpid": "Gavalda-R"
            },
            {
                "family_name": "Pedreschi",
                "given_name": "Dino",
                "clpid": "Pedreschi-D"
            },
            {
                "family_name": "Bonchi",
                "given_name": "Francesco",
                "clpid": "Bonchi-F"
            },
            {
                "family_name": "Cardoso",
                "given_name": "Jaime",
                "clpid": "Cardoso-J"
            },
            {
                "family_name": "Spiliopoulou",
                "given_name": "Myra",
                "clpid": "Spiliopoulou-M"
            }
        ],
        "abstract": "Astroinformatics is an interdisciplinary field of science that applies modern computational tools to the solution of astronomical problems. One relevant subarea is the use of machine learning for analysis of large astronomical repositories and surveys. In this paper we describe a case study based on the classification of variable Cepheid stars using domain adaptation techniques; our study highlights some of the emerging challenges posed by astroinformatics.",
        "doi": "10.1007/978-3-319-23461-8_22",
        "isbn": "978-3-319-23460-1",
        "publisher": "Springer",
        "place_of_publication": "Cham",
        "publication_date": "2015-09",
        "pages": "241-244"
    },
    {
        "id": "authors:x9k8w-4vt74",
        "collection": "authors",
        "collection_id": "x9k8w-4vt74",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20141217-103710003",
        "type": "book_section",
        "title": "Automated Real-Time Classification and Decision Making in Massive Data Streams from Synoptic Sky Surveys",
        "book_title": "2014 IEEE 10th International Conference on e-Science",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Turmon",
                "given_name": "M.",
                "orcid": "0000-0002-6463-063X",
                "clpid": "Turmon-M"
            },
            {
                "family_name": "Fuchs",
                "given_name": "T.",
                "clpid": "Fuchs-T-J"
            }
        ],
        "abstract": "The nature of scientific and technological data collection is evolving rapidly: data volumes and rates grow exponentially, with increasing complexity and information content, and there has been a transition from static data sets to data streams that must be analyzed in real time. Interesting or anomalous phenomena must be quickly characterized and followed up with additional measurements via optimal deployment of limited assets. Modern astronomy presents a variety of such phenomena in the form of transient events in digital synoptic sky surveys, including cosmic explosions (supernovae, gamma ray bursts), relativistic phenomena (black hole formation, jets), potentially hazardous asteroids, etc. We have been developing a set of machine learning tools to detect, classify and plan a response to transient events for astronomy applications, using the Catalina Real-time Transient Survey (CRTS) as a scientific and methodological testbed. The ability to respond rapidly to the potentially most interesting events is a key bottleneck that limits the scientific returns from the current and anticipated synoptic sky surveys. Similar challenge arise in other contexts, from environmental monitoring using sensor networks to autonomous spacecraft systems. Given the exponential growth of data rates, and the time-critical response, we need a fully automated and robust approach. We describe the results obtained to date, and the possible future developments.",
        "doi": "10.1109/eScience.2014.7",
        "isbn": "978-1-4799-4288-6",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2014-10",
        "pages": "204-211"
    },
    {
        "id": "authors:vcn8b-z5k56",
        "collection": "authors",
        "collection_id": "vcn8b-z5k56",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20150113-112433536",
        "type": "book_section",
        "title": "Immersive and Collaborative Data Visualization Using Virtual Reality Platforms",
        "author": [
            {
                "family_name": "Donalek",
                "given_name": "Ciro",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Cioc",
                "given_name": "Alex",
                "clpid": "Cioc-A"
            },
            {
                "family_name": "Wang",
                "given_name": "Anwell",
                "clpid": "Wang-Anwell"
            },
            {
                "family_name": "Zhang",
                "given_name": "Jerry",
                "clpid": "Zhang-Jerry"
            },
            {
                "family_name": "Lawler",
                "given_name": "Elizabeth",
                "clpid": "Lawler-E"
            },
            {
                "family_name": "Yeh",
                "given_name": "Stacy",
                "clpid": "Yeh-Stacy"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Graham",
                "given_name": "Matthew",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Drake",
                "given_name": "Andrew",
                "clpid": "Drake-A"
            },
            {
                "family_name": "Davidoff",
                "given_name": "Scott",
                "clpid": "Davidoff-S"
            },
            {
                "family_name": "Norris",
                "given_name": "Jeffrey S.",
                "clpid": "Norris-J-S"
            },
            {
                "family_name": "Longo",
                "given_name": "Giuseppe",
                "orcid": "0000-0002-9182-8414",
                "clpid": "Longo-G"
            }
        ],
        "abstract": "Effective data visualization is a key part of the discovery process in the era of \"big data\". It is the bridge between the quantitative content of the data and human intuition, and thus an essential component of the scientific path from data into knowledge and understanding. Visualization is also essential in the data mining process, directing the choice of the applicable algorithms, and in helping to identify and remove bad data from the analysis. However, a high complexity or a high dimensionality of modern data sets represents a critical obstacle. How do we visualize interesting structures and patterns that may exist in hyper-dimensional data spaces? A better understanding of how we can perceive and interact with multidimensional information poses some deep questions in the field of cognition technology and human-computer interaction. To this effect, we are exploring the use of immersive virtual reality platforms for scientific data visualization, both as software and inexpensive commodity hardware. These potentially powerful and innovative tools for multi-dimensional data visualization can also provide an easy and natural path to a collaborative data visualization and exploration, where scientists can interact with their data and their colleagues in the same visual space. Immersion provides benefits beyond the traditional \"desktop\" visualization tools: it leads to a demonstrably better perception of a datascape geometry, more intuitive data understanding, and a better retention of the perceived relationships in the data.",
        "doi": "10.1109/BigData.2014.7004282",
        "publisher": "IEEE",
        "publication_date": "2014-10"
    },
    {
        "id": "authors:gc0wx-1h258",
        "collection": "authors",
        "collection_id": "gc0wx-1h258",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190502-161809010",
        "type": "book_section",
        "title": "Novel Measures for Rare Transients",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            }
        ],
        "contributor": [
            {
                "family_name": "Wo\u017aniak",
                "given_name": "P. R.",
                "clpid": "Wo\u017aniak-P-R"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Seaman",
                "given_name": "R.",
                "clpid": "Seaman-R"
            }
        ],
        "abstract": "Data volumes in astronomy have been growing rapidly. Various projects and methodologies\nare starting to deal with this. As we cross-match and correlate datasets, the\nnumber of parameters per object\u2014in other words dimensions we need to deal with\u2014\nis also growing. This leads to more interesting issues as many values are missing,\nand many parameters are non-homogeneously redundant. One needs to tease apart\nclusters in this space which represent different physical properties, and hence phenomena.\nWe describe measures that help to do that for transients from the Catalina\nRealtime Transient Survey, and project it to near future surveys. The measures are\nbased partly on domain knowledge and are incorporated into statistical and machine\nlearning techniques. We also describe the discriminating role of appropriate followup\nobservations in near-real-time classification of transients. In particular such novel\nmeasures will help us find relatively rare transients.",
        "publisher": "Los Alamos National Laboratory",
        "publication_date": "2013-11"
    },
    {
        "id": "authors:fzg75-k1604",
        "collection": "authors",
        "collection_id": "fzg75-k1604",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20140324-134154762",
        "type": "book_section",
        "title": "Feature Selection Strategies for Classifying High Dimensional Astronomical Data Sets",
        "book_title": "Big Data, 2013 IEEE International Conference",
        "author": [
            {
                "family_name": "Donalek",
                "given_name": "Ciro",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Graham",
                "given_name": "Matthew J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Drake",
                "given_name": "Andrew J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Fuchs",
                "given_name": "Thomas J.",
                "clpid": "Fuchs-T-J"
            },
            {
                "family_name": "Turmon",
                "given_name": "Michael J.",
                "clpid": "Turmon-M-J"
            },
            {
                "family_name": "Kumar",
                "given_name": "A. Arun",
                "clpid": "Kumar-A-A"
            },
            {
                "family_name": "Philip",
                "given_name": "N. Sajeeth",
                "clpid": "Philip-N-S"
            },
            {
                "family_name": "Yang",
                "given_name": "Michael Ting-Chang",
                "clpid": "Yang-Michael-Ting-Chang"
            },
            {
                "family_name": "Longo",
                "given_name": "Giuseppe",
                "orcid": "0000-0002-9182-8414",
                "clpid": "Longo-G"
            }
        ],
        "contributor": [
            {
                "family_name": "Hu",
                "given_name": "X.",
                "clpid": "Hu-X"
            },
            {
                "family_name": "Lin",
                "given_name": "T. Y.",
                "clpid": "Lin-T-Y"
            },
            {
                "family_name": "Raghavan",
                "given_name": "V.",
                "clpid": "Raghavan-V"
            },
            {
                "family_name": "Wah",
                "given_name": "B.",
                "clpid": "Wah-B"
            },
            {
                "family_name": "Baeza-Yates",
                "given_name": "R.",
                "clpid": "Baeza-Yates-R"
            },
            {
                "family_name": "Fox",
                "given_name": "G.",
                "clpid": "Fox-G"
            },
            {
                "family_name": "Shahabi",
                "given_name": "C.",
                "clpid": "Shahabi-C"
            },
            {
                "family_name": "Smith",
                "given_name": "M.",
                "clpid": "Smith-M"
            },
            {
                "family_name": "Yang",
                "given_name": "Q.",
                "clpid": "Yang-Q"
            },
            {
                "family_name": "Ghani",
                "given_name": "R.",
                "clpid": "Ghani-R"
            },
            {
                "family_name": "Fan",
                "given_name": "W.",
                "clpid": "Fan-W"
            },
            {
                "family_name": "Lempel",
                "given_name": "R.",
                "clpid": "Lempel-R"
            },
            {
                "family_name": "Nambiar",
                "given_name": "R.",
                "clpid": "Nambiar-R"
            }
        ],
        "abstract": "The amount of collected data in many scientific fields is increasing, all of them requiring a common task: extract knowledge from massive, multi parametric data sets, as rapidly and efficiently possible. This is especially true in astronomy where synoptic sky surveys are enabling new research frontiers in the time domain astronomy and posing several new object classification challenges in multi dimensional spaces; given the high number of parameters available for each object, feature selection is quickly becoming a crucial task in analyzing astronomical data sets. Using data sets extracted from the ongoing Catalina Real-Time Transient Surveys (CRTS) and the Kepler Mission we illustrate a variety of feature selection strategies used to identify the subsets that give the most information and the results achieved applying these techniques to three major astronomical problems.",
        "doi": "10.1109/BigData.2013.6691731",
        "isbn": "978-1-4799-1292-6",
        "publisher": "IEEE",
        "place_of_publication": "New York, NY",
        "publication_date": "2013-10",
        "pages": "35-45"
    },
    {
        "id": "authors:qncny-bbw80",
        "collection": "authors",
        "collection_id": "qncny-bbw80",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20150713-125431089",
        "type": "book_section",
        "title": "Sky Surveys",
        "book_title": "Planets, Stars and Stellar Systems",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. George",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Drake",
                "given_name": "Andrew",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "Matthew",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Donalek",
                "given_name": "Ciro",
                "clpid": "Donalek-C"
            }
        ],
        "contributor": [
            {
                "family_name": "Oswalt",
                "given_name": "Terry D.",
                "clpid": "Oswalt-T-D"
            },
            {
                "family_name": "Bond",
                "given_name": "Howard E.",
                "clpid": "Bond-H-E"
            }
        ],
        "abstract": "Sky surveys represent a fundamental data basis for astronomy. We use them to map in a systematic way the universe and its constituents and to discover new types of objects or phenomena. We review the subject, with an emphasis on the wide-field, imaging surveys, placing them in a broader scientific and historical context. Surveys are now the largest data generators in astronomy, propelled by the advances in information and computation technology, and have transformed the ways in which astronomy is done. This trend is bound to continue, especially with the new generation of synoptic sky surveys that cover wide areas of the sky repeatedly and open a new time domain of discovery. We describe the variety and the general properties of surveys, illustrated by a number of examples, the ways in which they may be quantified and compared, and offer some figures of merit that can be used to compare their scientific discovery potential. Surveys enable a very wide range of science, and that is perhaps their key unifying characteristic. As new domains of the observable parameter space open up thanks to the advances in technology, surveys are often the initial step in their exploration. Some science can be done with the survey data alone (or a combination of data from different surveys), and some require a targeted follow-up of potentially interesting sources selected from surveys. Surveys can be used to generate large, statistical samples of objects that can be studied as populations or as tracers of larger structures to which they belong. They can be also used to discover or generate samples of rare or unusual objects and may lead to discoveries of some previously unknown types. We discuss a general framework of parameter spaces that can be used for an assessment and comparison of different surveys and the strategies for their scientific exploration. 
As we are moving into the Petascale regime and beyond, an effective processing and scientific exploitation of such large data sets and data streams pose many challenges, some of which are specific to any given survey and some of which may be addressed in the framework of Virtual Observatory and Astroinformatics. The exponential growth of data volumes and complexity makes a broader application of data mining and knowledge discovery technologies critical in order to take a full advantage of this wealth of information. Finally, we discuss some outstanding challenges and prospects for the future.",
        "doi": "10.1007/978-94-007-5618-2_5",
        "isbn": "978-94-007-5617-5",
        "publisher": "Springer",
        "place_of_publication": "Dordrecht",
        "publication_date": "2013",
        "pages": "223-281"
    },
    {
        "id": "authors:2ezgc-htq19",
        "collection": "authors",
        "collection_id": "2ezgc-htq19",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20130401-144342419",
        "type": "book_section",
        "title": "Flashes in a star stream: Automated classification of astronomical transient events",
        "book_title": "2012 IEEE 8th International Conference on E-Science",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Moghaddam",
                "given_name": "B.",
                "clpid": "Moghaddam-B"
            },
            {
                "family_name": "Turmon",
                "given_name": "M.",
                "orcid": "0000-0002-6463-063X",
                "clpid": "Turmon-M"
            }
        ],
        "abstract": "An automated, rapid classification of transient events detected in the modern synoptic sky surveys is essential for their scientific utility and effective follow-up using scarce resources. This presents some unusual challenges: the data are sparse, heterogeneous and incomplete; evolving in time; and most of the relevant information comes not from the data stream itself, but from a variety of archival data and contextual information (spatial, temporal, and multi-wavelength). We are exploring a variety of novel techniques, mostly Bayesian, to respond to these challenges, using the ongoing CRTS sky survey as a testbed. The current surveys are already overwhelming our ability to effectively follow all of the potentially interesting events, and these challenges will grow by orders of magnitude over the next decade as the more ambitious sky surveys get under way. While we focus on an application in a specific domain (astrophysics), these challenges are more broadly relevant for event or anomaly detection and knowledge discovery in massive data streams.",
        "doi": "10.1109/eScience.2012.6404437",
        "isbn": "978-1-4673-4467-8",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2012-10",
        "pages": "1-8"
    },
    {
        "id": "authors:8qrp8-63z16",
        "collection": "authors",
        "collection_id": "8qrp8-63z16",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20170207-173845918",
        "type": "book_section",
        "title": "Data Understanding using Semi-Supervised Clustering",
        "book_title": "2012 Conference on Intelligent Data Understanding",
        "author": [
            {
                "family_name": "Bhatnagar",
                "given_name": "Vasudha",
                "clpid": "Bhatnagar-V"
            },
            {
                "family_name": "Dobariyal",
                "given_name": "Rashmi",
                "clpid": "Dobariyal-R"
            },
            {
                "family_name": "Jain",
                "given_name": "Priya",
                "clpid": "Jain-P"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            }
        ],
        "abstract": "In the era of E-science, most scientific endeavors depend on intense data analysis to understand the underlying physical phenomenon. Predictive modeling is one of the popular machine learning tasks undertaken in such endeavors. Labeled data used for training the predictive model reflects understanding of the domain. In this paper we introduce data understanding as a computational problem and propose a solution for enhancing domain understanding based on semisupervised clustering.\n\nThe proposed DU-SSC (Data Understanding using SemiSupervised Clustering) algorithm is incremental, parameterless and performs single scan of data. Given labeled (training) data is discretized at user specified resolution and finer (micro) data distributions are identified within classes, along with outliers. The discovery process is based on grouping similar instances in data space, while taking into account the degree of influence each attribute exercises on the class label. Maximal Information Coefficient measure is used during similarity computations for this purpose. \n\nThe study is supported by experiments and a detailed account of understanding gained is presented for two selected UCI data sets. General observations on nine other UCI datasets are presented, along with experiments that demonstrate use of discovered knowledge for improved classification.",
        "doi": "10.1109/CIDU.2012.6382192",
        "isbn": "978-1-4673-4627-6",
        "publisher": "IEEE",
        "place_of_publication": "Piscataway, NJ",
        "publication_date": "2012-10",
        "pages": "118-123"
    },
    {
        "id": "authors:ht0ff-31q11",
        "collection": "authors",
        "collection_id": "ht0ff-31q11",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20161019-145928167",
        "type": "book_section",
        "title": "Connecting the time domain community with the Virtual Astronomical Observatory",
        "book_title": "Observatory Operations: Strategies, Processes, and Systems IV",
        "author": [
            {
                "family_name": "Graham",
                "given_name": "Matthew J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Donalek",
                "given_name": "Ciro",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Drake",
                "given_name": "Andrew J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Plante",
                "given_name": "Raymond L.",
                "clpid": "Plante-R-L"
            },
            {
                "family_name": "Kantor",
                "given_name": "Jeffrey",
                "clpid": "Kantor-J"
            },
            {
                "family_name": "Good",
                "given_name": "John C.",
                "clpid": "Good-J-C"
            }
        ],
        "contributor": [
            {
                "family_name": "Peck",
                "given_name": "Alison B.",
                "clpid": "Peck-A-B"
            },
            {
                "family_name": "Seaman",
                "given_name": "Robert L.",
                "clpid": "Seaman-R-L"
            },
            {
                "family_name": "Comeron",
                "given_name": "Fernando",
                "clpid": "Comeron-F"
            }
        ],
        "abstract": "The time domain has been identified as one of the most important areas of astronomical research for the next decade. The Virtual Observatory is in the vanguard with dedicated tools and services that enable and facilitate the discovery, dissemination and analysis of time domain data. These range in scope from rapid notifications of time-critical astronomical transients to annotating long-term variables with the latest modelling results. In this paper, we will review the prior art in these areas and focus on the capabilities that the VAO is bringing to bear in support of time domain science. In particular, we will focus on the issues involved with the heterogeneous collections of (ancilllary) data associated with astronomical transients, and the time series characterization and classification tools required by the next generation of sky surveys, such as LSST and SKA.",
        "doi": "10.1117/12.926577",
        "isbn": "9780819491497",
        "publisher": "Society of Photo-Optical Instrumentation Engineers (SPIE)",
        "place_of_publication": "Bellingham, WA",
        "publication_date": "2012-09-13",
        "pages": "Art. No. 84480P"
    },
    {
        "id": "authors:gg9aa-y5t52",
        "collection": "authors",
        "collection_id": "gg9aa-y5t52",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20121105-131003860",
        "type": "book_section",
        "title": "The Catalina Real-time Transient Survey",
        "book_title": "New Horizons in Time-Domain Astronomy",
        "author": [
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Prieto",
                "given_name": "J. L.",
                "orcid": "0000-0003-0943-0026",
                "clpid": "Prieto-J-L"
            },
            {
                "family_name": "Beshore",
                "given_name": "E.",
                "clpid": "Beshore-E"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Catalan",
                "given_name": "M.",
                "clpid": "Catalan-M"
            },
            {
                "family_name": "Larson",
                "given_name": "S.",
                "orcid": "0000-0002-0057-8211",
                "clpid": "Larson-S"
            },
            {
                "family_name": "Christensen",
                "given_name": "E.",
                "clpid": "Christensen-E"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "clpid": "Williams-R"
            }
        ],
        "contributor": [
            {
                "family_name": "Griffin",
                "given_name": "R. E. M.",
                "clpid": "Griffin-R-E-M"
            },
            {
                "family_name": "Hanisch",
                "given_name": "R. J.",
                "clpid": "Hanisch-R-J"
            },
            {
                "family_name": "Seaman",
                "given_name": "Robert L.",
                "clpid": "Seaman-R-L"
            }
        ],
        "abstract": "The Catalina Real-time Transient Survey (CRTS) currently covers 33,000 deg^2 of the sky in search of transient astrophysical events, with time base-lines ranging from 10 minutes to ~7 years. Data provided by the Catalina Sky Survey provide an unequalled base-line against which &gt; 4,000 unique optical transient events have been discovered and openly published in real-time. Here we highlight some of the discoveries of CRTS.",
        "doi": "10.1017/S1743921312000889",
        "isbn": "978-1-107-01985-0",
        "publisher": "Cambridge University Press",
        "place_of_publication": "New York",
        "publication_date": "2012",
        "pages": "306-308"
    },
    {
        "id": "authors:1n7q2-2d193",
        "collection": "authors",
        "collection_id": "1n7q2-2d193",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20121105-130132610",
        "type": "book_section",
        "title": "Exploring the Time Domain with Synoptic Sky Surveys",
        "book_title": "New Horizons in Time-Domain Astronomy",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "clpid": "Williams-R"
            }
        ],
        "contributor": [
            {
                "family_name": "Griffin",
                "given_name": "R. E. M.",
                "clpid": "Griffin-R-E-M"
            },
            {
                "family_name": "Hanisch",
                "given_name": "R. J.",
                "clpid": "Hanisch-R-J"
            },
            {
                "family_name": "Seaman",
                "given_name": "Robert L.",
                "clpid": "Seaman-R-L"
            }
        ],
        "abstract": "Synoptic sky surveys are becoming the largest data generators in astronomy, and they are opening a new research frontier that touches practically every field of astronomy. Opening the time domain to a systematic exploration will strengthen our understanding of a number of interesting known phenomena, and may lead to the discoveries of as yet unknown ones. We describe some lessons learned over the past decade, and offer some ideas that may guide strategic considerations in the planning and execution of future synoptic sky surveys.",
        "doi": "10.1017/S1743921312000488",
        "isbn": "978-1-107-01985-0",
        "publisher": "Cambridge University Press",
        "place_of_publication": "New York",
        "publication_date": "2012",
        "pages": "141-146"
    },
    {
        "id": "authors:63ckw-b4a15",
        "collection": "authors",
        "collection_id": "63ckw-b4a15",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20121105-132022053",
        "type": "book_section",
        "title": "The VAO Transient Facility",
        "book_title": "New Horizons in Time-Domain Astronomy",
        "author": [
            {
                "family_name": "Graham",
                "given_name": "Matthew J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Drake",
                "given_name": "Andrew",
                "clpid": "Drake-A"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy",
                "clpid": "Williams-R"
            },
            {
                "family_name": "Seaman",
                "given_name": "Rob",
                "clpid": "Seaman-R-L"
            }
        ],
        "contributor": [
            {
                "family_name": "Griffin",
                "given_name": "R. E. M.",
                "clpid": "Griffin-R-E-M"
            },
            {
                "family_name": "Hanisch",
                "given_name": "R. J.",
                "clpid": "Hanisch-R-J"
            },
            {
                "family_name": "Seaman",
                "given_name": "Robert L.",
                "clpid": "Seaman-R-L"
            }
        ],
        "abstract": "The time-domain community wants robust and reliable tools to enable the production of, and subscription to, community-endorsed event notification packets (VOEvent). The Virtual Astronomical Observatory (VAO) Transient Facility (VTF) is being designed to be the premier brokering service for the community, both collecting and disseminating observations about time-critical astronomical transients but also supporting annotations and the application of intelligent machine-learning to those observations. Two types of activity associated with the facility can therefore be distinguished: core infrastructure, and user services. We review the prior art in both areas, and describe the planned capabilities of the VTF. In particular, we focus on scalability and quality-of-service issues required by the next generation of sky surveys such as LSST and SKA.",
        "doi": "10.1017/S1743921312000920",
        "isbn": "978-1-107-01985-0",
        "publisher": "Cambridge University Press",
        "place_of_publication": "New York",
        "publication_date": "2012",
        "pages": "318-320"
    },
    {
        "id": "authors:3h6ez-31845",
        "collection": "authors",
        "collection_id": "3h6ez-31845",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20121105-133238798",
        "type": "book_section",
        "title": "Real-Time Classification of Transient Events in Synoptic Sky Surveys",
        "book_title": "New Horizons in Time-Domain Astronomy",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "Ashish A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "clpid": "Williams-R"
            },
            {
                "family_name": "Chen",
                "given_name": "Y.",
                "orcid": "0000-0002-9730-9463",
                "clpid": "Chen-Yanbei"
            },
            {
                "family_name": "Moghaddam",
                "given_name": "B.",
                "clpid": "Moghaddam-B"
            },
            {
                "family_name": "Turmon",
                "given_name": "M.",
                "orcid": "0000-0002-6463-063X",
                "clpid": "Turmon-M"
            }
        ],
        "contributor": [
            {
                "family_name": "Griffin",
                "given_name": "R. E. M.",
                "clpid": "Griffin-R-E-M"
            },
            {
                "family_name": "Hanisch",
                "given_name": "R. J.",
                "clpid": "Hanisch-R-J"
            },
            {
                "family_name": "Seaman",
                "given_name": "Robert L.",
                "clpid": "Seaman-R-L"
            }
        ],
        "abstract": "An automated rapid classification of the transient events detected in modern synoptic sky surveys is essential for their scientific utility and effective follow-up when resources are scarce. This problem will grow by orders of magnitude with the next generation of surveys. We are exploring a variety of novel automated classification techniques, mostly Bayesian, to respond to those challenges, using the ongoing CRTS sky survey as a testbed. We describe briefly some of the methods used.",
        "doi": "10.1017/S1743921312001056",
        "isbn": "978-1-107-01985-0",
        "publisher": "Cambridge University Press",
        "place_of_publication": "New York",
        "publication_date": "2011-09",
        "pages": "355-357"
    },
    {
        "id": "authors:jwynb-sc777",
        "collection": "authors",
        "collection_id": "jwynb-sc777",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20110510-135410406",
        "type": "book_section",
        "title": "Classification of Optical Transients: Experiences from PQ and CRTS Surveys",
        "book_title": "GAIA: At the Frontiers of Astrometry",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Williams",
                "given_name": "R. D.",
                "orcid": "0000-0002-9145-8580",
                "clpid": "Williams-R-D"
            },
            {
                "family_name": "Moghaddam",
                "given_name": "B.",
                "clpid": "Moghaddam-B"
            },
            {
                "family_name": "Turmon",
                "given_name": "M.",
                "orcid": "0000-0002-6463-063X",
                "clpid": "Turmon-M"
            }
        ],
        "abstract": "Synoptic sky surveys are opening up exciting opportunities in time domain astronomy. Gaia will make a great contribution to this field. A crucial factor for good scientific returns is real-time classification of transients, in order to optimize their follow-up. We have been developing infrastructure towards this end starting from the completed Palomar-Quest (PQ) survey, and the ongoing Catalina Real-Time Transient Survey (CRTS). CRTS has been consistently producing transients for almost three years now. We describe here the efforts related to transient classification and event dissemination. Many of the technologies and methodologies we are developing may benefit Gaia.",
        "doi": "10.1051/eas/1045030",
        "isbn": "978-2-7598-0608-9",
        "publisher": "EDP Sciences",
        "place_of_publication": "Les Ulis, France",
        "publication_date": "2011",
        "pages": "173-178"
    },
    {
        "id": "authors:1vxsc-8j432",
        "collection": "authors",
        "collection_id": "1vxsc-8j432",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20110520-140854016",
        "type": "book_section",
        "title": "US VAO Facility for Rapid Transients",
        "book_title": "Astronomical data analysis software and systems XIX",
        "author": [
            {
                "family_name": "Williams",
                "given_name": "R. D.",
                "orcid": "0000-0002-9145-8580",
                "clpid": "Williams-R-D"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Seaman",
                "given_name": "R.",
                "clpid": "Seaman-R"
            }
        ],
        "contributor": [
            {
                "family_name": "Mizumoto",
                "given_name": "Yoshihiko",
                "clpid": "Mizumoto-Yoshihiko"
            },
            {
                "family_name": "Morita",
                "given_name": "Koh-Ichiro",
                "clpid": "Morita-Koh-Ichiro"
            },
            {
                "family_name": "Ohishi",
                "given_name": "Masatoshi",
                "clpid": "Ohishi-Masatoshi"
            }
        ],
        "abstract": "The US VAO Facility for Rapid Transients (if funded) will collect and disseminate observations about time-critical astronomical transients, and add annotations and intelligent machine-learning to those observations. The information can be \"pushed\" to subscribers, who may be either humans or machines that control telescopes. Subscribers can prepare precise \"triggers\" to decide which events should reach them and their machines, that may be based on the generic event, or on the specific vocabulary of parameters that define a particular type of observation. The system will not be centralized, but rather a set of interoperating nodes with caching. The twin thrusts are automation of process, and discrimination of interesting events.",
        "isbn": "978-1-58381-748-3",
        "publisher": "Astronomical Society of the Pacific",
        "place_of_publication": "San Francisco, CA",
        "publication_date": "2010",
        "pages": "123-126"
    },
    {
        "id": "authors:vpn6f-v9g76",
        "collection": "authors",
        "collection_id": "vpn6f-v9g76",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20110520-131607293",
        "type": "book_section",
        "title": "Mixing Bayesian Techniques for Effective Real-time Classification of Astronomical Transients",
        "book_title": "Astronomical data analysis software and systems XIX",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Donalek",
                "given_name": "Ciro",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Drake",
                "given_name": "Andrew",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "Matthew",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy",
                "clpid": "Williams-R"
            },
            {
                "family_name": "Moghaddam",
                "given_name": "Baback",
                "clpid": "Moghaddam-B"
            },
            {
                "family_name": "Turmon",
                "given_name": "Michael",
                "clpid": "Turmon-M-J"
            }
        ],
        "contributor": [
            {
                "family_name": "Mizumoto",
                "given_name": "Yoshihiko",
                "clpid": "Mizumoto-Y"
            },
            {
                "family_name": "Morita",
                "given_name": "Koh-Ichiro",
                "clpid": "Morita-K-I"
            },
            {
                "family_name": "Ohishi",
                "given_name": "Masatoshi",
                "clpid": "Ohishi-M"
            }
        ],
        "abstract": "With the recent advent of time domain astronomy through various surveys several approaches at classification of transient s are being tried. Choosing relatively interesting and rarer transients for follow-up is important since following all transients being detected per night is not possible given the limited resources available. In addition, the classification needs to be carried out using minimal number of observations available in order to catch some of the more interesting objects. We present details on two such classification methods: (1) using Bayesian networks with colors and contextual information, and (2) using Gaussian Process Regression and light-curves. Both can be carried out in real-time and from a very small number of epochs. In order to improve classification i.e. narrow down number of competing classes, it is important to combine as many different classifiers as possible. We mention how this can be accomplished using a higher order fusion network.",
        "isbn": "978-1-58381-748-3",
        "publisher": "Astronomical Society of the Pacific",
        "place_of_publication": "San Francisco, CA",
        "publication_date": "2010",
        "pages": "115-118"
    },
    {
        "id": "authors:3hq96-yrx02",
        "collection": "authors",
        "collection_id": "3hq96-yrx02",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20100913-153257900",
        "type": "book_section",
        "title": "Skyalert: Real-time Astronomy for You and Your Robots",
        "book_title": "Astronomical Data Analysis Software and Systems XVIII",
        "author": [
            {
                "family_name": "Williams",
                "given_name": "R. D.",
                "orcid": "0000-0002-9145-8580",
                "clpid": "Williams-R-D"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            }
        ],
        "contributor": [
            {
                "family_name": "Bohlender",
                "given_name": "David A.",
                "clpid": "Bohlender-D-A"
            },
            {
                "family_name": "Durand",
                "given_name": "Daniel",
                "clpid": "Durand-D"
            },
            {
                "family_name": "Dowler",
                "given_name": "Patrick",
                "clpid": "Dowler-P"
            }
        ],
        "abstract": "Skyalert.org is a web application to collect and disseminate observations about time-critical astronomical transients, and to add annotations and intelligent machine-learning to those observations. The information is \"pushed\" to subscribers, who may be either humans (email, text message etc.) or they may be machines that control telescopes. Subscribers can prepare precise \"trigger rules\" to decide which events should reach them and their robots, rules that may be based on sky position, or on the specific vocabulary of parameters that define a particular type of observation. Our twin thrusts are automation of process, and discrimination of interesting events.",
        "isbn": "978-1-58381-702-5",
        "publisher": "Astronomical Society of the Pacific",
        "place_of_publication": "San Francisco, CA",
        "publication_date": "2009",
        "pages": "115-119"
    },
    {
        "id": "authors:zkjrj-14991",
        "collection": "authors",
        "collection_id": "zkjrj-14991",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:MAHaipcp08",
        "type": "book_section",
        "title": "Towards real-time classification of astronomical transients",
        "book_title": "International Conference on Classification and Discovery in Large Astronomical Surveys, Ringberg Castle, Germany, 14\u201317 October 2008",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "orcid": "0000-0002-9145-8580",
                "clpid": "Williams-R-D"
            },
            {
                "family_name": "Drake",
                "given_name": "A.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Graham",
                "given_name": "M.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Moghaddam",
                "given_name": "B.",
                "clpid": "Moghaddam-B"
            },
            {
                "family_name": "Turmon",
                "given_name": "M.",
                "orcid": "0000-0002-6463-063X",
                "clpid": "Turmon-M"
            },
            {
                "family_name": "Jewell",
                "given_name": "J.",
                "clpid": "Jewell-Jeffrey-B"
            },
            {
                "family_name": "Khosla",
                "given_name": "A.",
                "clpid": "Khosla-A"
            },
            {
                "family_name": "Hensleya",
                "given_name": "B.",
                "clpid": "Hensleya-B"
            }
        ],
        "contributor": [
            {
                "family_name": "Bailer-Jones",
                "given_name": "Coryn A. L.",
                "clpid": "Bailer-Jones-C-A-L"
            }
        ],
        "abstract": "Exploration of time domain is now a vibrant area of research in astronomy, driven by the advent of digital synoptic sky surveys. While panoramic surveys can detect variable or transient events, typically some follow-up observations are needed; for short-lived phenomena, a rapid response is essential. Ability to automatically classify and prioritize transient events for follow-up studies becomes critical as the data rates increase. We have been developing such methods using the data streams from the Palomar-Quest survey, the Catalina Sky Survey and others, using the VOEventNet framework. The goal is to automatically classify transient events, using the new measurements, combined with archival data (previous and multi-wavelength measurements), and contextual information (e.g., Galactic or ecliptic latitude, presence of a possible host galaxy nearby, etc.); and to iterate them dynamically as the follow-up data come in (e.g., light curves or colors). We have been investigating Bayesian methodologies for classification, as well as discriminated follow-up to optimize the use of available resources, including Naive Bayesian approach, and the non-parametric Gaussian process regression. We will also be deploying variants of the traditional machine learning techniques such as Neural Nets and Support Vector Machines on datasets of reliably classified transients as they build up.",
        "doi": "10.1063/1.3059064",
        "isbn": "9780735406131",
        "publisher": "American Institute of Physics",
        "place_of_publication": "Melville, NY",
        "publication_date": "2008-12-05",
        "pages": "287-293"
    },
    {
        "id": "authors:9m4q7-e2t36",
        "collection": "authors",
        "collection_id": "9m4q7-e2t36",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:DONaipcp08",
        "type": "book_section",
        "title": "New approaches to object classification in synoptic sky surveys",
        "book_title": "Classification and discovery in large astronomical surveys : proceedings of the International Conference \"Classification and Discovery in Large Astronomical Surveys\", Ringberg Castle, Germany, 14\u201317 October 2008",
        "author": [
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Marney",
                "given_name": "S.",
                "clpid": "Marney-S"
            },
            {
                "family_name": "Drake",
                "given_name": "A.",
                "clpid": "Drake-A"
            },
            {
                "family_name": "Glikman",
                "given_name": "E.",
                "orcid": "0000-0003-0489-3750",
                "clpid": "Glikman-E"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "clpid": "Williams-R"
            }
        ],
        "abstract": "Digital synoptic sky surveys pose several new object classification challenges. In surveys where real-time detection and classification of transient events is a science driver, there is a need for an effective elimination of instrument-related artifacts which can masquerade as transient sources in the detection pipeline, e.g., unremoved large cosmic rays, saturation trails, reflections, crosstalk artifacts, etc. We have implemented such an Artifact Filter, using a supervised neural network, \nfor the real-time processing pipeline in the Palomar-Quest (PQ) survey. After the training phase, for each object it takes as input a set of measured morphological parameters and returns the probability of it being a real object. Despite the relatively low number of training cases for many kinds of artifacts, the overall artifact classification rate is around 90%, with no genuine transients misclassified during our real-time scans. Another question is how to assign an optimal star-galaxy \nclassification in a multi-pass survey, where seeing and other conditions change between different epochs, potentially producing inconsistent classifications for the same object. We have implemented a star/galaxy multipass classifier that makes use of external and a priori knowledge to find the optimal classification from the individually derived ones. Both these techniques can be applied to other, similar surveys and data sets.",
        "doi": "10.1063/1.3059057",
        "isbn": "9780735406131",
        "publisher": "American Institute of Physics",
        "place_of_publication": "Melville, NY",
        "publication_date": "2008-12-05",
        "pages": "252-256"
    },
    {
        "id": "authors:4e9kp-14y78",
        "collection": "authors",
        "collection_id": "4e9kp-14y78",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20110210-090345699",
        "type": "book_section",
        "title": "Some Pattern Recognition Challenges in Data-Intensive Astronomy",
        "book_title": "The18th International Conference on Pattern Recognition",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Donalek",
                "given_name": "C.",
                "clpid": "Donalek-C"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "clpid": "Williams-R"
            },
            {
                "family_name": "Drake",
                "given_name": "A. J.",
                "clpid": "Drake-A-J"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Glikman",
                "given_name": "E.",
                "orcid": "0000-0003-0489-3750",
                "clpid": "Glikman-E"
            }
        ],
        "abstract": "We review some of the recent developments and\nchallenges posed by the data analysis in modern\ndigital sky surveys, which are representative of the\ninformation-rich astronomy in the context of Virtual\nObservatory. Illustrative examples include the\nproblems of an automated star-galaxy classification\nin complex and heterogeneous panoramic imaging\ndata sets, and an automated, iterative, dynamical\nclassification of transient events detected in synoptic\nsky surveys. These problems offer good opportunities\nfor productive collaborations between astronomers\nand applied computer scientists and statisticians, and\nare representative of the kind of challenges now\npresent in all data-intensive fields. We discuss briefly\nsome emergent types of scalable scientific data\nanalysis systems with a broad applicability.",
        "doi": "10.1109/ICPR.2006.1064",
        "isbn": "0-7695-2521-0",
        "publisher": "IEEE",
        "place_of_publication": "Los Alamitos, CA",
        "publication_date": "2006-09-18",
        "pages": "856-863"
    },
    {
        "id": "authors:eqggf-nrn19",
        "collection": "authors",
        "collection_id": "eqggf-nrn19",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20110811-102455986",
        "type": "book_section",
        "title": "Time Domain Explorations with Digital Sky Surveys",
        "book_title": "Astronomical Data Analysis Software and Systems XIV",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "Ashish A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Kollipara",
                "given_name": "Priya",
                "clpid": "Kollipara-P"
            },
            {
                "family_name": "Granett",
                "given_name": "Benjamin",
                "clpid": "Granett-B"
            },
            {
                "family_name": "Krause",
                "given_name": "Elisabeth",
                "clpid": "Krause-E"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy D.",
                "orcid": "0000-0002-9145-8580",
                "clpid": "Williams-R-D"
            },
            {
                "family_name": "Bogosavljevic",
                "given_name": "M.",
                "clpid": "Bogosavljevic-M"
            },
            {
                "family_name": "Baltay",
                "given_name": "C.",
                "clpid": "Baltay-C"
            },
            {
                "family_name": "Rabinowitz",
                "given_name": "D.",
                "clpid": "Rabinowitz-D"
            },
            {
                "family_name": "Bauer",
                "given_name": "A.",
                "clpid": "Bauer-A"
            },
            {
                "family_name": "Andrews",
                "given_name": "P.",
                "clpid": "Andrews-P"
            },
            {
                "family_name": "Ellman",
                "given_name": "N.",
                "clpid": "Ellman-N"
            },
            {
                "family_name": "Duffau",
                "given_name": "S.",
                "clpid": "Duffau-S"
            },
            {
                "family_name": "Jerke",
                "given_name": "J.",
                "clpid": "Jerke-J"
            },
            {
                "family_name": "Rengstorf",
                "given_name": "A.",
                "clpid": "Rengstorf-A"
            },
            {
                "family_name": "Brunner",
                "given_name": "R. J.",
                "clpid": "Brunner-R-J"
            },
            {
                "family_name": "Musser",
                "given_name": "J.",
                "clpid": "Musser-J"
            },
            {
                "family_name": "Mufson",
                "given_name": "S.",
                "clpid": "Mufson-S"
            },
            {
                "family_name": "Gebhard",
                "given_name": "M.",
                "clpid": "Gebhard-M"
            }
        ],
        "contributor": [
            {
                "family_name": "Shopbell",
                "given_name": "Patrick L.",
                "clpid": "Shopbell-P-L"
            },
            {
                "family_name": "Britton",
                "given_name": "Matthew C.",
                "clpid": "Britton-M-C"
            },
            {
                "family_name": "Ebert",
                "given_name": "Rick",
                "clpid": "Ebert-R-J"
            }
        ],
        "abstract": "One of the new frontiers of astronomical research is the exploration of time variability on the sky at different wavelengths and flux levels. We have carried out a pilot project using DPOSS data to study strong variables and transients, and are now extending it to the new Palomar-QUEST synoptic sky survey. We report on our early findings and outline the methodology to be implemented in preparation for a real-time transient detection pipeline. In addition to large numbers of known types of highly variable sources (e.g., SNe, CVs, OVV QSOs, etc.), we expect to find numerous transients whose nature may be established by a rapid follow-up. Whereas we will make all detected variables publicly available through the web, we anticipate that email alerts would be issued in the real time for a subset of events deemed to be the most interesting. This real-time process entails many challenges, in an effort to maintain a high completeness while keeping the contamination low. We will utilize distributed Grid services developed by the GRIST project, and implement a variety of advanced statistical and machine learning techniques.",
        "isbn": "1-58381-215-6",
        "publisher": "Astronomical Society of the Pacific",
        "place_of_publication": "San Francisco, CA",
        "publication_date": "2005",
        "pages": "604-608"
    },
    {
        "id": "authors:ewy8z-hjt79",
        "collection": "authors",
        "collection_id": "ewy8z-hjt79",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20110810-140105650",
        "type": "book_section",
        "title": "VOStat: A Distributed Statistical Toolkit for the Virtual Observatory",
        "book_title": "Astronomical Data Analysis Software and Systems XIV",
        "author": [
            {
                "family_name": "Graham",
                "given_name": "Matthew J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy D.",
                "orcid": "0000-0002-9145-8580",
                "clpid": "Williams-R-D"
            },
            {
                "family_name": "Babu",
                "given_name": "G. Jogesh",
                "clpid": "Babu-G-J"
            },
            {
                "family_name": "Feigelson",
                "given_name": "Eric D.",
                "orcid": "0000-0002-5077-6734",
                "clpid": "Feigelson-E-D"
            },
            {
                "family_name": "Vanden Berk",
                "given_name": "Daniel E.",
                "clpid": "Vanden-Berk-D-E"
            },
            {
                "family_name": "Nichol",
                "given_name": "Robert",
                "clpid": "Nichol-R-C"
            },
            {
                "family_name": "Wasserman",
                "given_name": "Larry",
                "clpid": "Wasserman-L"
            }
        ],
        "contributor": [
            {
                "family_name": "Shopbell",
                "given_name": "Patrick L.",
                "clpid": "Shopbell-P-L"
            },
            {
                "family_name": "Britton",
                "given_name": "Matthew C.",
                "clpid": "Britton-M-C"
            },
            {
                "family_name": "Ebert",
                "given_name": "Rick",
                "clpid": "Ebert-R"
            }
        ],
        "abstract": "The nature of astronomical data is changing: data volumes are following Moore's law with a doubling every 18 months and data sets consisting of a billion data vectors in a 100-dimensional parameter space are becoming commonplace. Sophisticated statistical techniques are crucial to fully and efficiently exploit these and maximize the scientific return. A long-standing limitation, however, on the range and capability of such analyses has been the paucity of non-proprietary software.\n\nVOStat is the result of a cross-disciplinary collaboration between astronomers and statisticians to meet these challenges; it is a prototype knowledge-based statistical toolkit implemented within the VO paradigm for the entire astronomical community. VOStat consists of an easily extensible distributed web-based framework transparently accessed via a single science endpoint.\n\nAn exploratory science application is presented to demonstrate some of the functionality currently offered by VOStat.",
        "isbn": "1-58381-215-6",
        "publisher": "Astronomical Society of the Pacific",
        "place_of_publication": "San Francisco, CA",
        "publication_date": "2005",
        "pages": "394-398"
    },
    {
        "id": "authors:19j6v-4my77",
        "collection": "authors",
        "collection_id": "19j6v-4my77",
        "cite_using_url": "https://resolver.caltech.edu/CaltechCACR:2005.118",
        "type": "book_section",
        "title": "Grist: Grid-based Data Mining for Astronomy",
        "author": [
            {
                "family_name": "Jacob",
                "given_name": "Joseph C.",
                "clpid": "Jacob-J-C"
            },
            {
                "family_name": "Katz",
                "given_name": "Daniel S.",
                "clpid": "Katz-D-S"
            },
            {
                "family_name": "Miller",
                "given_name": "Craig D.",
                "clpid": "Miller-C-D"
            },
            {
                "family_name": "Walia",
                "given_name": "Harshpreet",
                "clpid": "Walia-H"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy",
                "clpid": "Williams-R"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. George",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Graham",
                "given_name": "Matthew",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Babu",
                "given_name": "Jogesh",
                "clpid": "Babu-J"
            },
            {
                "family_name": "Vanden Berk",
                "given_name": "Daniel E.",
                "clpid": "Vanden-Berk-D-E"
            },
            {
                "family_name": "Nichol",
                "given_name": "Robert",
                "clpid": "Nichol-R"
            }
        ],
        "contributor": [
            {
                "family_name": "Shopbell",
                "given_name": "P. L.",
                "clpid": "Shopbell-P-L"
            },
            {
                "family_name": "Britton",
                "given_name": "M. C.",
                "clpid": "Britton-M-C"
            },
            {
                "family_name": "Ebert",
                "given_name": "R.",
                "clpid": "Ebert-R-J"
            }
        ],
        "abstract": "The Grist project is developing a grid-technology based system as a research environment for astronomy with massive and complex datasets. This knowledge extraction system will consist of a library of distributed grid services controlled by a work ow system, compliant with standards emerging from the grid computing, web services, and virtual observatory communities. This new technology is being used to find high redshift quasars, study peculiar variable objects, search for transients in real time, and fit SDSS QSO spectra to measure black hole masses. Grist services are also a component of the \"hyperatlas\" project to serve high-resolution multi-wavelength imagery over the Internet. In support of these science and outreach objectives, the Grist framework will provide the enabling fabric to tie together distributed grid services in the areas of data access, federation, mining, subsetting, source extraction, image mosaicking, statistics, and visualization.",
        "publisher": "Astronomical Society of the Pacific",
        "publication_date": "2005"
    },
    {
        "id": "authors:tpzkk-gyg45",
        "collection": "authors",
        "collection_id": "tpzkk-gyg45",
        "cite_using_url": "https://resolver.caltech.edu/CaltechCACR:2005.117",
        "type": "book_section",
        "title": "Time Domain Explorations With Digital Sky Surveys",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "Ashish A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Graham",
                "given_name": "M. J.",
                "orcid": "0000-0002-3168-0139",
                "clpid": "Graham-M-J"
            },
            {
                "family_name": "Kollipara",
                "given_name": "Priya",
                "clpid": "Kollipara-P"
            },
            {
                "family_name": "Granett",
                "given_name": "Benjamin",
                "clpid": "Granett-B"
            },
            {
                "family_name": "Krause",
                "given_name": "Elisabeth",
                "clpid": "Krause-E"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy",
                "clpid": "Williams-R"
            },
            {
                "family_name": "Bogosavljevic",
                "given_name": "M.",
                "clpid": "Bogosavljevic-M"
            },
            {
                "family_name": "Baltay",
                "given_name": "C.",
                "clpid": "Baltay-C"
            },
            {
                "family_name": "Rabinowitz",
                "given_name": "D.",
                "clpid": "Rabinowitz-D"
            },
            {
                "family_name": "Bauer",
                "given_name": "A.",
                "clpid": "Bauer-A"
            },
            {
                "family_name": "Andrews",
                "given_name": "P.",
                "clpid": "Andrews-P"
            },
            {
                "family_name": "Ellman",
                "given_name": "N.",
                "clpid": "Ellman-N"
            },
            {
                "family_name": "Duffau",
                "given_name": "S.",
                "clpid": "Duffau-S"
            },
            {
                "family_name": "Jerke",
                "given_name": "J.",
                "clpid": "Jerke-J"
            },
            {
                "family_name": "Rengstorf",
                "given_name": "A.",
                "clpid": "Rengstorf-A"
            },
            {
                "family_name": "Brunner",
                "given_name": "R.",
                "clpid": "Brunner-R"
            },
            {
                "family_name": "Musser",
                "given_name": "J.",
                "clpid": "Musser-J"
            },
            {
                "family_name": "Mufson",
                "given_name": "S.",
                "clpid": "Mufson-S"
            },
            {
                "family_name": "Gebhard",
                "given_name": "M.",
                "clpid": "Gebhard-M"
            }
        ],
        "contributor": [
            {
                "family_name": "Shopbell",
                "given_name": "P. L."
            },
            {
                "family_name": "Britton",
                "given_name": "M. C."
            },
            {
                "family_name": "Ebert",
                "given_name": "R."
            }
        ],
        "abstract": "One of the new frontiers of astronomical research is the exploration of time variability on the sky at different wavelengths and flux levels. We have carried out a pilot project using DPOSS data to study strong variables and transients, and are now extending it to the new Palomar-QUEST synoptic sky survey. We report on our early findings and outline the methodology to be implemented in preparation for a real-time transient detection pipeline. In addition to large numbers of known types of highly variable sources (e.g., SNe, CVs, OVV QSOs, etc.), we expect to find numerous transients whose nature may be established by a rapid follow-up. Whereas we will make all detected variables publicly available through the web, we anticipate that email alerts would be issued in the real time for a subset of events deemed to be the most interesting. This real-time process entails many challenges, in an effort to maintain a high completeness while keeping the contamination low. We will utilize distributed Grid services developed by the GRIST project, and implement a variety of advanced statistical and machine learning techniques.",
        "publisher": "Astronomical Society of the Pacific",
        "publication_date": "2005"
    },
    {
        "id": "authors:qahq6-mdw60",
        "collection": "authors",
        "collection_id": "qahq6-mdw60",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180111-100855939",
        "type": "book_section",
        "title": "The cosmic gamma-ray bursts and their host galaxies in a cosmological context",
        "book_title": "Discoveries and Research Prospects from 6- to 10-Meter-Class Telescopes II",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "Stan G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Kulkarni",
                "given_name": "Shrinivas R.",
                "orcid": "0000-0001-5390-8563",
                "clpid": "Kulkarni-S-R"
            },
            {
                "family_name": "Frail",
                "given_name": "Dale A.",
                "clpid": "Frail-D-A"
            },
            {
                "family_name": "Harrison",
                "given_name": "Fiona A.",
                "orcid": "0000-0002-4226-8959",
                "clpid": "Harrison-F-A"
            },
            {
                "family_name": "Bloom",
                "given_name": "Joshua S.",
                "orcid": "0000-0002-7777-216X",
                "clpid": "Bloom-J-S"
            },
            {
                "family_name": "Berger",
                "given_name": "Edo",
                "orcid": "0000-0002-9392-9681",
                "clpid": "Berger-Edo"
            },
            {
                "family_name": "Price",
                "given_name": "P. A.",
                "clpid": "Price-P-A"
            },
            {
                "family_name": "Fox",
                "given_name": "D.",
                "orcid": "0000-0002-3714-672X",
                "clpid": "Fox-D-B"
            },
            {
                "family_name": "Soderberg",
                "given_name": "A. M.",
                "clpid": "Soderberg-A-M"
            },
            {
                "family_name": "Galama",
                "given_name": "Titus J.",
                "clpid": "Galama-T-J"
            },
            {
                "family_name": "Reichart",
                "given_name": "D. E.",
                "orcid": "0000-0002-5060-3673",
                "clpid": "Reichart-D-E"
            },
            {
                "family_name": "Sari",
                "given_name": "Re'em",
                "orcid": "0000-0002-1084-3656",
                "clpid": "Sari-R"
            },
            {
                "family_name": "Yost",
                "given_name": "S.",
                "clpid": "Yost-S-A"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Castro",
                "given_name": "S. M.",
                "clpid": "Castro-S-M"
            },
            {
                "family_name": "Goodrich",
                "given_name": "Robert",
                "clpid": "Goodrich-R-W"
            },
            {
                "family_name": "Chaffee",
                "given_name": "F.",
                "clpid": "Chaffee-F"
            }
        ],
        "contributor": [
            {
                "family_name": "Guhathakurta",
                "given_name": "Puragra",
                "clpid": "Guhathakurta-P"
            }
        ],
        "abstract": "Studies of the cosmic gamma-ray bursts (GRBs) and their host galaxies are now starting to provide interesting or even unique new insights in observational cosmology. Observed GRB host galaxies have a median magnitude R~25 mag, and show a range of luminosities, morphologies, and star formation rates, with a median redshift z~1. They represent a new way of identifying a population of star-forming galaxies at cosmological redshifts, which is mostly independent of the traditional selection methods. They seem to be broadly similar to the normal field galaxy populations at comparable redshifts and magnitudes, and indicate at most a mild luminosity evolution over the redshift range they probe. Studies of GRB optical afterglows seen in absorption provide a powerful new probe of the ISM in dense, central regions of their host galaxies, which is complementary to the traditional studies using QSO absorption line systems. Some GRB hosts are heavily obscured, and provide a new way to select a population of cosmological sub-mm sources. A census of detected optical tranistents may provide an important new way to constrain the total obscured fraction of star formation over the history of the universe. Finally, detection of GRB afterglows at high redshifts (z&gt;6) may provide a unique way to probe the primordial star formation, massive IMF, early IGM, and chemical enrichment at the end of the cosmic reionization era.",
        "doi": "10.1117/12.457700",
        "isbn": "0-8194-4613-0",
        "publisher": "Society of Photo-Optical Instrumentation Engineers (SPIE)",
        "place_of_publication": "Bellingham, WA",
        "publication_date": "2003-02-13",
        "pages": "238-247"
    },
    {
        "id": "authors:rj4nc-94470",
        "collection": "authors",
        "collection_id": "rj4nc-94470",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190723-144623682",
        "type": "book_section",
        "title": "Challenges for Cluster Analysis in a Virtual Observatory",
        "book_title": "Statistical Challenges in Astronomy",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Brunner",
                "given_name": "R.",
                "clpid": "Brunner-R-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "clpid": "Williams-R"
            },
            {
                "family_name": "Granat",
                "given_name": "R.",
                "clpid": "Granat-R"
            },
            {
                "family_name": "Stolorz",
                "given_name": "P.",
                "clpid": "Stolorz-P"
            }
        ],
        "contributor": [
            {
                "family_name": "Feigelson",
                "given_name": "Eric D.",
                "clpid": "Feigelson-E-D"
            },
            {
                "family_name": "Babu",
                "given_name": "G. Jogesh",
                "clpid": "Babu-G-J"
            }
        ],
        "abstract": "There has been an unprecedented and continuing growth in the volume, quality, and complexity of astronomical data sets over the past few years, mainly through large digital sky surveys. Virtual Observatory (VO) concept represents a scientific and technological framework needed to cope with this data flood. We review some of the applied statistics and computing challenges posed by the analysis of large and complex data sets expected in the VO-based research. The challenges are driven both by the size and the complexity of the data sets (billions of data vectors in parameter spaces of tens or hundreds of dimensions), by the heterogeneity of the data and measurement errors, the selection effects and censored data, and by the intrinsic clustering properties (functional form, topology) of the data distribution in the parameter space of observed attributes. Examples of scientific questions one may wish to address include: objective determination of the numbers of object classes present in the data, and the membership probabilities for each source; searches for unusual, rare, or even new types of objects and phenomena; discovery of physically interesting multivariate correlations which may be present in some of the clusters; etc.",
        "doi": "10.1007/0-387-21529-8_9",
        "isbn": "978-0-387-95546-9",
        "publisher": "Springer",
        "place_of_publication": "New York, NY",
        "publication_date": "2003",
        "pages": "127-141"
    },
    {
        "id": "authors:fxv00-gr748",
        "collection": "authors",
        "collection_id": "fxv00-gr748",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180711-100710320",
        "type": "book_section",
        "title": "Topic maps for custom viewing of data",
        "book_title": "Virtual Observatories",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy",
                "clpid": "Williams-R-E"
            },
            {
                "family_name": "Brunner",
                "given_name": "Robert",
                "clpid": "Brunner-R-J"
            }
        ],
        "contributor": [
            {
                "family_name": "Szalay",
                "given_name": "Alexander S.",
                "clpid": "Szalay-A-S"
            }
        ],
        "abstract": "A Topic Map is a structured network of hyperlinks that points into an information pool. Topic Maps have an existence independent of the information pool and hence different Topic Maps can form different layers above the same information pool and provide us with different views of it. We explore the use of Topic Maps with the Unified Column Descriptor (UCD) scheme developed in the frame of the ESO-CDS data mining project. UCD, with its multi-tier hierarchical structure, categorizes parameters reported in tables and catalogs. By using Topic Maps we show how columns from different catalogs with similar but not identical descriptions could be combined. A direct application for the Virtual Observatory community is that of merging catalogs in order to generate customized views of data.",
        "doi": "10.1117/12.461516",
        "isbn": "0819446254",
        "publisher": "Society of Photo-optical Instrumentation Engineers (SPIE)",
        "place_of_publication": "Bellingham, WA",
        "publication_date": "2002-12-16",
        "pages": "65-76"
    },
    {
        "id": "authors:dgpaj-jqb45",
        "collection": "authors",
        "collection_id": "dgpaj-jqb45",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180711-093541133",
        "type": "book_section",
        "title": "yourSky: rapid desktop access to custom astronomical image mosaics",
        "book_title": "Virtual Observatories",
        "author": [
            {
                "family_name": "Jacob",
                "given_name": "Joseph C.",
                "clpid": "Jacob-J-C"
            },
            {
                "family_name": "Brunner",
                "given_name": "Robert",
                "clpid": "Brunner-R-J"
            },
            {
                "family_name": "Curkendall",
                "given_name": "David W.",
                "clpid": "Curkendall-D-W"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. George",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Good",
                "given_name": "John C.",
                "clpid": "Good-J-C"
            },
            {
                "family_name": "Husman",
                "given_name": "Laura",
                "clpid": "Husman-L"
            },
            {
                "family_name": "Kremenek",
                "given_name": "George",
                "clpid": "Kremenek-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            }
        ],
        "contributor": [
            {
                "family_name": "Szalay",
                "given_name": "Alexander S.",
                "clpid": "Szalay-A-S"
            }
        ],
        "abstract": "The yourSky custom astronomical image mosaicking software has a Web portal architecture that allows access via ordinary desktop computers with low bandwidth network connections to high performance and highly customizable mosaicking software deployed in a high performance computing and communications environment. The emphasis is on custom access to image mosaics constructed from terabytes of raw image data stored in remote archives. In this context, custom access refers to new technology that enables on the fly mosaicking to meet user-specified criteria for region of the sky to be mosaicked, datasets to be used, resolution, coordinate system, projection, data type and image format. The yourSky server is a fully automated end-to-end system that handles all aspects of the mosaic construction. This includes management of mosaic requests, determining which input images are required to fulfill each request, management of a data cache for both input image plates and output mosaics, retrieval of input image plates from massive remote archives, image mosaic construction on a multiprocessor system, and making the result accessible to the user on the desktop. The URL for yourSky is http://yourSky.jpl.nasa.gov.",
        "doi": "10.1117/12.461514",
        "isbn": "0819446254",
        "publisher": "Society of Photo-optical Instrumentation Engineers (SPIE)",
        "place_of_publication": "Bellingham, WA",
        "publication_date": "2002-12-16",
        "pages": "53-64"
    },
    {
        "id": "authors:7j1wj-hap91",
        "collection": "authors",
        "collection_id": "7j1wj-hap91",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180711-110835813",
        "type": "book_section",
        "title": "Topic maps as a virtual observatory tool",
        "book_title": "Astronomical Data Analysis",
        "author": [
            {
                "family_name": "Mahabal",
                "given_name": "Ashish",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Brunner",
                "given_name": "Robert",
                "clpid": "Brunner-R-J"
            },
            {
                "family_name": "Williams",
                "given_name": "Roy",
                "clpid": "Williams-R-E"
            }
        ],
        "contributor": [
            {
                "family_name": "Starck",
                "given_name": "Jean-Luc",
                "clpid": "Starck-J-L"
            },
            {
                "family_name": "Murtagh",
                "given_name": "Fionn D.",
                "clpid": "Murtagh-F-D"
            }
        ],
        "abstract": "One major component of the VO will be catalogs measuring gigabytes and terabytes if not more. Some mechanism like XML will be used for structuring the information. However, such mechanisms are not good for information retrieval on their own. For retrieval we use queries. Topic Maps that have started becoming popular recently are excellent for segregating information that results from a query. A Topic Map is a structured network of hyperlinks above an information pool. Different Topic Maps can form different layers above the same information pool and provide us with different views of it. This facilitates in being able to ask exact questions, aiding us in looking for gold needles in the proverbial haystack. Here we will discuss the specifics of what Topic Maps are and how they can be implemented within the VO framework.",
        "doi": "10.1117/12.447170",
        "isbn": "9780819441911",
        "publisher": "Society of Photo-optical Instrumentation Engineers (SPIE)",
        "place_of_publication": "Bellingham, WA",
        "publication_date": "2001-11-01",
        "pages": "161-172"
    },
    {
        "id": "authors:gp1vj-z7521",
        "collection": "authors",
        "collection_id": "gp1vj-z7521",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20180711-092405965",
        "type": "book_section",
        "title": "Exploration of parameter spaces in a virtual observatory",
        "book_title": "Astronomical Data Analysis",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Brunner",
                "given_name": "R.",
                "clpid": "Brunner-R-J"
            },
            {
                "family_name": "Williams",
                "given_name": "R.",
                "clpid": "Williams-R-E"
            },
            {
                "family_name": "Granat",
                "given_name": "R.",
                "clpid": "Granat-R"
            },
            {
                "family_name": "Curkendall",
                "given_name": "D.",
                "clpid": "Curkendall-D-W"
            },
            {
                "family_name": "Jacob",
                "given_name": "J.",
                "clpid": "Jacob-J-C"
            },
            {
                "family_name": "Stolorz",
                "given_name": "P.",
                "clpid": "Stolorz-P"
            }
        ],
        "contributor": [
            {
                "family_name": "Starck",
                "given_name": "J.-L.",
                "clpid": "Starck-J-L"
            },
            {
                "family_name": "Murtagh",
                "given_name": "Fionn",
                "clpid": "Murtagh-F"
            }
        ],
        "abstract": "Like every other field of intellectual endeavor, astronomy is being revolutionized by the advances in information technology. There is an ongoing exponential growth in the volume, quality, and complexity of astronomical data sets, mainly through large digital sky surveys and archives. The Virtual Observatory (VO) concept represents a scientific and technological framework needed to cope with this data flood. Systematic exploration of the observable parameter spaces, covered by large digital sky surveys spanning a range of wavelengths, will be one of the primary modes of research with a VO. This is where the truly new discoveries will be made, and new insights be gained about the already known astronomical objects and phenomena. We review some of the methodological challenges posed by the analysis of large and complex data sets expected in the VO-based research. The challenges are driven both by the size and the complexity of the data sets (billions of data vectors in parameter spaces of tens or hundreds of dimensions), by the heterogeneity of the data and measurement errors, including differences in basic survey parameters for the federated data sets (e.g., in the positional accuracy and resolution, wavelength coverage, time baseline, etc.), various selection effects, as well as the intrinsic clustering properties (functional form, topology) of the data distributions in the parameter spaces of observed attributes. Answering these challenges will require substantial collaborative efforts and partnerships between astronomers, computer scientists, and statisticians.",
        "doi": "10.1117/12.447189",
        "isbn": "9780819441911",
        "publisher": "Society of Photo-optical Instrumentation Engineers (SPIE)",
        "place_of_publication": "Bellingham, WA",
        "publication_date": "2001-11-01",
        "pages": "43-52"
    },
    {
        "id": "authors:0cbxq-s8r44",
        "collection": "authors",
        "collection_id": "0cbxq-s8r44",
        "cite_using_url": "https://resolver.caltech.edu/CaltechAUTHORS:20190723-160758075",
        "type": "book_section",
        "title": "Exploration of Large Digital Sky Surveys",
        "book_title": "Mining the Sky",
        "author": [
            {
                "family_name": "Djorgovski",
                "given_name": "S. G.",
                "orcid": "0000-0002-0603-3087",
                "clpid": "Djorgovski-S-G"
            },
            {
                "family_name": "Brunner",
                "given_name": "R. J.",
                "clpid": "Brunner-R-J"
            },
            {
                "family_name": "Mahabal",
                "given_name": "A. A.",
                "orcid": "0000-0003-2242-0244",
                "clpid": "Mahabal-A-A"
            },
            {
                "family_name": "Odewahn",
                "given_name": "S. C.",
                "clpid": "Odewahn-S-C"
            },
            {
                "family_name": "de Carvalho",
                "given_name": "R. R.",
                "orcid": "0000-0002-1283-3363",
                "clpid": "de-Carvalho-R-R"
            },
            {
                "family_name": "Gal",
                "given_name": "R. R.",
                "clpid": "Gal-R-R"
            },
            {
                "family_name": "Stolorz",
                "given_name": "P.",
                "clpid": "Stolorz-P"
            },
            {
                "family_name": "Granat",
                "given_name": "R.",
                "clpid": "Granat-R"
            },
            {
                "family_name": "Curkendall",
                "given_name": "D.",
                "clpid": "Curkendall-D-W"
            },
            {
                "family_name": "Jacob",
                "given_name": "J.",
                "clpid": "Jacob-J"
            },
            {
                "family_name": "Castro",
                "given_name": "S.",
                "clpid": "Castro-S"
            }
        ],
        "contributor": [
            {
                "family_name": "Banday",
                "given_name": "Anthony J.",
                "clpid": "Banday-A-J"
            },
            {
                "family_name": "Zaroubi",
                "given_name": "Saleem",
                "clpid": "Zaroubi-S"
            },
            {
                "family_name": "Bartelmann",
                "given_name": "Matthias",
                "clpid": "Bartelmann-M"
            }
        ],
        "abstract": "We review some of the scientific opportunities and technical challenges posed by the exploration of the large digital sky surveys, in the context of a Virtual Observatory (VO). The VO paradigm will profoundly change the way observational astronomy is done. Clustering analysis techniques can be used to discover samples of rare, unusual, or even previously unknown types of astronomical objects and phenomena. Exploration of the previously poorly probed portions of the observable parameter space is especially promising. We illustrate some of the possible types of studies with examples drawn from DPOSS; much more complex and interesting applications are forthcoming. Development of the new tools needed for an efficient exploration of these vast data sets requires a synergy between astronomy and information sciences, with great potential returns for both fields.",
        "doi": "10.1007/10849171_37",
        "isbn": "978-3-540-42468-0",
        "publisher": "Springer",
        "place_of_publication": "Berlin",
        "publication_date": "2001",
        "pages": "305-322"
    }
]