@article{majumder_terando_hiers_collazo_reich_2025, title={A spatiotemporal optimization engine for prescribed burning in the Southeast US}, volume={85}, ISSN={["1878-0512"]}, url={https://doi.org/10.1016/j.ecoinf.2024.102956}, DOI={10.1016/j.ecoinf.2024.102956}, journal={ECOLOGICAL INFORMATICS}, author={Majumder, Reetam and Terando, Adam J. and Hiers, J. Kevin and Collazo, Jaime A. and Reich, Brian J.}, year={2025}, month={Mar} } @article{dadashova_smith_haider_reich_2025, title={Bayesian inference informed by parameter subset selection for a minimal PBPK brain model}, volume={383}, ISSN={["1471-2962"]}, DOI={10.1098/rsta.2024.0219}, abstractNote={Physiologically based pharmacokinetic (PBPK) models use a mechanistic approach to delineate the processes of the absorption, distribution, metabolism and excretion of biological substances in various species. These models generally comprise coupled systems of ordinary differential equations involving multiple states and a moderate to a large number of parameters. Such models contain compartments corresponding to various organs or tissues in the body. Before employing the models for treatment, the quantification of uncertainties for the parameters, based on a priori information or data for a specific response, is necessary. This requires the determination of identifiable parameters, which are uniquely determined by data, and uncertainty analysis based on frequentist or Bayesian inference. We introduce a strategy to integrate parameter subset selection, based on identifiability analysis, with Bayesian inference. This approach further refines the subset of identifiable parameters, quantifies parameter and response uncertainties, enhances model prediction and reduces computational cost. 
This article is part of the theme issue ‘Uncertainty quantification for healthcare and biological systems (Part 1)’.}, number={2292}, journal={PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY A-MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES}, author={Dadashova, Kamala and Smith, Ralph C. and Haider, Mansoor A. and Reich, Brian J.}, year={2025}, month={Mar} } @article{nag_sun_reich_2025, title={Bivariate DeepKriging for Large-Scale Spatial Interpolation of Wind Fields}, volume={3}, ISSN={["1537-2723"]}, DOI={10.1080/00401706.2025.2453197}, abstractNote={High spatial resolution wind data play a crucial role in various fields such as climate, oceanography, and meteorology. However, spatial interpolation or downscaling of bivariate wind fields, characterized by velocity in two dimensions, poses a challenge due to their non-Gaussian nature, high spatial variability, and heterogeneity. While cokriging is commonly employed in spatial statistics for predicting bivariate spatial fields, it is suboptimal for non-Gaussian processes and computationally prohibitive for large datasets. In this paper, we introduce bivariate DeepKriging, a novel method utilizing a spatially dependent deep neural network (DNN) with an embedding layer constructed by spatial radial basis functions for predicting bivariate spatial data. Additionally, we devise a distribution-free uncertainty quantification technique based on bootstrap and ensemble DNN. We establish the theoretical basis for bivariate DeepKriging by linking it with the Linear Model of Coregionalization (LMC). Our proposed approach surpasses traditional cokriging predictors, including those utilizing commonly used covariance functions like the linear model of co-regionalization and parsimonious bivariate Matérn covariance. We demonstrate the computational efficiency and scalability of the proposed DNN model, achieving computation speeds approximately 20 times faster than conventional techniques. 
Furthermore, we apply the bivariate DeepKriging method to wind data across the Middle East region at 506,771 locations, showcasing superior prediction performance over cokriging predictors while significantly reducing computation time.}, journal={TECHNOMETRICS}, author={Nag, Pratik and Sun, Ying and Reich, Brian J.}, year={2025}, month={Mar} } @article{hector_reich_eloyan_2025, title={Distributed model building and recursive integration for big spatial data modeling}, volume={81}, ISSN={["1541-0420"]}, DOI={10.1093/biomtc/ujae159}, abstractNote={ABSTRACT Motivated by the need for computationally tractable spatial methods in neuroimaging studies, we develop a distributed and integrated framework for estimation and inference of Gaussian process model parameters with ultra-high-dimensional likelihoods. We propose a shift in viewpoint from whole to local data perspectives that is rooted in distributed model building and integrated estimation and inference. The framework’s backbone is a computationally and statistically efficient integration procedure that simultaneously incorporates dependence within and between spatial resolutions in a recursively partitioned spatial domain. Statistical and computational properties of our distributed approach are investigated theoretically and in simulations. The proposed approach is used to extract new insights into autism spectrum disorder from the autism brain imaging data exchange.}, number={1}, journal={BIOMETRICS}, author={Hector, Emily C. and Reich, Brian J. and Eloyan, Ani}, year={2025}, month={Jan} } @article{trostle_guinness_reich_2024, title={A Gaussian-process approximation to a spatial SIR process using moment closures and emulators}, volume={80}, ISSN={["1541-0420"]}, DOI={10.1093/biomtc/ujae068}, abstractNote={ABSTRACT The dynamics that govern disease spread are hard to model because infections are functions of both the underlying pathogen as well as human or animal behavior. 
This challenge is increased when modeling how diseases spread between different spatial locations. Many proposed spatial epidemiological models require trade-offs to fit, either by abstracting away theoretical spread dynamics, fitting a deterministic model, or by requiring large computational resources for many simulations. We propose an approach that approximates the complex spatial spread dynamics with a Gaussian process. We first propose a flexible spatial extension to the well-known SIR stochastic process, and then we derive a moment-closure approximation to this stochastic process. This moment-closure approximation yields ordinary differential equations for the evolution of the means and covariances of the susceptibles and infectious through time. Because these ODEs are a bottleneck to fitting our model by MCMC, we approximate them using a low-rank emulator. This approximation serves as the basis for our hierarchical model for noisy, underreported counts of new infections by spatial location and time. We demonstrate using our model to conduct inference on simulated infections from the underlying, true spatial SIR jump process. We then apply our method to model counts of new Zika infections in Brazil from late 2015 through early 2016.}, number={3}, journal={BIOMETRICS}, author={Trostle, Parker and Guinness, Joseph and Reich, Brian J.}, year={2024}, month={Jul} }

% Key: the exported key contained a space ("..._et al._2024"). BibTeX ends a
% citation key at whitespace, so the space is replaced with "_".
% Authors: the dangling "Andradi-" given name appears to be a mis-split of the
% hyphenated surname "Andradi-Brown"; the literal name "et al." is replaced by
% the BibTeX token "others" so the style renders the truncation itself.
@article{gill_lester_free_pfaff_iversen_reich_yang_ahmadia_brown_darling_et_al._2024,
  title={A diverse portfolio of marine protected areas can better advance global conservation and equity},
  volume={121},
  ISSN={["1091-6490"]},
  DOI={10.1073/pnas.2313205121},
  abstractNote={Marine protected areas (MPAs) are widely used for ocean conservation, yet the relative impacts of various types of MPAs are poorly understood. We estimated impacts on fish biomass from no-take and multiple-use (fished) MPAs, employing a rigorous matched counterfactual design with a global dataset of >14,000 surveys in and around 216 MPAs. Both no-take and multiple-use MPAs generated positive conservation outcomes relative to no protection (58.2% and 12.6% fish biomass increases, respectively), with smaller estimated differences between the two MPA types when controlling for additional confounding factors (8.3% increase). Relative performance depended on context and management: no-take MPAs performed better in areas of high human pressure but similar to multiple-use in remote locations. Multiple-use MPA performance was low in high-pressure areas but improved significantly with better management, producing similar outcomes to no-take MPAs when adequately staffed and appropriate use regulations were applied. For priority conservation areas where no-take restrictions are not possible or ethical, our findings show that a portfolio of well-designed and well-managed multiple-use MPAs represents a viable and potentially equitable pathway to advance local and global conservation.},
  number={10},
  journal={PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE UNITED STATES OF AMERICA},
  author={Gill, David A. and Lester, Sarah E. and Free, Christopher M. and Pfaff, Alexander and Iversen, Edwin and Reich, Brian J. and Yang, Shu and Ahmadia, Gabby and Andradi-Brown, Dominic A. and Darling, Emily S. and others},
  year={2024},
  month={Mar}
}

% Key: the space in "patterson iii" is replaced with "_" (a key ends at whitespace).
% Authors: the generational suffix is given in "Last, Jr, First" form so that
% "III" is not parsed as part of the surname.
% NOTE(review): the volume value 12 equals the month number (Dec) and may be an
% early-access export artifact -- confirm against the publisher record.
@article{zulian_pacifici_bacheler_buckel_patterson_iii_reich_shertzer_hostetter_2024,
  title={Applying mark-resight, count, and telemetry data to estimate effective sampling area and fish density with stationary underwater cameras},
  volume={12},
  ISSN={["1205-7533"]},
  url={https://doi.org/10.1139/cjfas-2023-0373},
  DOI={10.1139/cjfas-2023-0373},
  abstractNote={Accurate estimates of abundance and density for geographically open populations must account for the effective sampling area (ESA) of sampling gear. We describe a Marked N-Mixture model to estimate ESA and density (number of individuals/unit area) from repeated counts of unmarked and marked individuals, integrating mark-resight, camera counts, and telemetry data of red snapper (Lutjanus campechanus) at a 1.6 km2 reef off North Carolina, USA. Cameras recorded observations of unmarked and marked individuals, whereas telemetry data indicated the number of tagged fish present on the reef. We estimated density (95 individuals/km2, 95%CI.:58–149), ESA (which was lower when current direction was towards the camera), detection probability (0.06, 95%CI.: 0.03–0.09), and covariate relationships. Simulation studies under different scenarios of data quality and space use identified positive bias in density estimates from N-mixture models due to fish movement. In contrast, the Marked N-Mixture model returned unbiased estimates of density, ESA, and detection parameters, and appears to be a more robust method for modeling density given the data available for this analysis. This approach can be applied to other populations where count and telemetry data overlap in space and time.},
  journal={CANADIAN JOURNAL OF FISHERIES AND AQUATIC SCIENCES},
  author={Zulian, Viviane and Pacifici, Krishna and Bacheler, Nathan M. and Buckel, Jeffrey A. and Patterson, III, William F. and Reich, Brian J. and Shertzer, Kyle W.
and Hostetter, Nathan J.}, year={2024}, month={Dec} } @article{chen_li_reich_sun_2024, title={DEEPKRIGING: SPATIALLY DEPENDENT DEEP NEURAL NETWORKS FOR SPATIAL PREDICTION}, volume={34}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202021.0277}, abstractNote={In spatial statistics, a common objective is to predict values of a spatial process at unobserved locations by exploiting spatial dependence. Kriging provides the best linear unbiased predictor using covariance functions and is often associated with Gaussian processes. However, when considering non-linear prediction for non-Gaussian and categorical data, the Kriging prediction is no longer optimal, and the associated variance is often overly optimistic. Although deep neural networks (DNNs) are widely used for general classification and prediction, they have not been studied thoroughly for data with spatial dependence. In this work, we propose a novel DNN structure for spatial prediction, where the spatial dependence is captured by adding an embedding layer of spatial coordinates with basis functions. We show in theory and simulation studies that the proposed DeepKriging method has a direct link to Kriging in the Gaussian case, and it has multiple advantages over Kriging for non-Gaussian and non-stationary data, i.e., it provides non-linear predictions and thus has smaller approximation errors, it does not require operations on covariance matrices and thus is scalable for large datasets, and with sufficiently many hidden neurons, it provides the optimal prediction in terms of model capacity. We further explore the possibility of quantifying prediction uncertainties based on density prediction without assuming any data distribution. Finally, we apply the method to predicting PM2.5 concentrations across the continental United States.}, number={1}, journal={STATISTICA SINICA}, author={Chen, Wanfang and Li, Yuxiao and Reich, Brian J. 
and Sun, Ying}, year={2024}, month={Jan}, pages={291–311} } @article{wu_gao_yang_reich_rappold_2024, title={Estimating spatially varying health effects of wildland fire smoke using mobile health data}, volume={7}, ISSN={["1467-9876"]}, url={https://doi.org/10.1093/jrsssc/qlae034}, DOI={10.1093/jrsssc/qlae034}, abstractNote={Abstract Wildland fire smoke exposures are an increasing threat to public health, highlighting the need for studying the effects of protective behaviours on reducing health outcomes. Emerging smartphone applications provide unprecedented opportunities to deliver health risk communication messages to a large number of individuals in real-time and subsequently study the effectiveness, but also pose methodological challenges. Smoke Sense, a citizen science project, provides an interactive smartphone app platform for participants to engage with information about air quality, and ways to record their own health symptoms and actions taken to reduce smoke exposure. We propose a doubly robust estimator of the structural nested mean model that accounts for spatially and time-varying effects via a local estimating equation approach with geographical kernel weighting. Moreover, our analytical framework also handles informative missingness by inverse probability weighting of estimating functions. We evaluate the method using extensive simulation studies and apply it to Smoke Sense data to increase the knowledge base about the relationship between health preventive measures and health-related outcomes. Our results show that the protective behaviours’ effects vary over space and time and find that protective behaviours have more significant effects on reducing health symptoms in the Southwest than the Northwest region of the U.S.}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C-APPLIED STATISTICS}, author={Wu, Lili and Gao, Chenyin and Yang, Shu and Reich, Brian J. 
and Rappold, Ana G.}, year={2024}, month={Jul} } @article{majumder_reich_shaby_2024, title={MODELING EXTREMAL STREAMFLOW USING DEEP LEARNING APPROXIMATIONS AND A FLEXIBLE SPATIAL PROCESS}, volume={18}, ISSN={["1941-7330"]}, DOI={10.1214/23-AOAS1847}, abstractNote={Quantifying changes in the probability and magnitude of extreme flooding events is key to mitigating their impacts. While hydrodynamic data are inherently spatially dependent, traditional spatial models, such as Gaussian processes, are poorly suited for modeling extreme events. Spatial extreme value models with more realistic tail dependence characteristics are under active development. They are theoretically justified but give intractable likelihoods, making computation challenging for small datasets and prohibitive for continental-scale studies. We propose a process mixture model (PMM) which specifies spatial dependence in extreme values as a convex combination of a Gaussian process and a max-stable process, yielding desirable tail dependence properties but intractable likelihoods. To address this, we employ a unique computational strategy where a feed-forward neural network is embedded in a density regression model to approximate the conditional distribution at one spatial location, given a set of neighbors. We then use this univariate density function to approximate the joint likelihood for all locations by way of a Vecchia approximation. The PMM is used to analyze changes in annual maximum streamflow within the U.S. over the last 50 years and is able to detect areas which show increases in extreme streamflow over time.}, number={2}, journal={ANNALS OF APPLIED STATISTICS}, author={Majumder, Reetam and Reich, Brian J. 
and Shaby, Benjamin A.}, year={2024}, month={Jun}, pages={1519–1542} }

% Hydro-EVE / 6:2 FTS letter (DOI 10.1289/EHP14503). Key kept unchanged.
@article{kotlarz_mccord_wiecha_weed_cuffney_enders_strynar_knappe_reich_hoppin_2024,
  title={Measurement of Hydro-EVE and 6:2 FTS in Blood from Wilmington, North Carolina, Residents, 2017-2018},
  volume={132},
  ISSN={["1552-9924"]},
  url={https://doi.org/10.1289/EHP14503},
  DOI={10.1289/EHP14503},
  number={2},
  journal={ENVIRONMENTAL HEALTH PERSPECTIVES},
  author={Kotlarz, Nadine and McCord, James and Wiecha, Nate and Weed, Rebecca A. and Cuffney, Michael and Enders, Jeffrey R. and Strynar, Mark and Knappe, Detlef R. U. and Reich, Brian J. and Hoppin, Jane A.},
  year={2024},
  month={Feb}
}

@article{simafranca_willoughby_o'neil_farr_reich_giertych_johnson_pascolini-campbell_2024,
  title={Modeling wildland fire burn severity in California using a spatial Super Learner approach},
  ISSN={["1573-3009"]},
  DOI={10.1007/s10651-024-00601-1},
  journal={ENVIRONMENTAL AND ECOLOGICAL STATISTICS},
  author={Simafranca, Nicholas and Willoughby, Bryant and O'Neil, Erin and Farr, Sophie and Reich, Brian J. and Giertych, Naomi and Johnson, Margaret C. and Pascolini-Campbell, Madeleine A.},
  year={2024},
  month={Mar}
}

% PFO5DoA reanalysis (DOI 10.1289/EHP13339). It has the same author list and
% year as the Hydro-EVE entry above, so the generated key was identical; BibTeX
% reports a repeated entry and keeps only the first. The key is suffixed with
% "b" to make it unique. The placeholder abstract consisting of a lone comma
% was dropped.
@article{kotlarz_mccord_wiecha_weed_cuffney_enders_strynar_knappe_reich_hoppin_2024b,
  title={Reanalysis of PFO5DoA Levels in Blood from Wilmington, North Carolina, Residents, 2017-2018},
  volume={132},
  ISSN={["1552-9924"]},
  url={https://doi.org/10.1289/EHP13339},
  DOI={10.1289/EHP13339},
  number={2},
  journal={ENVIRONMENTAL HEALTH PERSPECTIVES},
  author={Kotlarz, Nadine and McCord, James and Wiecha, Nate and Weed, Rebecca A. and Cuffney, Michael and Enders, Jeffrey R. and Strynar, Mark and Knappe, Detlef R. U. and Reich, Brian J.
and Hoppin, Jane A.}, year={2024}, month={Feb} } @article{euan_sun_reich_2024, title={Regime-based precipitation modeling: A spatio-temporal approach}, volume={60}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2024.100818}, abstractNote={In this paper, we propose a new regime-based model to describe spatio-temporal dynamics of precipitation data. Precipitation is one of the most essential factors for multiple human-related activities such as agriculture production. Therefore, a detailed and accurate understanding of the rain for a given region is needed. Motivated by the different formations of precipitation systems (convective, frontal, and orographic), we proposed a hierarchical regime-based spatio-temporal model for precipitation data. We use information about the values of neighbouring sites to identify such regimes, allowing spatial and temporal dependence to be different among regimes. Using the Bayesian approach with R INLA, we fit our model to the Guanajuato state (Mexico) precipitation data case study to understand the spatial and temporal dependencies of precipitation in this region. Our findings show the regime-based model's versatility and compare it with the truncated Gaussian model.}, journal={SPATIAL STATISTICS}, author={Euan, Carolina and Sun, Ying and Reich, Brian J.}, year={2024}, month={Apr} } @article{yanchenko_bondell_reich_2024, title={The R2D2 Prior for Generalized Linear Mixed Models}, volume={5}, ISSN={["1537-2731"]}, DOI={10.1080/00031305.2024.2352010}, abstractNote={In Bayesian analysis, the selection of a prior distribution is typically done by considering each parameter in the model. While this can be convenient, in many scenarios it may be desirable to place a prior on a summary measure of the model instead. In this work, we propose a prior on the model fit, as measured by a Bayesian coefficient of determination (R2), which then induces a prior on the individual parameters. 
We achieve this by placing a beta prior on R2 and then deriving the induced prior on the global variance parameter for generalized linear mixed models. We derive closed-form expressions in many scenarios and present several approximation strategies when an analytic form is not possible and/or to allow for easier computation. In these situations, we suggest approximating the prior by using a generalized beta prime distribution and provide a simple default prior construction scheme. This approach is quite flexible and can be easily implemented in standard Bayesian software. Lastly, we demonstrate the performance of the method on simulated and real-world data, where the method particularly shines in high-dimensional settings, as well as modeling random effects.}, journal={AMERICAN STATISTICIAN}, author={Yanchenko, Eric and Bondell, Howard D. and Reich, Brian J.}, year={2024}, month={May} } @article{yang_ruiz-suarez_reich_guan_rappold_2023, title={A Data-Fusion Approach to Assessing the Contribution of Wildland Fire Smoke to Fine Particulate Matter in California}, volume={15}, ISSN={["2072-4292"]}, url={https://www.mdpi.com/2072-4292/15/17/4246}, DOI={10.3390/rs15174246}, abstractNote={The escalating frequency and severity of global wildfires necessitate an in-depth understanding and monitoring of wildfire smoke impacts, specifically its contribution to fine particulate matter (PM2.5). We propose a data-fusion method to study wildfire contribution to PM2.5 using satellite-derived smoke plume indicators and PM2.5 monitoring data. Our study incorporates two types of monitoring data, the high-quality but sparse Air Quality System (AQS) stations and the abundant but less accurate PurpleAir (PA) sensors that are gaining popularity among citizen scientists. 
We propose a multi-resolution spatiotemporal model specified in the spectral domain to calibrate the PA sensors against accurate AQS measurements, and leverage the two networks to estimate wildfire contribution to PM2.5 in California in 2020 and 2021. A Bayesian approach is taken to incorporate all uncertainties and our prior intuition that the dependence between networks, as well as the accuracy of PA network, vary by frequency. We find that 1% to 3% increase in PM2.5 concentration due to wildfire smoke, and that leveraging PA sensors improves accuracy.}, number={17}, journal={REMOTE SENSING}, author={Yang, Hongjian and Ruiz-Suarez, Sofia and Reich, Brian J. and Guan, Yawen and Rappold, Ana G.}, year={2023}, month={Sep} } @article{long_reich_staicu_meitzen_2023, title={A Nonparametric Test of Group Distributional Differences for Hierarchically Clustered Functional Data}, volume={79}, ISSN={0006-341X 1541-0420}, url={http://dx.doi.org/10.1111/biom.13846}, DOI={10.1111/biom.13846}, abstractNote={Abstract Biological sex and gender are critical variables in biomedical research, but are complicated by the presence of sex-specific natural hormone cycles, such as the estrous cycle in female rodents, typically divided into phases. A common feature of these cycles are fluctuating hormone levels that induce sex differences in many behaviors controlled by the electrophysiology of neurons, such as neuronal membrane potential in response to electrical stimulus, typically summarized using a priori defined metrics. In this paper, we propose a method to test for differences in the electrophysiological properties across estrous cycle phase without first defining a metric of interest. We do this by modeling membrane potential data in the frequency domain as realizations of a bivariate process, also depending on the electrical stimulus, by adopting existing methods for longitudinal functional data. 
We are then able to extract the main features of the bivariate signals through a set of basis function coefficients. We use these coefficients for testing, adapting methods for multivariate data to account for an induced hierarchical structure that is a product of the experimental design. We illustrate the performance of the proposed approach in simulations and then apply the method to experimental data.}, number={4}, journal={Biometrics}, publisher={Oxford University Press (OUP)}, author={Long, Alexander S. and Reich, Brian J. and Staicu, Ana-Maria and Meitzen, John}, year={2023}, month={Feb}, pages={3778–3791} } @article{abba_williams_reich_2023, title={A PENALIZED COMPLEXITY PRIOR FOR DEEP BAYESIAN TRANSFER LEARNING WITH APPLICATION TO MATERIALS INFORMATICS}, volume={17}, ISSN={["1941-7330"]}, DOI={10.1214/23-AOAS1759}, abstractNote={A key task in the emerging field of materials informatics is to use machine learning to predict a material’s properties and functions. A fast and accurate predictive model allows researchers to more efficiently identify or construct a material with desirable properties. As in many fields, deep learning is one of the state-of-the art approaches, but fully training a deep learning model is not always feasible in materials informatics due to limitations on data availability, computational resources, and time. Accordingly, there is a critical need in the application of deep learning to materials informatics problems to develop efficient transfer learning algorithms. The Bayesian framework is natural for transfer learning because the model trained from the source data can be encoded in the prior distribution for the target task of interest. However, the Bayesian perspective on transfer learning is relatively unaccounted for in the literature and is complicated for deep learning because the parameter space is large and the interpretations of individual parameters are unclear. 
Therefore, rather than subjective prior distributions for individual parameters, we propose a new Bayesian transfer learning approach based on the penalized complexity prior on the Kullback–Leibler divergence between the predictive models of the source and target tasks. We show via simulations that the proposed method outperforms other transfer learning methods across a variety of settings. The proposed method is applied to predict the properties of a molecular crystal, based on its structural properties, and we show improved precision for estimating the band gap of a material compared to state-of-the-art methods currently used in materials science.}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Abba, Mohamed A. and Williams, Jonathan P. and Reich, Brian J.}, year={2023}, month={Dec}, pages={3241–3256} } @article{werthmann_joode_cuffney_reich_soto-martinez_corrales-vargas_palomo-cordero_penaloza-castanedac_hoppin_2023, title={A cross-sectional analysis of medical conditions and environmental factors associated with fractional exhaled nitric oxide (FeNO) in women and children from the ISA birth cohort, Costa Rica}, volume={233}, ISSN={["1096-0953"]}, url={https://doi.org/10.1016/j.envres.2023.116449}, DOI={10.1016/j.envres.2023.116449}, abstractNote={Fractional exhaled nitric oxide (FeNO) is a marker of airway inflammation. Elevated FeNO has been associated with environmental exposures, however, studies from tropical countries are limited. Using data from the Infants' Environmental Health Study (ISA) birth cohort, we evaluated medical conditions and environmental exposures' association with elevated FeNO.}, journal={ENVIRONMENTAL RESEARCH}, author={Werthmann, Derek and Joode, Berna van Wendel and Cuffney, Michael T. and Reich, Brian J. and Soto-Martinez, Manuel E. 
and Corrales-Vargas, Andrea and Palomo-Cordero, Luis and Penaloza-Castanedac, Jorge and Hoppin, Jane A.}, year={2023}, month={Sep} } @article{majumder_reich_2023, title={A deep learning synthetic likelihood approximation of a non-stationary spatial model for extreme streamflow forecasting}, volume={55}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2023.100755}, abstractNote={Extreme streamflow is a key indicator of flood risk, and quantifying the changes in its distribution under non-stationary climate conditions is key to mitigating the impact of flooding events. We propose a non-stationary process mixture model (NPMM) for annual streamflow maxima over the central US (CUS) which uses downscaled climate model precipitation projections to forecast extremal streamflow. Spatial dependence for the model is specified as a convex combination of transformed Gaussian and max-stable processes, indexed by a weight parameter which identifies the asymptotic regime of the process. The weight parameter is modeled as a function of the annual precipitation for each of the two hydrologic regions within the CUS, introducing spatio-temporal non-stationarity within the model. The NPMM is flexible with desirable tail dependence properties, but yields an intractable likelihood. To address this, we embed a neural network within a density regression model which is used to learn a synthetic likelihood function using simulations from the NPMM with different parameter settings. Our model is fitted using observational data for 1972--2021, and inference carried out in a Bayesian framework. The two regions within the CUS are estimated to be in different asymptotic regimes based on the posterior distribution of the weight parameter. 
Annual streamflow maxima estimates based on global climate models for two representative climate pathway scenarios suggest an overall increase in the frequency and magnitude of extreme streamflow for 2006-2035 compared to the historical period of 1972-2005.}, journal={SPATIAL STATISTICS}, author={Majumder, Reetam and Reich, Brian J.}, year={2023}, month={Jun} } @article{awasthi_archfield_reich_sankarasubramanian_2023, title={Beyond Simple Trend Tests: Detecting Significant Changes in Design-Flood Quantiles}, volume={50}, ISSN={["1944-8007"]}, url={https://doi.org/10.1029/2023GL103438}, DOI={10.1029/2023GL103438}, abstractNote={Abstract Changes in annual maximum flood (AMF), which are usually detected using simple trend tests (e.g., Mann‐Kendall test (MKT)), are expected to change design‐flood estimates. We propose an alternate framework to detect significant changes in design‐flood between two periods and evaluate it for synthetically generated AMF from the Log‐Pearson Type‐3 (LP3) distribution due to changes in moments associated with flood distribution. Synthetic experiments show MKT does not consider changes in all three moments of the LP3 distribution and incorrectly detects changes in design‐flood. We applied the framework on 31 river basins spread across the United States. Statistically significant changes in design‐flood quantiles were observed even without a significant trend in AMF and basins with statistically significant trend did not necessarily exhibit statistically significant changes in design‐flood. We recommend application of the framework for evaluating changes in design‐flood estimates considering changes in all the moments as opposed to simple trend tests.}, number={13}, journal={GEOPHYSICAL RESEARCH LETTERS}, author={Awasthi, C. and Archfield, S. A. and Reich, B. J. 
and Sankarasubramanian, A.}, year={2023}, month={Jul} } @article{burgener_hyland_reich_scotese_2023, title={Cretaceous climates: Mapping paleo-Koppen climatic zones using a Bayesian statistical analysis of lithologic, paleontologic, and geochemical proxies}, volume={613}, ISSN={["1872-616X"]}, url={http://dx.doi.org/10.1016/j.palaeo.2022.111373}, DOI={10.1016/j.palaeo.2022.111373}, abstractNote={The Cretaceous Period (145 to 66 Ma) was a prolonged warmhouse to hothouse period characterized by high atmospheric CO2 conditions, elevated surface temperatures, and an enhanced global hydrologic cycle. It provides a case study for understanding how a hothouse climate system operates, and is an analog for future anthropogenic climate change scenarios. This study presents new quantitative temperature and precipitation proxy datasets for nine key Cretaceous time slices (Berriasian/Valanginian, Hauterivian/Barremian, Aptian, Albian, Cenomanian, Turonian, Coniacian/Santonian, Campanian, Maastrichtian), and a new geostatistical analysis technique that utilizes Markov Chain Monte Carlo algorithm and Bayesian hierarchical models to generate high resolution, quantitative global paleoclimate reconstructions from these proxy datasets, with associated uncertainties. Using these paleoclimate reconstructions, paleo-Köppen (-Geiger) climate zone maps are produced that provide new insights into the changing spatial and temporal climate patterns during the Cretaceous. These new paleoclimate reconstructions and paleo-Köppen climate maps provide new insight into the timing of the initiation of the Early Cretaceous equatorial humid belt over Gondwana and reveal temporal shifts in the width of the subtropical arid belts from the Early to mid- to Late Cretaceous. A comparison of these proxy-based reconstructions and model simulations of Cretaceous climate reveal continued proxy/model differences. 
In addition, the methodology developed for this study can be applied to other time periods, providing a framework for better understanding ancient climate, environments, and ecosystems.}, journal={PALAEOGEOGRAPHY PALAEOCLIMATOLOGY PALAEOECOLOGY}, author={Burgener, Landon and Hyland, Ethan and Reich, Brian J. and Scotese, Christopher}, year={2023}, month={Mar} } @article{hector_reich_2023, title={Distributed Inference for Spatial Extremes Modeling in High Dimensions}, volume={4}, ISSN={["1537-274X"]}, url={https://doi.org/10.1080/01621459.2023.2186886}, DOI={10.1080/01621459.2023.2186886}, abstractNote={Extreme environmental events frequently exhibit spatial and temporal dependence. These data are often modeled using max stable processes (MSPs) that are computationally prohibitive to fit for as few as a dozen observations. Supposed computationally-efficient approaches like the composite likelihood remain computationally burdensome with a few hundred observations. In this paper, we propose a spatial partitioning approach based on local modeling of subsets of the spatial domain that delivers computationally and statistically efficient inference. Marginal and dependence parameters of the MSP are estimated locally on subsets of observations using censored pairwise composite likelihood, and combined using a modified generalized method of moments procedure. The proposed distributed approach is extended to estimate inverted MSP models, and to estimate spatially varying coefficient models to deliver computationally efficient modeling of spatial variation in marginal parameters. We demonstrate consistency and asymptotic normality of estimators, and show empirically that our approach leads to statistically efficient estimation of model parameters. We illustrate the flexibility and practicability of our approach through simulations and the analysis of streamflow data from the U.S. 
Geological Survey.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, publisher={Taylor & Francis}, author={Hector, Emily C. and Reich, Brian J.}, year={2023}, month={Apr} } @article{sahoo_guinness_reich_2023, title={Estimating atmospheric motion winds from satellite image data using space-time drift models}, volume={7}, ISSN={["1099-095X"]}, DOI={10.1002/env.2818}, abstractNote={Abstract Geostationary weather satellites collect high‐resolution data comprising a series of images. The Derived Motion Winds (DMW) Algorithm is commonly used to process these data and estimate atmospheric winds by tracking features in the images. However, the wind estimates from the DMW Algorithm are often missing and do not come with uncertainty measures. Also, the DMW Algorithm estimates can only be half‐integers, since the algorithm requires the original and shifted data to be at the same locations, in order to calculate the displacement vector between them. This motivates us to statistically model wind motions as a spatial process drifting in time. Using a covariance function that depends on spatial and temporal lags and a drift parameter to capture the wind speed and wind direction, we estimate the parameters by local maximum likelihood. Our method allows us to compute standard errors of the local estimates, enabling spatial smoothing of the estimates using a Gaussian kernel weighted by the inverses of the estimated variances. We conduct extensive simulation studies to determine the situations where our method performs well. The proposed method is applied to the GOES‐15 brightness temperature data over Colorado and reduces prediction error of brightness temperature compared to the DMW Algorithm.}, journal={ENVIRONMETRICS}, author={Sahoo, Indranil and Guinness, Joseph and Reich, Brian J. 
}, year={2023}, month={Jul} } @article{mohottige_davenport_bhavsar_schappe_lyn_maxson_johnson_planey_mcelroy_wang_et al._2023, title={Residential Structural Racism and Prevalence of Chronic Health Conditions}, volume={6}, ISSN={["2574-3805"]}, DOI={10.1001/jamanetworkopen.2023.48914}, abstractNote={Studies elucidating determinants of residential neighborhood-level health inequities are needed.}, number={12}, journal={JAMA NETWORK OPEN}, author={Mohottige, Dinushika and Davenport, Clemontina A. and Bhavsar, Nrupen and Schappe, Tyler and Lyn, Michelle J. and Maxson, Pamela and Johnson, Fred and Planey, Arrianna M. and Mcelroy, Lisa M. and Wang, Virginia and others}, year={2023}, month={Dec} } @article{yanchenko_bondell_reich_2023, title={Spatial regression modeling via the R2D2 framework}, volume={10}, ISSN={["1099-095X"]}, DOI={10.1002/env.2829}, abstractNote={Abstract Spatially dependent data arises in many applications, and Gaussian processes are a popular modeling choice for these scenarios. While Bayesian analyses of these problems have proven to be successful, selecting prior distributions for these complex models remains a difficult task. In this work, we propose a principled approach for setting prior distributions on model variance components by placing a prior distribution on a measure of model fit. In particular, we derive the distribution of the prior coefficient of determination. Placing a beta prior distribution on this measure induces a generalized beta prime prior distribution on the global variance of the linear predictor in the model. This method can also be thought of as shrinking the fit towards the intercept‐only (null) model. We derive an efficient Gibbs sampler for the majority of the parameters and use Metropolis–Hasting updates for the others. Finally, the method is applied to a marine protection area dataset. 
We estimate the effect of marine policies on biodiversity and conclude that no‐take restrictions lead to a slight increase in biodiversity and that the majority of the variance in the linear predictor comes from the spatial effect.}, journal={ENVIRONMETRICS}, author={Yanchenko, Eric and Bondell, Howard D. and Reich, Brian J.}, year={2023}, month={Oct} } @article{nag_sun_reich_2023, title={Spatio-temporal DeepKriging for interpolation and probabilistic forecasting}, volume={57}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2023.100773}, abstractNote={Gaussian processes (GP) and Kriging are widely used in traditional spatio-temporal modelling and prediction. These techniques typically presuppose that the data are observed from a stationary GP with a parametric covariance structure. However, processes in real-world applications often exhibit non-Gaussianity and nonstationarity. Moreover, likelihood-based inference for GPs is computationally expensive and thus prohibitive for large datasets. In this paper, we propose a deep neural network (DNN) based two-stage model for spatio-temporal interpolation and forecasting. Interpolation is performed in the first step, which utilizes a dependent DNN with the embedding layer constructed with spatio-temporal basis functions. For the second stage, we use Long-Short Term Memory (LSTM) and convolutional LSTM to forecast future observations at a given location. We adopt the quantile-based loss function in the DNN to provide probabilistic forecasting. Compared to Kriging, the proposed method does not require specifying covariance functions or making stationarity assumptions and is computationally efficient. Therefore, it is suitable for large-scale prediction of complex spatio-temporal processes. 
We apply our method to monthly PM2.5 data at more than 200,000 space–time locations from January 1999 to December 2022 for fast imputation of missing values and forecasts with uncertainties.}, journal={SPATIAL STATISTICS}, author={Nag, Pratik and Sun, Ying and Reich, Brian J.}, year={2023}, month={Oct} } @article{larsen_yang_reich_rappold_2022, title={A SPATIAL CAUSAL ANALYSIS OF WILDLAND FIRE-CONTRIBUTED PM2.5 USING NUMERICAL MODEL OUTPUT}, volume={16}, ISSN={["1941-7330"]}, DOI={10.1214/22-AOAS1610}, abstractNote={Wildland fire smoke contains hazardous levels of fine particulate matter (PM}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Larsen, Alexandra and Yang, Shu and Reich, Brian J. and Rappold, Ana G.}, year={2022}, month={Dec}, pages={2714--2731} } @article{trostle_corzo_reich_machado_2022, title={A discrete-time survival model for porcine epidemic diarrhoea virus}, volume={10}, ISSN={["1865-1682"]}, url={https://doi.org/10.1111/tbed.14739}, DOI={10.1111/tbed.14739}, abstractNote={Since the arrival of porcine epidemic diarrhea virus (PEDV) in the United States in 2013, elimination and control programmes have had partial success. The dynamics of its spread are hard to quantify, though previous work has shown that local transmission and the transfer of pigs within production systems are most associated with the spread of PEDV. Our work relies on the history of PEDV infections in a region of the southeastern United States. This infection data is complemented by farm-level features and extensive industry data on the movement of both pigs and vehicles. We implement a discrete-time survival model and evaluate different approaches to modelling the local-transmission and network effects. We find strong evidence in that the local-transmission and pig-movement effects are associated with the spread of PEDV, even while controlling for seasonality, farm-level features and the possible spread of disease by vehicles. 
Our fully Bayesian model permits full uncertainty quantification of these effects. Our farm-level out-of-sample predictions have a receiver-operating characteristic area under the curve (AUC) of 0.779 and a precision-recall AUC of 0.097. The quantification of these effects in a comprehensive model allows stakeholders to make more informed decisions about disease prevention efforts.}, journal={TRANSBOUNDARY AND EMERGING DISEASES}, author={Trostle, Parker and Corzo, Cesar A. and Reich, Brian J. and Machado, Gustavo}, year={2022}, month={Oct} } @article{zhang_naughton_bondell_reich_2022, title={Bayesian Regression Using a Prior on the Model Fit: The R2-D2 Shrinkage Prior}, volume={117}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2020.1825449}, abstractNote={Prior distributions for high-dimensional linear regression require specifying a joint distribution for the unobserved regression coefficients, which is inherently difficult. We instead propose a new class of shrinkage priors for linear regression via specifying a prior first on the model fit, in particular, the coefficient of determination, and then distributing through to the coefficients in a novel way. The proposed method compares favorably to previous approaches in terms of both concentration around the origin and tail behavior, which leads to improved performance both in posterior contraction and in empirical performance. The limiting behavior of the proposed prior is 1/x , both around the origin and in the tails. This behavior is optimal in the sense that it simultaneously lies on the boundary of being an improper prior both in the tails and around the origin. None of the existing shrinkage priors obtain this behavior in both regions simultaneously. We also demonstrate that our proposed prior leads to the same near-minimax posterior contraction rate as the spike-and-slab prior. 
Supplementary materials for this article are available online.}, number={538}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zhang, Yan Dora and Naughton, Brian P. and Bondell, Howard D. and Reich, Brian J.}, year={2022}, month={Apr}, pages={862--874} } @article{miller_reich_2022, title={Bayesian spatial modeling using random Fourier frequencies}, volume={48}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2022.100598}, abstractNote={Spectral methods are important for both theory and computation in spatial data analysis. When data lie on a grid, spectral approaches can take advantage of the discrete Fourier transform for fast computation. If data are not on a grid, then low-rank processes with Fourier basis functions may be sufficient approximations. However, deciding which basis functions to use is difficult and can depend on unknown parameters. Here, we introduce Bayesian Random Fourier Frequencies (BRFF), a fully Bayesian extension of the random Fourier features approach. BRFF treats the spectral frequencies as random parameters, which unlike fixed frequency approximations allows the frequencies to be data-adaptive and averages over uncertainty in frequency selection. We apply this method to non-gridded continuous, binary, and count data. We compare BRFF using simulated and observed data to another popular low-rank method, the predictive processes (PP) model. BRFF is faster than PP, and outperforms or matches the predictive performance of the PP model in settings with high numbers of observations.}, journal={SPATIAL STATISTICS}, author={Miller, Matthew J. and Reich, Brian J.}, year={2022}, month={Apr} } @article{reich_yang_guan_2022, title={Discussion on "Spatial+: A novel approach to spatial confounding" by Dupont, Wood, and Augustin}, volume={3}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13651}, abstractNote={Web Appendices (code) referenced in Section 2 are available with this paper at the Biometrics website on Wiley Online Library. 
Please note: The publisher is not responsible for the content or functionality of any supporting information supplied by the authors. Any queries (other than missing content) should be directed to the corresponding author for the article.}, journal={BIOMETRICS}, author={Reich, Brian J. and Yang, Shu and Guan, Yawen}, year={2022}, month={Mar} } @article{giffin_gong_majumder_rappold_reich_yang_2022, title={Estimating intervention effects on infectious disease control: The effect of community mobility reduction on Coronavirus spread}, volume={52}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2022.100711}, abstractNote={Understanding the effects of interventions, such as restrictions on community and large group gatherings, is critical to controlling the spread of COVID-19. Susceptible-Infectious-Recovered (SIR) models are traditionally used to forecast the infection rates but do not provide insights into the causal effects of interventions. We propose a spatiotemporal model that estimates the causal effect of changes in community mobility (intervention) on infection rates. Using an approximation to the SIR model and incorporating spatiotemporal dependence, the proposed model estimates a direct and indirect (spillover) effect of intervention. Under an interference and treatment ignorability assumption, this model is able to estimate causal intervention effects, and additionally allows for spatial interference between locations. Reductions in community mobility were measured by cell phone movement data. The results suggest that the reductions in mobility decrease Coronavirus cases 4 to 7 weeks after the intervention.}, journal={SPATIAL STATISTICS}, author={Giffin, Andrew and Gong, Wenlong and Majumder, Suman and Rappold, Ana G. and Reich, Brian J. 
and Yang, Shu}, year={2022}, month={Dec} } @article{vargas_castaneda_liljedahl_mora_menezes-filho_smith_mergler_reich_giffin_hoppin_et al._2022, title={Exposure to common-use pesticides, manganese, lead, and thyroid function among pregnant women from the Infants' Environmental Health (ISA) study, Costa Rica}, volume={810}, ISSN={["1879-1026"]}, DOI={10.1016/j.scitotenv.2021.151288}, abstractNote={Pesticides and metals may disrupt thyroid function, which is key to fetal brain development.}, journal={SCIENCE OF THE TOTAL ENVIRONMENT}, author={Vargas, Andrea Corrales and Castaneda, Jorge Penaloza and Liljedahl, Emelie Rietz and Mora, Ana Maria and Menezes-Filho, Jose Antonio and Smith, Donald R. and Mergler, Donna and Reich, Brian and Giffin, Andrew and Hoppin, Jane A. and others}, year={2022}, month={Mar} } @article{giffin_reich_yang_rappold_2022, title={Generalized propensity score approach to causal inference with spatial interference}, volume={9}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13745}, abstractNote={Abstract Many spatial phenomena exhibit interference, where exposures at one location may affect the response at other locations. Because interference violates the stable unit treatment value assumption, standard methods for causal inference do not apply. We propose a new causal framework to recover direct and spill-over effects in the presence of spatial interference, taking into account that exposures at nearby locations are more influential than exposures at locations further apart. Under the no unmeasured confounding assumption, we show that a generalized propensity score is sufficient to remove all measured confounding. To reduce dimensionality issues, we propose a Bayesian spline-based regression model accounting for a sufficient set of variables for the generalized propensity score. A simulation study demonstrates the accuracy and coverage properties. 
We apply the method to estimate the causal effect of wildland fires on air pollution in the Western United States over 2005–2018.}, journal={BIOMETRICS}, author={Giffin, A. and Reich, B. J. and Yang, S. and Rappold, A. G.}, year={2022}, month={Sep} } @article{lan_reich_guinness_bandyopadhyay_ma_moeller_2022, title={Geostatistical modeling of positive-definite matrices: An application to diffusion tensor imaging}, volume={78}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13445}, abstractNote={Geostatistical modeling for continuous point-referenced data has extensively been applied to neuroimaging because it produces efficient and valid statistical inference. However, diffusion tensor imaging (DTI), a neuroimaging technique characterizing the brain's anatomical structure, produces a positive-definite (p.d.) matrix for each voxel. Currently, only a few geostatistical models for p.d. matrices have been proposed because introducing spatial dependence among p.d. matrices properly is challenging. In this paper, we use the spatial Wishart process, a spatial stochastic process (random field), where each p.d. matrix-variate random variable marginally follows a Wishart distribution, and spatial dependence between random matrices is induced by latent Gaussian processes. This process is valid on an uncountable collection of spatial locations and is almost-surely continuous, leading to a reasonable way of modeling spatial dependence. Motivated by a DTI data set of cocaine users, we propose a spatial matrix-variate regression model based on the spatial Wishart process. A problematic issue is that the spatial Wishart process has no closed-form density function. Hence, we propose an approximation method to obtain a feasible Cholesky decomposition model, which we show to be asymptotically equivalent to the spatial Wishart process model. A local likelihood approximation method is also applied to achieve fast computation. 
The simulation studies and real data application demonstrate that the Cholesky decomposition process model produces reliable inference and improved performance, compared to other methods.}, number={2}, journal={BIOMETRICS}, author={Lan, Zhou and Reich, Brian J. and Guinness, Joseph and Bandyopadhyay, Dipankar and Ma, Liangsuo and Moeller, F. Gerard}, year={2022}, month={Jun}, pages={548--559} } @article{majumder_guan_reich_saibaba_2022, title={Kryging: geostatistical analysis of large-scale datasets using Krylov subspace methods}, volume={32}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-022-10104-3}, abstractNote={Analyzing massive spatial datasets using a Gaussian process model poses computational challenges. This is a problem prevailing heavily in applications such as environmental modeling, ecology, forestry and environmental health. We present a novel approximate inference methodology that uses profile likelihood and Krylov subspace methods to estimate the spatial covariance parameters and makes spatial predictions with uncertainty quantification for point-referenced spatial data. “Kryging” combines Kriging and Krylov subspace methods and applies for both observations on regular grid and irregularly spaced observations, and for any Gaussian process with a stationary isotropic (and certain geometrically anisotropic) covariance function, including the popular Matérn covariance family. We make use of the block Toeplitz structure with Toeplitz blocks of the covariance matrix and use fast Fourier transform methods to bypass the computational and memory bottlenecks of approximating log-determinant and matrix-vector products. We perform extensive simulation studies to show the effectiveness of our model by varying sample sizes, spatial parameter values and sampling designs. A real data application is also performed on a dataset consisting of land surface temperature readings taken by the MODIS satellite. 
Compared to existing methods, the proposed method performs satisfactorily with much less computation time and better scalability.}, number={5}, journal={STATISTICS AND COMPUTING}, author={Majumder, Suman and Guan, Yawen and Reich, Brian J. and Saibaba, Arvind K.}, year={2022}, month={Oct} } @article{huberman_reich_bondell_2022, title={Nonparametric conditional density estimation in a deep learning framework for short-term forecasting (May, 10.1007/s10651-021-00499-z, 2021)}, volume={8}, ISSN={["1573-3009"]}, DOI={10.1007/s10651-022-00543-6}, journal={ENVIRONMENTAL AND ECOLOGICAL STATISTICS}, author={Huberman, David B. and Reich, Brian J. and Bondell, Howard D.}, year={2022}, month={Aug} } @article{giffin_hoppin_cordoba_solano-diaz_ruepert_penaloza-castaneda_lindh_reich_joode_2022, title={Pyrimethanil and chlorpyrifos air concentrations and pregnant women's urinary metabolites in the Infants' Environmental Health Study (ISA) Costa Rica}, volume={166}, ISSN={["1873-6750"]}, DOI={10.1016/j.envint.2022.107328}, abstractNote={Only few studies have compared environmental pesticide air concentrations with specific urinary metabolites to evaluate pathways of exposure. Therefore, we compared pyrimethanil and chlorpyrifos concentrations in air with urinary 4-hydroxypyrimethanil (OHP, metabolite of pyrimethanil) and 3,5,6-trichloro-2-pyridinol (TCPy, metabolite of chlorpyrifos) among pregnant women from the Infant's Environmental Health Study (ISA) in Matina County, Costa Rica. During pregnancy, we obtained repeat urinary samples from 448 women enrolled in the ISA study. We extrapolated pyrimethanil and chlorpyrifos concentrations measured with passive air samplers (PAS) (n = 48, from 12 schools), across space and time using a Bayesian spatiotemporal model. We subsequently compared these concentrations with urinary OHP and TCPy in 915 samples from 448 women, using separate mixed models and considering several covariables. 
A 10% increase in air pyrimethanil (ng/m3) was associated with a 5.7% (95% confidence interval (CI 4.6, 6.8) increase in OHP (μg/L). Women living further from banana plantations had lower OHP: −0.7% (95% CI −1.2, −0.3) for each 10% increase in distance (meters) as well as women who ate rice and beans ≥15 times a week −23% (95% CI −38, −4). In addition, each 1 ng/m3 increase in chlorpyrifos in air was associated with a 1.5% (95% CI 0.2, 2.8) increase in TCPy (μg/L), and women working in agriculture tended to have increased TCPy (21%, 95% CI −2, 49). The Bayesian spatiotemporal models were useful to estimate pyrimethanil and chlorpyrifos air concentrations across space and time. Our results suggest inhalation of pyrimethanil and chlorpyrifos is a pathway of environmental exposure. PAS seems a useful technique to monitor environmental current-use pesticide exposures. For future studies, we recommend increasing the number of locations of environmental air measurements, obtaining all air and urine measurements during the same month, and, ideally, including dermal exposure estimates as well.}, journal={ENVIRONMENT INTERNATIONAL}, author={Giffin, Andrew and Hoppin, Jane A. and Cordoba, Leonel and Solano-Diaz, Karla and Ruepert, Clemens and Penaloza-Castaneda, Jorge and Lindh, Christian and Reich, Brian J. and Joode, Berna van Wendel}, year={2022}, month={Aug} } @article{islam_hoppin_mora_soto-martinez_cordoba gamboa_penaloza castaneda_reich_lindh_joode_2022, title={Respiratory and allergic outcomes among 5-year-old children exposed to pesticides}, volume={2}, ISSN={["1468-3296"]}, DOI={10.1136/thoraxjnl-2021-218068}, abstractNote={Background Little is known about the effects of pesticides on children’s respiratory and allergic outcomes. We evaluated associations of prenatal and current pesticide exposures with respiratory and allergic outcomes in children from the Infants’ Environmental Health Study in Costa Rica. 
Methods Among 5-year-old children (n=303), we measured prenatal and current specific gravity-corrected urinary metabolite concentrations of insecticides (chlorpyrifos, pyrethroids), fungicides (mancozeb, pyrimethanil, thiabendazole) and 2,4-D. We collected information from caregivers on respiratory (ever doctor-diagnosed asthma and lower respiratory tract infections (LRTI), wheeze and cough during last 12 months) and allergic (nasal allergies, itchy rash, ever eczema) outcomes. We fitted separate multivariable logistic regression models for high (≥75th percentile (P75)) vs low (1-m difference between rainfall and evapotranspiration) and rich in Fe-oxides/hydroxides. We observed a positive association between NTM presence and iron in wet soils, supporting past studies, but no such association in dry soils. High soil-water balance may facilitate underground movement of NTM into the aquifer system, potentially compounded by expansive capabilities allowing crack formation under drought conditions, representing further possible avenues for aquifer infiltration. These results suggest both precipitation and soil properties are mechanisms by which surface NTM may reach the human water supply.}, journal={APPLIED AND ENVIRONMENTAL MICROBIOLOGY}, author={Parsons, Arielle W. and Dawrs, Stephanie N. and Nelson, Stephen T. and Norton, Grant J. and Virdi, Ravleen and Hasan, Nabeeh A. and Epperson, L. Elaine and Holst, Brady and Chan, Edward D. and Leos-Barajas, Vianey and et al.}, year={2022}, month={Apr} } @article{guan_page_reich_ventrucci_yang_2022, title={Spectral adjustment for spatial confounding}, volume={12}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asac069}, abstractNote={Adjusting for an unmeasured confounder is generally an intractable problem, but in the spatial setting it may be possible under certain conditions. We derive necessary conditions on the coherence between the exposure and the unmeasured confounder that ensure the effect of exposure is estimable. 
We specify our model and assumptions in the spectral domain to allow for different degrees of confounding at different spatial resolutions. One assumption that ensures identifiability is that confounding present at global scales dissipates at local scales. We show that this assumption in the spectral domain is equivalent to adjusting for global-scale confounding in the spatial domain by adding a spatially smoothed version of the exposure to the mean of the response variable. Within this general framework, we propose a sequence of confounder adjustment methods that range from parametric adjustments based on the Matérn coherence function to more robust semiparametric methods that use smoothing splines. These ideas are applied to areal and geostatistical data for both simulated and real datasets.}, journal={BIOMETRIKA}, author={Guan, Yawen and Page, Garritt L. and Reich, Brian J. and Ventrucci, Massimo and Yang, Shu}, year={2022}, month={Dec} } @article{euan_sun_reich_2022, title={Statistical analysis of multi-day solar irradiance using a threshold time series model}, volume={1}, ISSN={["1099-095X"]}, DOI={10.1002/env.2716}, abstractNote={Abstract The analysis of solar irradiance has important applications in predicting solar energy production from solar power plants. Although the sun provides every day more energy than we need, the variability caused by environmental conditions affects electricity production. Recently, new statistical models have been proposed to provide stochastic simulations of high‐resolution data to downscale and forecast solar irradiance measurements. Most of the existing models are linear and highly depend on normality assumptions. However, solar irradiance shows strong nonlinearity and is only measured during the day time. Thus, we propose a new multi‐day threshold autoregressive model to quantify the variability of the daily irradiance time series. 
We establish the sufficient conditions for our model to be stationary, and we develop an inferential procedure to estimate the model parameters. When we apply our model to study the statistical properties of observed irradiance data in Guadeloupe island group, a French overseas region located in the Southern Caribbean Sea, we are able to characterize two states of the irradiance series. These states represent the clear‐sky and non‐clear sky regimes. Using our model, we are able to simulate irradiance series that behave similarly to the real data in mean and variability, and more accurate forecasts compared to linear models.}, journal={ENVIRONMETRICS}, author={Euan, Carolina and Sun, Ying and Reich, Brian J.}, year={2022}, month={Jan} } @article{mao_martin_reich_2022, title={Valid Model-Free Spatial Prediction}, volume={12}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2022.2147531}, abstractNote={Predicting the response at an unobserved location is a fundamental problem in spatial statistics. Given the difficulty in modeling spatial dependence, especially in nonstationary cases, model-based prediction intervals are at risk of misspecification bias that can negatively affect their validity. Here we present a new approach for model-free nonparametric spatial prediction based on the conformal prediction machinery. Our key observation is that spatial data can be treated as exactly or approximately exchangeable in a wide range of settings. In particular, under an infill asymptotic regime, we prove that the response values are, in a certain sense, locally approximately exchangeable for a broad class of spatial processes, and we develop a local spatial conformal prediction algorithm that yields valid prediction intervals without strong model assumptions like stationarity. 
Numerical examples with both real and simulated data confirm that the proposed conformal prediction intervals are valid and generally more efficient than existing model-based procedures for large datasets across a range of nonstationary and non-Gaussian settings.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Mao, Huiying and Martin, Ryan and Reich, Brian J.}, year={2022}, month={Dec} } @article{pease_pacifici_kays_reich_2022, title={What drives spatially varying ecological relationships in a wide-ranging species?}, volume={7}, ISSN={["1472-4642"]}, url={https://doi.org/10.1111/ddi.13594}, DOI={10.1111/ddi.13594}, abstractNote={Abstract Aim Decades of research on species distributions has revealed geographic variation in species‐environment relationships for a given species. That is, the way a species uses the local environment varies across geographic space. However, the drivers underlying this variation are contested and still largely unexplored. Niche traits that are conserved should reflect the evolutionary history of a species whereas more flexible ecological traits could vary at finer scales, reflecting local adaptation. Location North America. Methods We used mammal observations during a 5‐year period from the iNaturalist biodiversity database and a local ensemble modelling approach to explore spatial variation in American black bear ( Ursus americanus ) relationships with eight ecological correlates. We tested four biologically driven hypotheses to explain the patterns of local adaptation. We evaluated non‐stationarity in ecological relationships using a Stationarity Index and tested predictive performance using an independent, national‐level animal occurrence data set. Results We documented considerable spatial non‐stationarity in all eight environmental relationships, with the greatest spatial variation occurring in bear's relationship to climatic factors. 
Notably, the greatest variation in environmental relationships tended to occur along the current boundaries of the species' range, potentially representing the ecological limits to the species geographic range. We additionally documented that spatial variation in relationships with land cover and anthropogenic factors were best explained by niche conservatism at the subspecies level, whereas climatic relationships were better explained by local adaptation. Main Conclusions Based on these results, we propose that the current distribution of American black bear is determined by an evolutionary legacy of habitat relationships unique to each subspecies combined with more fine‐scale local adaptation to climatic conditions. This result suggests that black bears should be adaptable to climatic changes over the 21st century and that management of habitat and human‐bear relationships could be considered at the subspecies level.},
  journal={Diversity and Distributions},
  publisher={Wiley},
  author={Pease, Brent S. and Pacifici, Krishna and Kays, Roland and Reich, Brian},
  year={2022},
  month=jul
}

@article{tian_reich_2021,
  title={A {Bayesian} Semi-Parametric Mixture Model for Bivariate Extreme Value Analysis with Application to Precipitation Forecasting},
  volume={31},
  ISSN={1996-8507},
  DOI={10.5705/ss.202018.0420},
  abstractNote={We propose a novel mixture Generalized Pareto (MIXGP) model to calibrate extreme precipitation forecasts. This model is able to describe the marginal distribution of observed precipitation and capture the dependence between climate forecasts and the observed precipitation under suitable conditions. In addition, the full range distribution of precipitation conditional on grid forecast ensembles can also be estimated. Unlike the classical Generalized Pareto distribution that can only model points over a hard threshold, our model takes the threshold as a latent parameter. Tail behavior of both univariate and bivariate models are studied.
The utility of our model is evaluated in Monte Carlo simulation study and is applied to precipitation data for the US where it outperforms competing methods.},
  number={3},
  journal={Statistica Sinica},
  author={Tian, Yuan and Reich, Brian J.},
  year={2021},
  month=jul,
  pages={1619--1641}
}

@article{reich_yang_guan_giffin_miller_rappold_2021,
  title={A Review of Spatial Causal Inference Methods for Environmental and Epidemiological Applications},
  volume={5},
  DOI={10.1111/insr.12452},
  abstractNote={Summary The scientific rigor and computational methods of causal inference have had great impacts on many disciplines but have only recently begun to take hold in spatial applications. Spatial causal inference poses analytic challenges due to complex correlation structures and interference between the treatment at one location and the outcomes at others. In this paper, we review the current literature on spatial causal inference and identify areas of future work. We first discuss methods that exploit spatial structure to account for unmeasured confounding variables. We then discuss causal analysis in the presence of spatial interference including several common assumptions used to reduce the complexity of the interference patterns under consideration. These methods are extended to the spatiotemporal case where we compare and contrast the potential outcomes framework with Granger causality and to geostatistical analyses involving spatial random fields of treatments and responses. The methods are introduced in the context of observational environmental and epidemiological studies and are compared using both a simulation study and analysis of the effect of ambient air pollution on COVID‐19 mortality rate. Code to implement many of the methods using the popular Bayesian software OpenBUGS is provided.},
  journal={International Statistical Review},
  author={Reich, Brian and Yang, Shu and Guan, Yawen and Giffin, Andrew B. and Miller, Matthew J.
and Rappold, Ana},
  year={2021}
}

@article{lan_reich_bandyopadhyay_2021,
  title={A spatial {Bayesian} semiparametric mixture model for positive definite matrices with applications in diffusion tensor imaging},
  volume={49},
  ISSN={1708-945X},
  DOI={10.1002/cjs.11601},
  abstractNote={Diffusion tensor imaging (DTI) is a popular magnetic resonance imaging technique used to characterize microstructural changes in the brain. DTI studies quantify the diffusion of water molecules in a voxel using an estimated 3x3 symmetric positive definite diffusion tensor matrix. Statistical analysis of DTI data is challenging because the data are positive definite matrices. Matrix-variate information is often summarized by a univariate quantity, such as the fractional anisotropy (FA), leading to a loss of information. Furthermore, DTI analyses often ignore the spatial association of neighboring voxels, which can lead to imprecise estimates. Although the spatial modeling literature is abundant, modeling spatially dependent positive definite matrices is challenging. To mitigate these issues, we propose a matrix-variate Bayesian semiparametric mixture model, where the positive definite matrices are distributed as a mixture of inverse Wishart distributions with the spatial dependence captured by a Markov model for the mixture component labels. Conjugacy and the double Metropolis-Hastings algorithm result in fast and elegant Bayesian computing. Our simulation study shows that the proposed method is more powerful than non-spatial methods. We also apply the proposed method to investigate the effect of cocaine use on brain structure. The contribution of our work is to provide a novel statistical inference tool for DTI analysis by extending spatial statistics to matrix-variate data.},
  number={1},
  journal={Canadian Journal of Statistics-Revue Canadienne de Statistique},
  author={Lan, Zhou and Reich, Brian J.
and Bandyopadhyay, Dipankar},
  year={2021},
  month=mar,
  pages={129--149}
}

@article{guan_reich_laber_2021,
  title={A spatiotemporal recommendation engine for malaria control},
  volume={4},
  ISSN={1468-4357},
  DOI={10.1093/biostatistics/kxab010},
  abstractNote={Malaria is an infectious disease affecting a large population across the world, and interventions need to be efficiently applied to reduce the burden of malaria. We develop a framework to help policy-makers decide how to allocate limited resources in realtime for malaria control. We formalize a policy for the resource allocation as a sequence of decisions, one per intervention decision, that map up-to-date disease related information to a resource allocation. An optimal policy must control the spread of the disease while being interpretable and viewed as equitable to stakeholders. We construct an interpretable class of resource allocation policies that can accommodate allocation of resources residing in a continuous domain and combine a hierarchical Bayesian spatiotemporal model for disease transmission with a policy-search algorithm to estimate an optimal policy for resource allocation within the pre-specified class. The estimated optimal policy under the proposed framework improves the cumulative long-term outcome compared with naive approaches in both simulation experiments and application to malaria interventions in the Democratic Republic of the Congo.},
  journal={Biostatistics},
  author={Guan, Qian and Reich, Brian J. and Laber, Eric B.},
  year={2021},
  month=apr
}

@article{miller_cabral_dickey_lebeau_reich_2021,
  title={Accounting for Location Measurement Error in Imaging Data With Application to Atomic Resolution Images of Crystalline Materials},
  volume={4},
  ISSN={1537-2723},
  url={https://app.dimensions.ai/details/publication/pub.1136536111},
  DOI={10.1080/00401706.2021.1905070},
  abstractNote={Scientists use imaging to identify objects of interest and infer properties of these objects.
The locations of these objects are often measured with error, which when ignored leads to biased parameter estimates and inflated variance. Current measurement error methods require an estimate or knowledge of the measurement error variance to correct these estimates, which may not be available. Instead, we create a spatial Bayesian hierarchical model that treats the locations as parameters, using the image itself to incorporate positional uncertainty. We lower the computational burden by approximating the likelihood using a noncontiguous block design around the object locations. We use this model to quantify the relationship between the intensity and displacement of hundreds of atom columns in crystal structures directly imaged via scanning transmission electron microscopy (STEM). Atomic displacements are related to important phenomena such as piezoelectricity, a property useful for engineering applications like ultrasound. Quantifying the sign and magnitude of this relationship will help materials scientists more precisely design materials with improved piezoelectricity. A simulation study confirms our method corrects bias in the estimate of the parameter of interest and drastically improves coverage in high noise scenarios compared to non-measurement error models.},
  number={1},
  journal={Technometrics},
  author={Miller, Matthew J. and Cabral, Matthew J. and Dickey, Elizabeth C. and LeBeau, James M. and Reich, Brian J.},
  year={2021},
  month=apr
}

@article{xu_reich_2021,
  title={{Bayesian} nonparametric quantile process regression and estimation of marginal quantile effects},
  volume={11},
  ISSN={1541-0420},
  DOI={10.1111/biom.13576},
  abstractNote={Abstract Flexible estimation of multiple conditional quantiles is of interest in numerous applications, such as studying the effect of pregnancy‐related factors on low and high birth weight. We propose a Bayesian nonparametric method to simultaneously estimate noncrossing, nonlinear quantile curves.
We expand the conditional distribution function of the response in I‐spline basis functions where the covariate‐dependent coefficients are modeled using neural networks. By leveraging the approximation power of splines and neural networks, our model can approximate any continuous quantile function. Compared to existing models, our model estimates all rather than a finite subset of quantiles, scales well to high dimensions, and accounts for estimation uncertainty. While the model is arbitrarily flexible, interpretable marginal quantile effects are estimated using accumulative local effect plots and variable importance measures. A simulation study shows that our model can better recover quantiles of the response distribution when the data are sparse, and an analysis of birth weight data is presented.},
  journal={Biometrics},
  author={Xu, Steven G. and Reich, Brian J.},
  year={2021},
  month=nov
}

@article{cui_singh_staicu_reich_2021,
  title={{Bayesian} variable selection for high-dimensional rank data},
  volume={5},
  ISSN={1099-095X},
  DOI={10.1002/env.2682},
  abstractNote={Abstract The study of microbiomes has become a topic of intense interest in last several decades as the development of new sequencing technologies has made DNA data accessible across disciplines. In this paper, we analyze a global dataset to investigate environmental factors that affect topsoil microbiome. As yet, much associated work has focused on linking indicators of microbial health to specific outcomes in various fields, rather than understanding how external factors may influence the microbiome composition itself. This is partially due to limited statistical methods to model abundance counts. The counts are high‐dimensional, overdispersed, often zero‐inflated, and exhibit complex dependence structures. Additionally, the raw counts are often noisy and compositional, and thus are not directly comparable across samples.
Often, practitioners transform the counts to presence–absence indicators, but this transformation discards much of the data. As an alternative, we propose transforming to taxa ranks and develop a Bayesian variable selection model that uses ranks to identify covariates that influence microbiome composition. We show by simulation that the proposed model outperforms competitors across various settings and particular improvement in recall for small magnitude and low prevalence covariates. When applied to the topsoil data, the proposed method identifies several factors that affect microbiome composition.},
  journal={Environmetrics},
  author={Cui, Can and Singh, Susheela P. and Staicu, Ana-Maria and Reich, Brian J.},
  year={2021},
  month=may
}

@article{li_reich_bondell_2021,
  title={Deep distribution regression},
  volume={159},
  ISSN={1872-7352},
  DOI={10.1016/j.csda.2021.107203},
  abstractNote={Due to their flexibility and predictive performance, machine-learning based regression methods have become an important tool for predictive modeling and forecasting. However, most methods focus on estimating the conditional mean or specific quantiles of the target quantity and do not provide the full conditional distribution, which contains uncertainty information that might be crucial for decision making. A general solution consists of transforming a conditional distribution estimation problem into a constrained multi-class classification problem, in which tools such as deep neural networks can be applied. A novel joint binary cross-entropy loss function is proposed to accomplish this goal. Its performance is compared to current state-of-the-art methods via simulation. The approach also shows improved accuracy in a probabilistic solar energy forecasting problem.},
  journal={Computational Statistics \& Data Analysis},
  author={Li, Rui and Reich, Brian J.
and Bondell, Howard D.},
  year={2021},
  month=jul
}

@article{alhanti_joode_martinez_mora_gamboa_reich_lindh_lepiz_hoppin_2021,
  title={Environmental exposures contribute to respiratory and allergic symptoms among women living in the banana growing regions of {Costa Rica}},
  volume={12},
  ISSN={1470-7926},
  url={https://doi.org/10.1136/oemed-2021-107611},
  DOI={10.1136/oemed-2021-107611},
  abstractNote={Objectives This research evaluates whether environmental exposures (pesticides and smoke) influence respiratory and allergic outcomes in women living in a tropical, agricultural environment. Methods We used data from 266 mothers from the Infants’ Environmental Health cohort study in Costa Rica. We evaluated environmental exposures in women by measuring seven pesticide and two polycyclic aromatic hydrocarbons metabolites in urine samples. We defined ‘high exposure’ as having a metabolite value in the top 75th percentile. We collected survey data on respiratory and allergic outcomes in mothers as well as on pesticides and other environmental exposures. Using logistic regression models adjusted for obesity, we assessed the associations of pesticide exposure with multiple outcomes (wheeze, doctor-diagnosed asthma, high (≥2) asthma score based on symptoms, rhinitis, eczema and itchy rash). Results Current pesticide use in the home was positively associated with diagnosed asthma (OR=1.99 (95% CI=1.05 to 3.87)). High urinary levels of 5-hydroxythiabendazole (thiabendazole metabolite) and living in a neighbourhood with frequent smoke from waste burning were associated with a high asthma score (OR=1.84 (95%CI=1.05 to 3.25) and OR=2.31 (95%CI=1.11 to 5.16), respectively). Women who worked in agriculture had a significantly lower prevalence of rhinitis (0.19 (0.01 to 0.93)), but were more likely to report eczema (OR=2.54 (95%CI=1.33 to 4.89)) and an itchy rash (OR=3.17 (95%CI=1.24 to 7.73)).
Conclusions While limited by sample size, these findings suggest that environmental exposure to both pesticides and smoke may impact respiratory and skin-related allergic outcomes in women.},
  journal={Occupational and Environmental Medicine},
  author={Alhanti, Brooke and Joode, Berna van Wendel and Martinez, Manuel Soto and Mora, Ana M. and Gamboa, Leonel Cordoba and Reich, Brian and Lindh, Christian H. and Lepiz, Marcela Quiros and Hoppin, Jane A.},
  year={2021},
  month=dec
}

@article{sass_li_reich_2021,
  title={Flexible and Fast Spatial Return Level Estimation Via a Spatially Fused Penalty},
  volume={7},
  ISSN={1537-2715},
  DOI={10.1080/10618600.2021.1938584},
  abstractNote={Spatial extremes are common for climate data as the observations are usually referenced by geographic locations and dependent when they are nearby. An important goal of extremes modeling is to estimate the},
  journal={Journal of Computational and Graphical Statistics},
  author={Sass, Danielle and Li, Bo and Reich, Brian J.},
  year={2021},
  month=jul
}

@comment{NOTE(review): the citation key of the next entry contains a space and a non-ASCII hyphen (U+2010). Classic BibTeX appears to stop reading a key at whitespace, so this key should be renamed to plain ASCII together with every citation that uses it. It is left unchanged here so existing references keep resolving.}
@article{dorman_hopperstad_reich_majumder_kennedy_reisig_greene_reay‐jones_collins_bacheler_et al._2021,
  title={Landscape-level variation in {Bt} crops predict {Helicoverpa zea} ({Lepidoptera}: {Noctuidae}) resistance in cotton agroecosystems},
  volume={77},
  ISSN={1526-498X 1526-4998},
  url={http://dx.doi.org/10.1002/ps.6585},
  DOI={10.1002/ps.6585},
  abstractNote={Abstract BACKGROUND Helicoverpa zea (Boddie) damage to Bt cotton and maize has increased as a result of widespread Bt resistance across the USA Cotton Belt. Our objective was to link Bt crop production patterns to cotton damage through a series of spatial and temporal surveys of commercial fields to understand how Bt crop production relates to greater than expected H. zea damage to Bt cotton.
To do this, we assembled longitudinal cotton damage data that spanned the Bt adoption period, collected cotton damage data since Bt resistance has been detected, and estimated local population susceptibility using replicated on‐farm studies that included all Bt pyramids marketed in cotton. RESULTS Significant year effects of H. zea damage frequency in commercial cotton were observed throughout the Bt adoption period, with a recent damage increase after 2012. Landscape‐level Bt crop production intensity over time was positively associated with the risk of H. zea damage in two‐ and three‐toxin pyramided Bt cotton. Helicoverpa zea damage also varied across Bt toxin types in spatially replicated on‐farm studies. CONCLUSIONS Landscape‐level predictors of H. zea damage in Bt cotton can be used to identify heightened Bt resistance risk areas and serves as a model to understand factors that drive pest resistance evolution to Bt toxins in the southeastern United States. These results provide a framework for more effective insect resistance management strategies to be used in combination with conventional pest management practices that improve Bt trait durability while minimizing the environmental footprint of row crop agriculture. © 2021 Society of Chemical Industry.
This article has been contributed to by US Government employees and their work is in the public domain in the USA.},
  number={12},
  journal={Pest Management Science},
  publisher={Wiley},
  author={Dorman, Seth J. and Hopperstad, Kristen A. and Reich, Brian J. and Majumder, Suman and Kennedy, George and Reisig, Dominic D. and Greene, Jeremy K. and Reay-Jones, Francis P. F. and Collins, Guy and Bacheler, Jack S. and others},
  year={2021},
  month=aug,
  pages={5454--5462}
}

@article{gao_gray_reich_2021,
  title={Long-term, medium spatial resolution annual land surface phenology with a {Bayesian} hierarchical model},
  volume={261},
  ISSN={1879-0704},
  DOI={10.1016/j.rse.2021.112484},
  abstractNote={Land surface phenology (LSP) is a consistent and sensitive indicator of climate change effects on Earth's vegetation. Existing methods of estimating LSP require time series densities that, until recently, have only been available from coarse spatial resolution imagery such as MODIS (500 m) and AVHRR (1 km). LSP products from these datasets have improved our understanding of phenological change at the global scale, especially over the MODIS era (2001-present). Nevertheless, these products may obscure important finer scale spatial patterns and longer-term changes. Therefore, we have developed a Bayesian hierarchical model to retrieve complete annual sequences of LSP from Landsat imagery (1984-present), which has medium spatial resolution (30 m) but relatively sparse temporal frequency. Our approach uses Markov Chain Monte Carlo (MCMC) sampling to quantify individual phenometric uncertainty, which is especially important when considering long time series with variable observation quality and density, but has rarely been demonstrated. The estimated spring LSP had strong agreement with ground phenology records at Harvard Forest (R2 = 0.87) and Hubbard Brook Experimental Forest (R2 = 0.67).
The estimated LSP were consistent with the recently released 30 m LSP product, MSLSP30NA, in its time period of 2016 to 2018 (R2 of 0.86 and 0.73 for spring and autumn phenology, respectively). Our Bayesian hierarchical model is an important step forward in extending medium resolution LSP records back in time as it accomplishes both critical goals of retrieving annual LSP from sparse time series and accurately estimating uncertainty.},
  journal={Remote Sensing of Environment},
  author={Gao, Xiaojie and Gray, Josh M. and Reich, Brian J.},
  year={2021},
  month=aug
}

@article{wendelberger_reich_wilson_2021,
  title={Multi-model penalized regression},
  volume={14},
  ISSN={1932-1872},
  DOI={10.1002/sam.11496},
  abstractNote={Abstract Wendelberger, LJ, Reich, BJ, Wilson, AG. Multi‐model penalized regression. Stat Anal Data Min: The ASA Data Sci Journal. 2021; 1 ‐ 25. https://doi.org/10.1002/sam.11496 The above article from Statistical Analysis and Data Mining, published online on 13 January 2021 in Wiley Online Library (wileyonlinelibrary.com), has been retracted by agreement between the authors, the journal Editor‐in‐Chief and Wiley Periodicals, LLC. The retraction has been agreed due to inadvertent but substantial overlap with a previously published article in the journal Technometrics.},
  number={6},
  journal={Statistical Analysis and Data Mining},
  author={Wendelberger, Laura J. and Reich, Brian J. and Wilson, Alyson G.},
  year={2021},
  month=dec,
  pages={698--722},
  note={Retracted article}
}

@article{johnson_reich_gray_2021,
  title={Multisensor fusion of remotely sensed vegetation indices using space-time dynamic linear models},
  volume={5},
  ISSN={1467-9876},
  DOI={10.1111/rssc.12495},
  abstractNote={Abstract High spatiotemporal resolution maps of surface vegetation from remote sensing data are desirable for vegetation and disturbance monitoring.
However, due to the current limitations of imaging spectrometers, remote sensing datasets of vegetation with high temporal frequency of measurements have lower spatial resolution, and vice versa. In this research, we propose a space-time dynamic linear model to fuse high temporal frequency data (MODIS) with high spatial resolution data (Landsat) to create high spatiotemporal resolution data products of a vegetation greenness index. The model incorporates the spatial misalignment of the data and models dependence within and across land cover types with a latent multivariate Matérn process. To handle the large size of the data, we introduce a fast estimation procedure and a moving window Kalman smoother to produce a daily, 30-m resolution data product with associated uncertainty.},
  journal={Journal of the Royal Statistical Society Series C-Applied Statistics},
  author={Johnson, Margaret C. and Reich, Brian J. and Gray, Josh M.},
  year={2021},
  month=may
}

@article{gong_reich_chang_2021,
  title={Multivariate spatial prediction of air pollutant concentrations with {INLA}},
  volume={3},
  ISSN={2515-7620},
  DOI={10.1088/2515-7620/ac2f92},
  abstractNote={Estimates of daily air pollution concentrations with complete spatial and temporal coverage are important for supporting epidemiologic studies and health impact assessments. While numerous approaches have been developed for modeling air pollution, they typically only consider each pollutant separately. We describe a spatial multipollutant data fusion model that combines monitoring measurements and chemical transport model simulations that leverages dependence between pollutants to improve spatial prediction. For the contiguous United States, we created a data product of daily concentration for 12 pollutants (CO, NOx, NO2, SO2, O3, PM10, and PM2.5 species EC, OC, NO3, NH4, SO4) during the period 2005 to 2014.
Out-of-sample prediction showed good performance, particularly for daily PM2.5 species EC (R2 = 0.64), OC (R2 = 0.75), NH4 (R2 = 0.84), NO3 (R2 = 0.73), and SO4 (R2 = 0.80). By employing the integrated nested Laplace approximation (INLA) for Bayesian inference, our approach also provides model-based prediction error estimates. The daily data product at 12 km spatial resolution will be publicly available immediately upon publication. To our knowledge this is the first publicly available data product for major PM2.5 species and several gases at this spatial and temporal resolution.},
  number={10},
  journal={Environmental Research Communications},
  author={Gong, Wenlong and Reich, Brian J. and Chang, Howard H.},
  year={2021},
  month=oct
}

@article{huberman_reich_bondell_2021,
  title={Nonparametric conditional density estimation in a deep learning framework for short-term forecasting},
  volume={5},
  ISSN={1573-3009},
  DOI={10.1007/s10651-021-00499-z},
  abstractNote={Short-term forecasting is an important tool in understanding environmental processes. In this paper, we incorporate machine learning algorithms into a conditional distribution estimator for the purposes of forecasting tropical cyclone intensity. Many machine learning techniques give a single-point prediction of the conditional distribution of the target variable, which does not give a full accounting of the prediction variability. Conditional distribution estimation can provide extra insight on predicted response behavior, which could influence decision-making and policy. We propose a technique that simultaneously estimates the entire conditional distribution and flexibly allows for machine learning techniques to be incorporated. A smooth model is fit over both the target variable and covariates, and a logistic transformation is applied on the model output layer to produce an expression of the conditional density function.
We provide two examples of machine learning models that can be used, polynomial regression and deep learning models. To achieve computational efficiency, we propose a case–control sampling approximation to the conditional distribution. A simulation study for four different data distributions highlights the effectiveness of our method compared to other machine learning-based conditional distribution estimation techniques. We then demonstrate the utility of our approach for forecasting purposes using tropical cyclone data from the Atlantic Seaboard. This paper gives a proof of concept for the promise of our method, further computational developments can fully unlock its insights in more complex forecasting and other applications.},
  journal={Environmental and Ecological Statistics},
  author={Huberman, David B. and Reich, Brian J. and Bondell, Howard D.},
  year={2021},
  month=may
}

@article{winkel_stallrich_storlie_reich_2021,
  title={Sequential Optimization in Locally Important Dimensions},
  volume={63},
  ISSN={1537-2723},
  url={http://dx.doi.org/10.1080/00401706.2020.1714738},
  DOI={10.1080/00401706.2020.1714738},
  abstractNote={Optimizing an expensive, black-box function f(·) is challenging when its input space is high-dimensional. Sequential design frameworks first model f(·) with a surrogate function and then optimize an acquisition function to determine input settings to evaluate next. Optimization of both f(·) and the acquisition function benefit from effective dimension reduction. Global variable selection detects and removes input variables that do not affect f(·) across the input space. Further dimension reduction may be possible if we consider local variable selection around the current optimum estimate. We develop a sequential design algorithm called sequential optimization in locally important dimensions (SOLID) that incorporates global and local variable selection to optimize a continuous, differentiable function.
SOLID performs local variable selection by comparing the surrogate's predictions in a localized region around the estimated optimum with the p alternative predictions made by removing each input variable. The search space of the acquisition function is further restricted to focus only on the variables that are deemed locally active, leading to greater emphasis on refining the surrogate model in locally active dimensions. A simulation study across multiple test functions and an application to the Sarcos robot dataset show that SOLID outperforms conventional approaches. Supplementary materials for this article are available online.},
  number={2},
  journal={Technometrics},
  publisher={Informa UK Limited},
  author={Winkel, Munir A. and Stallrich, Jonathan W. and Storlie, Curtis B. and Reich, Brian J.},
  year={2021},
  month=apr,
  pages={236--248}
}

@article{dorman_hopperstad_reich_kennedy_huseth_2021,
  title={Soybeans as a {non-Bt} refuge for {Helicoverpa zea} in maize-cotton agroecosystems},
  volume={322},
  ISSN={0167-8809},
  url={http://dx.doi.org/10.1016/j.agee.2021.107642},
  DOI={10.1016/j.agee.2021.107642},
  abstractNote={Geospatial models are crucial for identifying likely ‘hot-spots’ of Bt resistance evolution in Helicoverpa zea (Lepidoptera: Noctuidae), thereby improving regional insecticide resistance management (IRM) strategies and planted refuge compliance. To characterize H. zea distributions in relation to land use, we used historical trapping data collected from 2008 to 2019 in North Carolina to model the spatial and temporal abundance of H. zea populations across Bt-dominated landscapes. Helicoverpa zea abundance was standardized across site-year observations, and candidate landscape composition and configuration predictors of H. zea abundance were obtained. Spatiotemporal Bayesian hierarchical models were developed to make posterior predictions of H.
zea abundance from environmental covariates, and results were used to generate interpolation prediction maps to visualize H. zea abundance across the sampled region. Our results suggest inverse distance weighted (IDW) soybeans is the most important predictor of H. zea abundance through time in row crop agroecosystems in North Carolina. Soybeans in North Carolina and southeastern U.S. likely serves as a critical non-Bt refuge for delaying H. zea resistance to Bt toxins in landscapes dominated by Bt maize and cotton. Moreover, soybean abundance can be used to predict the spatial abundance of H. zea in this region. Results can be applied to understand population dynamics of H. zea in landscapes dominated by genetically engineered (GE) crops expressing Bt toxins and will enable the development of sound insect resistance management strategies of H. zea populations to GE toxins targeting noctuid pests of maize and cotton. This work will also drive future geospatial studies investigating environmental predictors of resistance evolution in arthropod pests to GE technologies in crop production systems. Landscape-level variation in soybeans predicts spatial and temporal Helicoverpa zea abundance and likely serves as important non-Bt refugia in maize and cotton agroecosystems. • Helicoverpa zea population dynamics in row crops relate to landscape drivers • Landscape-level soybean and cotton variation in the southeastern U.S. associate with increased H. zea abundance through time • Soybeans likely serve as critical non-Bt refugia for delaying H. zea resistance in maize and cotton agroecosystems},
  journal={Agriculture, Ecosystems \& Environment},
  publisher={Elsevier BV},
  author={Dorman, Seth J. and Hopperstad, Kristen A. and Reich, Brian J.
and Kennedy, George and Huseth, Anders S.},
  year={2021},
  month=dec,
  pages={107642}
}

@article{roy_reich_guinness_shinohara_staicu_2021,
  title={Spatial Shrinkage Via the Product Independent {Gaussian} Process Prior},
  volume={6},
  ISSN={1537-2715},
  DOI={10.1080/10618600.2021.1923512},
  abstractNote={We study the problem of sparse signal detection on a spatial domain. We propose a novel approach to model continuous signals that are sparse and piecewise-smooth as the product of independent Gaussian (PING) processes with a smooth covariance kernel. The smoothness of the PING process is ensured by the smoothness of the covariance kernels of the Gaussian components in the product, and sparsity is controlled by the number of components. The bivariate kurtosis of the PING process implies that more components in the product results in the thicker tail and sharper peak at zero. We develop an efficient computation algorithm based on spectral methods. The simulation results demonstrate superior estimation using the PING prior over Gaussian process prior for different image regressions. We apply our method to a longitudinal magnetic resonance imaging dataset to detect the regions that are affected by multiple sclerosis computation in this domain. Supplementary materials for this article are available online.},
  journal={Journal of Computational and Graphical Statistics},
  author={Roy, Arkaprava and Reich, Brian J. and Guinness, Joseph and Shinohara, Russell T. and Staicu, Ana-Maria},
  year={2021},
  month=jun
}

@article{majumder_guan_reich_o'neill_rappold_2021,
  title={Statistical Downscaling with Spatial Misalignment: Application to Wildland Fire {PM2.5} Concentration Forecasting},
  volume={26},
  ISSN={1537-2693},
  DOI={10.1007/s13253-020-00420-4},
  abstractNote={Fine particulate matter, PM2.5, has been documented to have adverse health effects and wildland fires are a major contributor to PM2.5 air pollution in the US.
Forecasters use numerical models to predict PM2.5 concentrations to warn the public of impending health risk. Statistical methods are needed to calibrate the numerical model forecast using monitor data to reduce bias and quantify uncertainty. Typical model calibration techniques do not allow for errors due to misalignment of geographic locations. We propose a spatiotemporal downscaling methodology that uses image registration techniques to identify the spatial misalignment and accounts for and corrects the bias produced by such warping. Our model is fitted in a Bayesian framework to provide uncertainty quantification of the misalignment and other sources of error. We apply this method to different simulated data sets and show enhanced performance of the method in presence of spatial misalignment. Finally, we apply the method to a large fire in Washington state and show that the proposed method provides more realistic uncertainty quantification than standard methods.}, number={1}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Majumder, Suman and Guan, Yawen and Reich, Brian J. and O'Neill, Susan and Rappold, Ana G.}, year={2021}, month={Mar}, pages={23–44} } @article{larsen_hanigan_reich_qin_cope_morgan_rappold_2020, title={A deep learning approach to identify smoke plumes in satellite imagery in near-real time for health risk communication}, ISSN={1559-064X}, DOI={10.1038/s41370-020-0246-y}, abstractNote={Wildland fire (wildfire; bushfire) pollution contributes to poor air quality, a risk factor for premature death. The frequency and intensity of wildfires are expected to increase; improved tools for estimating exposure to fire smoke are vital. New-generation satellite-based sensors produce high-resolution spectral images, providing real-time information of surface features during wildfire episodes. Because of the vast size of such data, new automated methods for processing information are required.
We present a deep fully convolutional neural network (FCN) for predicting fire smoke in satellite imagery in near-real time (NRT). The FCN identifies fire smoke using output from operational smoke identification methods as training data, leveraging validated smoke products in a framework that can be operationalized in NRT. We demonstrate this for a fire episode in Australia; the algorithm is applicable to any geographic region. The algorithm has high classification accuracy (99.5% of pixels correctly classified on average) and precision (average intersection over union = 57.6%). The FCN algorithm has high potential as an exposure-assessment tool, capable of providing critical information to fire managers, health and environmental agencies, and the general public to prevent the health risks associated with exposure to hazardous smoke from wildland fires in NRT.}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Larsen, Alexandra and Hanigan, Ivan and Reich, Brian J. and Qin, Yi and Cope, Martin and Morgan, Geoffrey and Rappold, Ana G.}, year={2020} } @article{hazra_reich_staicu_2020, title={A multivariate spatial skew-t process for joint modeling of extreme precipitation indexes}, volume={31}, ISSN={["1099-095X"]}, DOI={10.1002/env.2602}, abstractNote={To study trends in extreme precipitation across the United States over the years 1951–2017, we analyze 10 climate indexes that represent extreme precipitation, such as annual maximum of daily precipitation and annual maximum of consecutive five‐day average precipitation. We consider the gridded data produced by the CLIMDEX project (http://www.climdex.org/gewocs.html), constructed using daily precipitation data. These indexes exhibit spatial and mutual dependence. In this paper, we propose a multivariate spatial skew‐t process for joint modeling of extreme precipitation indexes and discuss its theoretical properties.
The model framework allows Bayesian inference while maintaining a computational time that is competitive with common multivariate geostatistical approaches. In a numerical study, we find that the proposed model outperforms several simpler alternatives in terms of various model selection criteria. We apply the proposed model to estimate the average decadal change in the extreme precipitation indexes throughout the United States and find several significant local changes.}, number={3}, journal={ENVIRONMETRICS}, author={Hazra, Arnab and Reich, Brian J. and Staicu, Ana-Maria}, year={2020}, month={May} } @article{guan_reich_laber_bandyopadhyay_2020, title={Bayesian Nonparametric Policy Search With Application to Periodontal Recall Intervals}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1660169}, abstractNote={Tooth loss from periodontal disease is a major public health burden in the United States. Standard clinical practice is to recommend a dental visit every six months; however, this practice is not evidence-based, and poor dental outcomes and increasing dental insurance premiums indicate room for improvement. We consider a tailored approach that recommends recall time based on patient characteristics and medical history to minimize disease progression without increasing resource expenditures. We formalize this method as a dynamic treatment regime which comprises a sequence of decisions, one per stage of intervention, that follow a decision rule which maps current patient information to a recommendation for their next visit time. The dynamics of periodontal health, visit frequency, and patient compliance are complex, yet the estimated optimal regime must be interpretable to domain experts if it is to be integrated into clinical practice. We combine non-parametric Bayesian dynamics modeling with policy-search algorithms to estimate the optimal dynamic treatment regime within an interpretable class of regimes. 
Both simulation experiments and application to a rich database of electronic dental records from the HealthPartners HMO shows that our proposed method leads to better dental health without increasing the average recommended recall time relative to competing methods.}, number={531}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Guan, Qian and Reich, Brian J. and Laber, Eric B. and Bandyopadhyay, Dipankar}, year={2020}, month={Jul}, pages={1066–1078} } @article{huberman_reich_pacifici_collazo_2020, title={Estimating the drivers of species distributions with opportunistic data using mediation analysis}, volume={11}, ISSN={["2150-8925"]}, url={https://doi.org/10.1002/ecs2.3165}, DOI={10.1002/ecs2.3165}, abstractNote={Ecological occupancy modeling has historically relied on high‐quality, low‐quantity designed‐survey data for estimation and prediction. In recent years, there has been a large increase in the amount of high‐quantity, unknown‐quality opportunistic data. This has motivated research on how best to combine these two data sources in order to optimize inference. Existing methods can be infeasible for large datasets or require opportunistic data to be located where designed‐survey data exist. These methods map species occupancies, motivating a need to properly evaluate covariate effects (e.g., land cover proportion) on their distributions. We describe a spatial estimation method for supplementarily including additional opportunistic data using mediation analysis concepts. The opportunistic data mediate the effect of the covariate on the designed‐survey data response, decomposing it into a direct and indirect effect. A component of the indirect effect can then be quickly estimated via regressing the mediator on the covariate, while the other components are estimated through a spatial occupancy model.
The regression step allows for use of large quantities of opportunistic data that can be collected in locations with no designed‐survey data available. Simulation results suggest that the mediated method produces an improvement in relative MSE when the data are of reasonable quality. However, when the simulated opportunistic data are poorly correlated with the true spatial process, the standard, unmediated method is still preferable. A spatiotemporal extension of the method is also developed for analyzing the effect of deciduous forest land cover on red‐eyed vireo distribution in the southeastern United States and find that including the opportunistic data do not lead to a substantial improvement. Opportunistic data quality remains an important consideration when employing this method, as with other data integration methods.}, number={6}, journal={ECOSPHERE}, publisher={Wiley}, author={Huberman, David B. and Reich, Brian J. and Pacifici, Krishna and Collazo, Jaime A.}, year={2020}, month={Jun} } @article{guan_johnson_katzfuss_mannshardt_messier_reich_song_2020, title={Fine-Scale Spatiotemporal Air Pollution Analysis Using Mobile Monitors on Google Street View Vehicles}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1665526}, abstractNote={People are increasingly concerned with understanding their personal environment, including possible exposure to harmful air pollutants. To make informed decisions on their day-to-day activities, they are interested in real-time information on a localized scale. Publicly available, fine-scale, high-quality air pollution measurements acquired using mobile monitors represent a paradigm shift in measurement technologies. A methodological framework utilizing these increasingly fine-scale measurements to provide real-time air pollution maps and short-term air quality forecasts on a fine-resolution spatial scale could prove to be instrumental in increasing public awareness and understanding. 
The Google Street View study provides a unique source of data with spatial and temporal complexities, with the potential to provide information about commuter exposure and hot spots within city streets with high traffic. We develop a computationally efficient spatiotemporal model for these data and use the model to make short-term forecasts and high-resolution maps of current air pollution levels. We also show via an experiment that mobile networks can provide more nuanced information than an equally sized fixed-location network. This modeling framework has important real-world implications in understanding citizens' personal environments, as data production and real-time availability continue to be driven by the ongoing development and improvement of mobile measurement technologies. Supplementary materials for this article, including a standardized description of the materials available for reproducing the work, are available as an online supplement.}, number={531}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Guan, Yawen and Johnson, Margaret C. and Katzfuss, Matthias and Mannshardt, Elizabeth and Messier, Kyle P. and Reich, Brian J. and Song, Joon J.}, year={2020}, month={Jul}, pages={1111–1124} } @article{grantham_reich_laber_pacifici_dunn_fierer_gebert_allwood_faith_2020, title={Global forensic geolocation with deep neural networks}, volume={69}, ISSN={["1467-9876"]}, DOI={10.1111/rssc.12427}, abstractNote={Summary An important problem in modern forensic analyses is identifying the provenance of materials at a crime scene, such as biological material on a piece of clothing. This procedure, which is known as geolocation, is conventionally guided by expert knowledge of the biological evidence and therefore tends to be application specific, labour intensive and often subjective. Purely data-driven methods have yet to be fully realized in this domain, because in part of the lack of a sufficiently rich source of data. 
However, high throughput sequencing technologies can identify tens of thousands of fungi and bacteria taxa by using DNA recovered from a single swab collected from nearly any object or surface. This microbial community, or microbiome, may be highly informative of the provenance of the sample, but data on the spatial variation of microbiomes are sparse and high dimensional and have a complex dependence structure that render them difficult to model with standard statistical tools. Deep learning algorithms have generated a tremendous amount of interest within the machine learning community for their predictive performance in high dimensional problems. We present DeepSpace: a new algorithm for geolocation that aggregates over an ensemble of deep neural network classifiers trained on randomly generated Voronoi partitions of a spatial domain. The DeepSpace algorithm makes remarkably good point predictions; for example, when applied to the microbiomes of over 1300 dust samples collected across continental USA, more than half of geolocation predictions produced by this model fall less than 100 km from their true origin, which is a 60% reduction in error from competing geolocation methods. Moreover, we apply DeepSpace to a novel data set of global dust samples collected from nearly 30 countries, finding that dust-associated fungi alone predict a sample's country of origin with nearly 90% accuracy.}, number={4}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C-APPLIED STATISTICS}, author={Grantham, Neal S. and Reich, Brian J. and Laber, Eric B. and Pacifici, Krishna and Dunn, Robert R. and Fierer, Noah and Gebert, Matthew and Allwood, Julia S. 
and Faith, Seth A.}, year={2020}, month={Aug}, pages={909–929} } @article{reich_guan_fourches_warren_sarnat_chang_2020, title={INTEGRATIVE STATISTICAL METHODS FOR EXPOSURE MIXTURES AND HEALTH}, volume={14}, ISSN={["1941-7330"]}, DOI={10.1214/20-AOAS1364}, abstractNote={Humans are concurrently exposed to chemically, structurally and toxicologically diverse chemicals. A critical challenge for environmental epidemiology is to quantify the risk of adverse health outcomes resulting from exposures to such chemical mixtures and to identify which mixture constituents may be driving etiologic associations. A variety of statistical methods have been proposed to address these critical research questions. However, they generally rely solely on measured exposure and health data available within a specific study. Advancements in understanding of the role of mixtures on human health impacts may be better achieved through the utilization of external data and knowledge from multiple disciplines with innovative statistical tools. In this paper we develop new methods for health analyses that incorporate auxiliary information about the chemicals in a mixture, such as physicochemical, structural and/or toxicological data. We expect that the constituents identified using auxiliary information will be more biologically meaningful than those identified by methods that solely utilize observed correlations between measured exposure. We develop flexible Bayesian models by specifying prior distributions for the exposures and their effects that include auxiliary information and examine this idea over a spectrum of analyses from regression to factor analysis. The methods are applied to study the effects of volatile organic compounds on emergency room visits in Atlanta. 
We find that including cheminformatic information about the exposure variables improves prediction and provides a more interpretable model for emergency room visits for respiratory diseases.}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. and Guan, Yawen and Fourches, Denis and Warren, Joshua L. and Sarnat, Stefanie E. and Chang, Howard H.}, year={2020}, month={Dec}, pages={1945–1963} } @article{grantham_guan_reich_borer_gross_2020, title={MIMIX: A Bayesian Mixed-Effects Model for Microbiome Data From Designed Experiments}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1626242}, abstractNote={Recent advances in bioinformatics have made high-throughput microbiome data widely available, and new statistical tools are required to maximize the information gained from these data. For example, analysis of high-dimensional microbiome data from designed experiments remains an open area in microbiome research. Contemporary analyses work on metrics that summarize collective properties of the microbiome, but such reductions preclude inference on the fine-scale effects of environmental stimuli on individual microbial taxa. Other approaches model the proportions or counts of individual taxa as response variables in mixed models, but these methods fail to account for complex correlation patterns among microbial communities. In this article, we propose a novel Bayesian mixed-effects model that exploits cross-taxa correlations within the microbiome, a model we call microbiome mixed model (MIMIX). MIMIX offers global tests for treatment effects, local tests and estimation of treatment effects on individual taxa, quantification of the relative contribution from heterogeneous sources to microbiome variability, and identification of latent ecological subcommunities in the microbiome. 
MIMIX is tailored to large microbiome experiments using a combination of Bayesian factor analysis to efficiently represent dependence between taxa and Bayesian variable selection methods to achieve sparsity. We demonstrate the model using a simulation experiment and on a 2 × 2 factorial experiment of the effects of nutrient supplement and herbivore exclusion on the foliar fungal microbiome of Andropogon gerardii, a perennial bunchgrass, as part of the global Nutrient Network research initiative. Supplementary materials for this article, including a standardized description of the materials available for reproducing the work, are available as an online supplement.}, number={530}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Grantham, Neal S. and Guan, Yawen and Reich, Brian J. and Borer, Elizabeth T. and Gross, Kevin}, year={2020}, month={Apr}, pages={599–609} } @article{wei_reich_hoppin_ghosal_2020, title={SPARSE BAYESIAN ADDITIVE NONPARAMETRIC REGRESSION WITH APPLICATION TO HEALTH EFFECTS OF PESTICIDES MIXTURES}, volume={30}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202017.0315}, abstractNote={In many practical problems that simultaneously investigate the joint effect of covariates, we first need to identify the subset of significant covariates, and then estimate their joint effect. An example is an epidemiological study that analyzes the effects of exposure variables on a health response. In order to make inferences on the covariate effects, we propose a Bayesian additive nonparametric regression model with a multivariate continuous shrinkage prior to address the model uncertainty and to identify important covariates. Our general approach is to decompose the response function into the sum of the nonlinear main effects and the two-way interaction terms. Then we apply the computationally advantageous Bayesian variable selection method to identify the important effects. 
The proposed Bayesian method is a multivariate Dirichlet–Laplace prior that aggressively shrinks many terms toward zero, thus mitigating the noise of including unimportant exposures and isolating the effects of the important covariates. Our theoretical studies demonstrate asymptotic prediction and variable selection consistency properties. In addition, we use numerical simulations to evaluate the model performance in terms of prediction and variable selection under practical scenarios. The method is applied to a neurobehavioral data set from the Agricultural Health Study that investigates the association between pesticide usage and neurobehavioral outcomes in farmers. The proposed method shows improved accuracy in predicting the joint effects on the neurobehavioral responses, while restricting the number of covariates included in the model through variable selection.}, number={1}, journal={STATISTICA SINICA}, author={Wei, Ran and Reich, Brian J. and Hoppin, Jane A. and Ghosal, Subhashis}, year={2020}, month={Jan}, pages={55–79} } @article{jhuang_fuentes_bandyopadhyay_reich_2020, title={Spatiotemporal signal detection using continuous shrinkage priors}, volume={39}, ISSN={["1097-0258"]}, DOI={10.1002/sim.8514}, abstractNote={Periodontal disease (PD) is a chronic inflammatory disease that affects the gum tissue and bone supporting the teeth. Although tooth‐site level PD progression is believed to be spatio‐temporally referenced, the whole‐mouth average periodontal pocket depth (PPD) has been commonly used as an indicator of the current/active status of PD. This leads to imminent loss of information, and imprecise parameter estimates. Despite availability of statistical methods that accommodates spatiotemporal information for responses collected at the tooth‐site level, the enormity of longitudinal databases derived from oral health practice‐based settings render them unscalable for application. 
To mitigate this, we introduce a Bayesian spatiotemporal model to detect problematic/diseased tooth‐sites dynamically inside the mouth for any subject obtained from large databases. This is achieved via a spatial continuous sparsity‐inducing shrinkage prior on spatially varying linear‐trend regression coefficients. A low‐rank representation captures the nonstationary covariance structure of the PPD outcomes, and facilitates the relevant Markov chain Monte Carlo computing steps applicable to thousands of study subjects. Application of our method to both simulated data and to a rich database of electronic dental records from the HealthPartners Institute reveal improved prediction performances, compared with alternative models with usual Gaussian priors for regression parameters and conditionally autoregressive specification of the covariance structure.}, number={13}, journal={STATISTICS IN MEDICINE}, author={Jhuang, An-Ting and Fuentes, Montserrat and Bandyopadhyay, Dipankar and Reich, Brian J.}, year={2020}, month={Jun}, pages={1817–1832} } @article{saia_nelson_huseth_grieger_reich_2020, title={Transitioning Machine Learning from Theory to Practice in Natural Resources Management}, volume={435}, ISSN={0304-3800}, url={http://dx.doi.org/10.1016/j.ecolmodel.2020.109257}, DOI={10.1016/j.ecolmodel.2020.109257}, journal={Ecological Modelling}, publisher={Elsevier BV}, author={Saia, S.M. and Nelson, N. and Huseth, A.S. and Grieger, K and Reich, B.J.}, year={2020}, month={Nov}, pages={109257} } @article{allwood_fierer_dunn_breen_reich_laber_clifton_grantham_faith_2020, title={Use of standardized bioinformatics for the analysis of fungal DNA signatures applied to sample provenance}, volume={310}, ISSN={["1872-6283"]}, DOI={10.1016/j.forsciint.2020.110250}, abstractNote={The use of environmental trace material to aid criminal investigations is an ongoing field of research within forensic science. 
The application of environmental material thus far has focused upon a variety of different objectives relevant to forensic biology, including sample provenance (also referred to as sample attribution). The capability to predict the provenance or origin of an environmental DNA sample would be an advantageous addition to the suite of investigative tools currently available. A metabarcoding approach is often used to predict sample provenance, through the extraction and comparison of the DNA signatures found within different environmental materials, such as the bacteria within soil or fungi within dust. Such approaches are combined with bioinformatics workflows and statistical modelling, often as part of large-scale study, with less emphasis on the investigation of the adaptation of these methods to a smaller scale method for forensic use. The present work was investigating a small-scale approach as an adaptation of a larger metabarcoding study to develop a model for global sample provenance using fungal DNA signatures collected from dust swabs. This adaptation was to facilitate a standardized method for consistent, reproducible sample treatment, including bioinformatics processing and final application of resulting data to the available prediction model. To investigate this small-scale method, 76 DNA samples were treated as anonymous test samples and analyzed using the standardized process to demonstrate and evaluate processing and customized sequence data analysis. This testing included samples originating from countries previously used to train the model, samples artificially mixed to represent multiple or mixed countries, as well as outgroup samples. Positive controls were also developed to monitor laboratory processing and bioinformatics analysis. Through this evaluation we were able to demonstrate that the samples could be processed and analyzed in a consistent manner, facilitated by a relatively user-friendly bioinformatic pipeline for sequence data analysis. 
Such investigation into standardized analyses and application of metabarcoding data is of key importance for the future use of applied microbiology in forensic science.}, journal={FORENSIC SCIENCE INTERNATIONAL}, author={Allwood, Julia S. and Fierer, Noah and Dunn, Robert R. and Breen, Matthew and Reich, Brian J. and Laber, Eric B. and Clifton, Jesse and Grantham, Neal S. and Faith, Seth A.}, year={2020}, month={May} } @article{rekabdarkolaee_krut_fuentes_reich_2019, title={A Bayesian multivariate functional model with spatially varying coefficient approach for modeling hurricane track data}, volume={29}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2018.12.006}, abstractNote={Hurricanes are massive storm systems with enormous destructive capabilities. Understanding the trends across space and time of a hurricane track and intensity leads to improved forecasts and minimizes their damage. Viewing the hurricane’s latitude, longitude, and wind speed as functions of time, we propose a novel spatiotemporal multivariate functional model to simultaneously allow for multivariate, longitudinal, and spatially observed data with noisy functional covariates. The proposed procedure is fully Bayesian and inference is performed using MCMC. This new approach is illustrated through simulation studies and analyzing the hurricane track data from 2004 to 2013 in the Atlantic basin. Simulation results indicate that our proposed model offers a significant reduction in the mean square error and averaged interval and increases the coverage probability.
In addition, our method offers a 10% reduction in location and wind speed prediction error.}, journal={SPATIAL STATISTICS}, author={Rekabdarkolaee, Hossein Moradi and Krut, Christopher and Fuentes, Montserrat and Reich, Brian J.}, year={2019}, month={Mar}, pages={351–365} } @article{cloud_reich_rozoff_alessandrini_lewis_delle_monache_2019, title={A Feed Forward Neural Network Based on Model Output Statistics for Short-Term Hurricane Intensity Prediction}, volume={34}, ISSN={["1520-0434"]}, DOI={10.1175/WAF-D-18-0173.1}, abstractNote={A feed forward neural network (FFNN) is developed for tropical cyclone (TC) intensity prediction, where intensity is defined as the maximum 1-min average 10-m wind speed. This deep learning model incorporates a real-time operational estimate of the current intensity and predictors derived from Hurricane Weather Research and Forecasting (HWRF; 2017 version) Model forecasts. The FFNN model is developed with the operational constraint of being restricted to 6-h-old HWRF data. Best track intensity data are used for observational verification. The forecast training data are from 2014 to 2016 HWRF reforecast data and cover a wide variety of TCs from both the Atlantic and eastern Pacific Ocean basins. Cross validation shows that the FFNN increasingly outperforms the operational observation-adjusted HWRF (HWFI) in terms of mean absolute error (MAE) at forecast lead times from 3 to 57 h. Out-of-sample testing on real-time data from 2017 shows the HWFI produces lower MAE than the FFNN at lead times of 24 h or less and similar MAEs at later lead times. On the other hand, the 2017 data indicate significant potential for the FFNN in the prediction of rapid intensification (RI), with RI defined here as an intensification of at least 30 kt (1 kt ≈ 0.51 m s−1) in a 24-h period. The FFNN produces 4 times the number of hits in HWFI for RI.
While the FFNN has more false alarms than the HWFI, Brier skill scores show that, in the Atlantic, the FFNN has significantly greater skill than the HWFI and probabilistic Statistical Hurricane Intensity Prediction System RI index.}, number={4}, journal={WEATHER AND FORECASTING}, author={Cloud, Kirkwood A. and Reich, Brian J. and Rozoff, Christopher M. and Alessandrini, Stefano and Lewis, William E. and Delle Monache, Luca}, year={2019}, month={Aug}, pages={985–997} } @article{reich_shaby_2019, title={A Spatial Markov Model for Climate Extremes}, volume={28}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2018.1482764}, abstractNote={Spatial climate data are often presented as summaries of areal regions such as grid cells, either because they are the output of numerical climate models or to facilitate comparison with numerical climate model output. Extreme value analysis can benefit greatly from spatial methods that borrow information across regions. For Gaussian outcomes, a host of methods that respect the areal nature of the data are available, including conditional and simultaneous autoregressive models. However, to our knowledge, there is no such method in the spatial extreme value analysis literature. In this article, we propose a new method for areal extremes that accounts for spatial dependence using latent clustering of neighboring regions. We show that the proposed model has desirable asymptotic dependence properties and leads to relatively simple computation. Applying the proposed method to North American climate data reveals several local and continental-scale changes in the distribution of precipitation and temperature extremes over time. Supplementary material for this article is available online.}, number={1}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Reich, Brian J. 
and Shaby, Benjamin A.}, year={2019}, month={Jan}, pages={117–126} } @article{sahoo_guinness_reich_2019, title={A TEST FOR ISOTROPY ON A SPHERE USING SPHERICAL HARMONIC FUNCTIONS}, volume={29}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202017.0475}, abstractNote={Analysis of geostatistical data is often based on the assumption that the spatial random field is isotropic. This assumption, if erroneous, can adversely affect model predictions and statistical inference. Nowadays many applications consider data over the entire globe and hence it is necessary to check the assumption of isotropy on a sphere. In this paper, a test for spatial isotropy on a sphere is proposed. The data are first projected onto the set of spherical harmonic functions. Under isotropy, the spherical harmonic coefficients are uncorrelated whereas they are correlated if the underlying fields are not isotropic. This motivates a test based on the sample correlation matrix of the spherical harmonic coefficients. In particular, we use the largest eigenvalue of the sample correlation matrix as the test statistic. Extensive simulations are conducted to assess the Type I errors of the test under different scenarios. We show how temporal correlation affects the test and provide a method for handling temporal correlation. We also gauge the power of the test as we move away from isotropy. The method is applied to the near-surface air temperature data which is part of the HadCM3 model output. Although we do not expect global temperature fields to be isotropic, we propose several anisotropic models with increasing complexity, each of which has an isotropic process as model component and we apply the test to the isotropic component in a sequence of such models as a method of determining how well the models capture the anisotropy in the fields.}, number={3}, journal={STATISTICA SINICA}, author={Sahoo, Indranil and Guinness, Joseph and Reich, Brian J.}, year={2019}, month={Jul}, pages={1253–1276} }
@article{binion-rock_reich_buckel_2019, title={A spatial kernel density method to estimate the diet composition of fish}, volume={76}, ISSN={["1205-7533"]}, DOI={10.1139/cjfas-2017-0306}, abstractNote={We present a novel spatially explicit kernel density approach to estimate the proportional contribution of a prey to a predator’s diet by mass. First, we compared the spatial estimator to a traditional cluster-based approach using a Monte Carlo simulation study. Next, we compared the diet composition of three predators from Pamlico Sound, North Carolina, to evaluate how ignoring spatial correlation affects diet estimates. The spatial estimator had lower mean squared error values compared with the traditional cluster-based estimator for all Monte Carlo simulations. Incorporating spatial correlation when estimating the predator’s diet resulted in a consistent increase in precision across multiple levels of spatial correlation. Bias was often similar between the two estimators; however, when it differed it mostly favored the spatial estimator. The two estimators produced different estimates of proportional contribution of prey to the diets of the three field-collected predator species, especially when spatial correlation was strong and prey were consumed in patchy areas. Our simulation and empirical data provide strong evidence that data on food habits should be modeled using spatial approaches and not treated as spatially independent.}, number={2}, journal={CANADIAN JOURNAL OF FISHERIES AND AQUATIC SCIENCES}, author={Binion-Rock, Samantha M. and Reich, Brian J. 
and Buckel, Jeffrey A.}, year={2019}, month={Feb}, pages={249–267} } @article{hazra_reich_reich_shinohara_staicu_2019, title={A spatio-temporal model for longitudinal image-on-image regression}, volume={11}, ISSN={["1867-1772"]}, DOI={10.1007/s12561-017-9206-z}, abstractNote={Neurologists and radiologists often use magnetic resonance imaging (MRI) in the management of subjects with multiple sclerosis (MS) because it is sensitive to inflammatory and demyelinative changes in the white matter of the brain and spinal cord. Two conventional modalities used for identifying lesions are T1-weighted (T1) and T2-weighted fluid-attenuated inversion recovery (FLAIR) imaging, which are used clinically and in research studies. Magnetization transfer ratio (MTR), which is available only in research settings, is an advanced MRI modality that has been used extensively for measuring disease-related demyelination both in white matter lesions as well as across normal-appearing white matter. Acquiring MTR is not standard in clinical practice, due to the increased scan time and cost. Hence, prediction of MTR based on the modalities T1 and FLAIR could have great impact on the availability of these promising measures for improved patient management. We propose a spatio-temporal regression model for image response and image predictors that are acquired longitudinally, with images being co-registered within the subject but not across subjects. The model is additive, with the response at a voxel being dependent on the available covariates not only through the current voxel but also on the imaging information from the voxels within a neighboring spatial region as well as their temporal gradients. We propose a dynamic Bayesian estimation procedure that updates the parameters of the subject-specific regression model as data accumulates. 
To bypass the computational challenges associated with a Bayesian approach for high-dimensional imaging data, we propose an approximate Bayesian inference technique. We assess the model fitting and the prediction performance using longitudinally acquired MRI images from 46 MS patients.}, number={1}, journal={Statistics in Biosciences}, author={Hazra, A. and Reich, B. J. and Reich, D. S. and Shinohara, R. T. and Staicu, A. M.}, year={2019}, pages={22–46} } @article{huang_reich_fuentes_sankarasubramanian_2019, title={Complete spatial model calibration}, volume={13}, ISSN={1932-6157}, url={http://dx.doi.org/10.1214/18-aoas1219}, DOI={10.1214/18-AOAS1219}, abstractNote={Computer simulation models are central to environmental science. These mathematical models are used to understand complex weather and climate patterns and to predict the climate's response to different forcings. Climate models are of course not perfect reflections of reality, and so comparison with observed data is needed to quantify and to correct for biases and other deficiencies. We propose a new method to calibrate model output using observed data. Our approach not only matches the marginal distributions of the model output and gridded observed data, but it simultaneously postprocesses the model output to have the same spatial correlation as the observed data. This comprehensive calibration method permits realistic spatial simulations for regional impact studies. We apply the proposed method to global climate model output in North America and show that it successfully calibrates the model output for temperature and precipitation.}, number={2}, journal={The Annals of Applied Statistics}, publisher={Institute of Mathematical Statistics}, author={Huang, Yen-Ning and Reich, Brian J. 
and Fuentes, Montserrat and Sankarasubramanian, A.}, year={2019}, month={Jun}, pages={746–766} } @article{ferguson_mueller_rajasekaran_reich_2019, title={Conference report: 2018 materials and data science hackathon (MATDAT18)}, volume={4}, ISSN={["2058-9689"]}, DOI={10.1039/c9me90018g}, abstractNote={MATDAT18 organizers and participants.}, number={3}, journal={MOLECULAR SYSTEMS DESIGN \& ENGINEERING}, author={Ferguson, Andrew L. and Mueller, Tim and Rajasekaran, Sanguthevar and Reich, Brian J.}, year={2019}, month={Jun}, pages={462–468} } @article{morris_reich_thibaud_2019, title={Exploration and Inference in Spatial Extremes Using Empirical Basis Functions}, volume={24}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-019-00359-1}, abstractNote={Statistical methods for inference on spatial extremes of large datasets are yet to be developed. Motivated by standard dimension reduction techniques used in spatial statistics, we propose an approach based on empirical basis functions to explore and model spatial extremal dependence. Based on a low-rank max-stable model, we propose a data-driven approach to estimate meaningful basis functions using empirical pairwise extremal coefficients. These spatial empirical basis functions can be used to visualize the main trends in extremal dependence. In addition to exploratory analysis, we describe how these functions can be used in a Bayesian hierarchical model to model spatial extremes of large datasets. We illustrate our methods on extreme precipitations in eastern USA. Supplementary materials accompanying this paper appear online.}, number={4}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Morris, Samuel A. and Reich, Brian J. 
and Thibaud, Emeric}, year={2019}, month={Dec}, pages={555–572} } @article{hammerling_reich_2019, title={Guest Editors' Introduction to the Special Issue on "Climate and the Earth System"}, volume={24}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-019-00373-3}, abstractNote={The Journal of Agricultural, Biological and Environmental Statistics (JABES) special issue on the Climate and Earth System highlights recent statistical developments that aim to refine our understanding of this complex system. New methods are required to process the massive environmental data that often fuels climate analysis and to properly account for uncertainty in the results. This special issue proudly features eight papers that span a wide range of computational and methodological problems related to the climate and earth system. In this brief introduction, we identify common themes among the papers and point to areas of future research.}, number={3}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Hammerling, Dorit and Reich, Brian J.}, year={2019}, month={Sep}, pages={395–397} } @article{pacifici_reich_miller_pease_2019, title={Resolving misaligned spatial data with integrated species distribution models}, volume={100}, ISSN={["1939-9170"]}, DOI={10.1002/ecy.2709}, abstractNote={Abstract Advances in species distribution modeling continue to be driven by a need to predict species responses to environmental change coupled with increasing data availability. Recent work has focused on development of methods that integrate multiple streams of data to model species distributions. Combining sources of information increases spatial coverage and can improve accuracy in estimates of species distributions. However, when fusing multiple streams of data, the temporal and spatial resolutions of data sources may be mismatched. This occurs when data sources have fluctuating geographic coverage, varying spatial scales and resolutions, and differing sources of bias and sparsity. 
It is well documented in the spatial statistics literature that ignoring the misalignment of different data sources will result in bias in both the point estimates and uncertainty. This will ultimately lead to inaccurate predictions of species distributions. Here, we examine the issue of misaligned data as it relates specifically to integrated species distribution models. We then provide a general solution that builds off work in the statistical literature for the change‐of‐support problem. Specifically, we leverage spatial correlation and repeat observations at multiple scales to make statistically valid predictions at the ecologically relevant scale of inference. An added feature of the approach is that addressing differences in spatial resolution between data sets can allow for the evaluation and calibration of lesser‐quality sources in many instances. Using both simulations and data examples, we highlight the utility of this modeling approach and the consequences of not reconciling misaligned spatial data. We conclude with a brief discussion of the upcoming challenges and obstacles for species distribution modeling via data fusion.}, number={6}, journal={ECOLOGY}, author={Pacifici, Krishna and Reich, Brian J. and Miller, David A. W. and Pease, Brent S.}, year={2019}, month={Jun} } @article{jhuang_fuentes_jones_esteves_fancher_furman_reich_2019, title={Spatial Signal Detection Using Continuous Shrinkage Priors}, volume={61}, ISSN={0040-1706 1537-2723}, url={http://dx.doi.org/10.1080/00401706.2018.1546622}, DOI={10.1080/00401706.2018.1546622}, abstractNote={Motivated by the problem of detecting changes in two-dimensional X-ray diffraction data, we propose a Bayesian spatial model for sparse signal detection in image data. Our model places considerable mass near zero and has heavy tails to reflect the prior belief that the image signal is zero for most pixels and large for an important subset. 
We show that the spatial prior places mass on nearby locations simultaneously being zero, and also allows for nearby locations to simultaneously be large signals. The form of the prior also facilitates efficient computing for large images. We conduct a simulation study to evaluate the properties of the proposed prior and show that it outperforms other spatial models. We apply our method in the analysis of X-ray diffraction data from a two-dimensional area detector to detect changes in the pattern when the material is exposed to an electric field.}, number={4}, journal={Technometrics}, publisher={Informa UK Limited}, author={Jhuang, An-Ting and Fuentes, Montserrat and Jones, Jacob L. and Esteves, Giovanni and Fancher, Chris M. and Furman, Marschall and Reich, Brian J.}, year={2019}, month={Mar}, pages={494–506} } @article{miller_pacifici_sanderlin_reich_2019, title={The recent past and promising future for data integration methods to estimate species' distributions}, volume={10}, ISSN={["2041-2096"]}, DOI={10.1111/2041-210X.13110}, abstractNote={Abstract With the advance of methods for estimating species distribution models has come an interest in how to best combine datasets to improve estimates of species distributions. This has spurred the development of data integration methods that simultaneously harness information from multiple datasets while dealing with the specific strengths and weaknesses of each dataset. We outline the general principles that have guided data integration methods and review recent developments in the field. We then outline key areas that allow for a more general framework for integrating data and provide suggestions for improving sampling design and validation for integrated models. Key to recent advances has been using point‐process thinking to combine estimators developed for different data types. 
Extending this framework to new data types will further improve our inferences, as well as relaxing assumptions about how parameters are jointly estimated. These along with the better use of information regarding sampling effort and spatial autocorrelation will further improve our inferences. Recent developments form a strong foundation for implementation of data integration models. Wider adoption can improve our inferences about species distributions and the dynamic processes that lead to distributional shifts.}, number={1}, journal={METHODS IN ECOLOGY AND EVOLUTION}, author={Miller, David A. W. and Pacifici, Krishna and Sanderlin, Jamie S. and Reich, Brian J.}, year={2019}, month={Jan}, pages={22–37} } @article{jones_broughton_iamsasri_fancher_wilson_reich_smith_2019, title={The use of Bayesian inference in the characterization of materials and thin films}, volume={75}, ISSN={["2053-2733"]}, DOI={10.1107/S0108767319097940}, journal={ACTA CRYSTALLOGRAPHICA A-FOUNDATION AND ADVANCES}, author={Jones, Jacob L. and Broughton, Rachel and Iamsasri, Thanakorn and Fancher, Chris M. and Wilson, Alyson G. and Reich, Brian and Smith, Ralph C.}, year={2019}, pages={A211–A211} } @article{king_staicu_davis_reich_eder_2018, title={A functional data analysis of spatiotemporal trends and variation in fine particulate matter}, volume={184}, ISSN={["1873-2844"]}, DOI={10.1016/j.atmosenv.2018.04.001}, abstractNote={In this paper we illustrate the application of modern functional data analysis methods to study the spatiotemporal variability of particulate matter components across the United States. The approach models the pollutant annual profiles in a way that describes the dynamic behavior over time and space. This new technique allows us to predict yearly profiles for locations and years at which data are not available and also offers dimension reduction for easier visualization of the data. 
Additionally it allows us to study changes of pollutant levels annually or for a particular season. We apply our method to daily concentrations of two particular components of PM2.5 measured by two networks of monitoring sites across the United States from 2003 to 2015. Our analysis confirms existing findings and additionally reveals new trends in the change of the pollutants across seasons and years that may not be as easily determined from other common approaches such as Kriging.}, journal={ATMOSPHERIC ENVIRONMENT}, author={King, Meredith C. and Staicu, Ana-Maria and Davis, Jerry M. and Reich, Brian J. and Eder, Brian}, year={2018}, month={Jul}, pages={233–243} } @article{libera_sankarasubramanian_sharma_reich_2018, title={A non-parametric bootstrapping framework embedded in a toolkit for assessing water quality model performance}, volume={107}, ISSN={1364-8152}, url={http://dx.doi.org/10.1016/j.envsoft.2018.05.013}, DOI={10.1016/j.envsoft.2018.05.013}, abstractNote={Assessing the ability to predict nutrient concentration in streams is important for determining compliance with the Numeric Nutrient Water Quality Criteria for Nitrogen in the U.S.A. Evaluation of the USGS's Load Estimator (LOADEST) and the Weighted Regression on Time, Discharge, and Season (WRTDS) models in predicting total nitrogen loads over 18 stations from the Water Quality Network show good performance (Nash-Sutcliffe Efficiency (NSE) > 0.8) in capturing the observed variability even for stations with limited data. However, both models captured only 40% of observed variance in total nitrogen (TN) concentration (NSE < 0.4). Thus, the same dataset performed differently in predicting two attributes – TN load and concentration – questioning the predictive skill of the models. This study proposes a non-parametric re-sampling approach for assessing the performance of water quality models particularly in predicting TN concentration. 
Null distributions for three common performance metrics belonging to populations of metrics with no skill in capturing the observed variability are constructed through a bootstrap resampling technique. Sample metrics from the LOADEST and WRTDS model in predicting TN concentration are used to calculate p-values for determining if the sample metrics belong to the null distributions.}, journal={Environmental Modelling \& Software}, publisher={Elsevier BV}, author={Libera, Dominic A. and Sankarasubramanian, A. and Sharma, Ashish and Reich, Brian J.}, year={2018}, month={Sep}, pages={25–33} } @article{irizarry_collazo_pacifici_reich_battle_2018, title={Avian response to shade-layer restoration in coffee plantations in Puerto Rico}, volume={26}, ISSN={["1526-100X"]}, url={https://doi.org/10.1111/rec.12697}, DOI={10.1111/rec.12697}, abstractNote={Documenting the evolving processes associated with habitat restoration and how long it takes to detect avian demographic responses is crucial to evaluate the success of restoration initiatives and to identify ways to improve their effectiveness. The importance of this endeavor prompted the U.S. Fish and Wildlife Service and the USDA Natural Resources Conservation Service to evaluate their sun‐to‐shade coffee restoration program in Puerto Rico initiated in 2003. We quantified the responses of 12 resident avian species using estimates of local occupancy and extinction probabilities based on surveys conducted in 2015–2017 at 65 restored farms grouped according to time‐since‐initial‐restoration (TSIR): new (2011–2014), intermediate (2007–2010), and old (2003–2006). We also surveyed 40 forest sites, which served as reference sites. Vegetation complexity increased with TSIR, ranging between 35 and 40% forest cover in farms 6–9 years TSIR. Forest specialists (e.g. 
Loxigilla portoricencis ) exhibited highest average occupancy in farms initially classified as intermediate (6–9 years) and old (>10 years), paralleling occupancy in secondary forests. Occupancy of open‐habitat specialists (e.g. Tiaris olivaceus ) was more variable, but higher in recently restored farms. Restoring the shade layer has the potential to heighten ecological services derived from forest specialists (e.g. frugivores) without losing the services of many open‐habitat specialists (e.g. insectivores). Annual local extinction probability for forest specialists decreased with increasing habitat complexity, strengthening the potential value of shade restoration as a tool to enhance habitat for avifauna that evolved in forested landscapes.}, number={6}, journal={RESTORATION ECOLOGY}, publisher={Wiley}, author={Irizarry, Amarilys D. and Collazo, Jaime A. and Pacifici, Krishna and Reich, Brian J. and Battle, Kathryn E.}, year={2018}, month={Nov}, pages={1212–1220} } @article{reich_guinness_vandekar_shinohara_staicu_2018, title={Fully Bayesian spectral methods for imaging data}, volume={74}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12782}, abstractNote={Summary Medical imaging data with thousands of spatially correlated data points are common in many fields. Methods that account for spatial correlation often require cumbersome matrix evaluations which are prohibitive for data of this size, and thus current work has either used low-rank approximations or analyzed data in blocks. We propose a method that accounts for nonstationarity, functional connectivity of distant regions of interest, and local signals, and can be applied to large multi-subject datasets using spectral methods combined with Markov Chain Monte Carlo sampling. We illustrate using simulated data that properly accounting for spatial dependence improves precision of estimates and yields valid statistical inference. 
We apply the new approach to study associations between cortical thickness and Alzheimer's disease, and find several regions of the cortex where patients with Alzheimer's disease are thinner on average than healthy controls.}, number={2}, journal={BIOMETRICS}, author={Reich, Brian J. and Guinness, Joseph and Vandekar, Simon N. and Shinohara, Russell T. and Staicu, Ana-Maria}, year={2018}, month={Jun}, pages={645–652} } @article{larsen_reich_ruminski_rappold_2018, title={Impacts of fire smoke plumes on regional air quality, 2006-2013}, volume={28}, ISSN={["1559-064X"]}, DOI={10.1038/s41370-017-0013-x}, abstractNote={Increases in the severity and frequency of large fires necessitate improved understanding of the influence of smoke on air quality and public health. The objective of this study is to estimate the effect of smoke from fires across the continental U.S. on regional air quality over an extended period of time. We use 2006–2013 data on ozone (O3), fine particulate matter (PM2.5), and PM2.5 constituents from environmental monitoring sites to characterize regional air quality and satellite imagery data to identify plumes. Unhealthy levels of O3 and PM2.5 were, respectively, 3.3 and 2.5 times more likely to occur on plume days than on clear days. With a two-stage approach, we estimated the effect of plumes on pollutants, controlling for season, temperature, and within-site and between-site variability. Plumes were associated with an average increase of 2.6 p.p.b. (2.5, 2.7) in O3 and 2.9 µg/m3 (2.8, 3.0) in PM2.5 nationwide, but the magnitude of effects varied by location. The largest impacts were observed across the southeast. High impacts on O3 were also observed in densely populated urban areas at large distance from the fires throughout the southeast. 
Fire smoke substantially affects regional air quality and accounts for a disproportionate number of unhealthy days.}, number={4}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Larsen, Alexandra E. and Reich, Brian J. and Ruminski, Mark and Rappold, Ana G.}, year={2018}, month={Jun}, pages={319–327} } @article{reich_pacifici_stallings_2018, title={Integrating auxiliary data in optimal spatial design for species distribution modelling}, volume={9}, ISSN={["2041-2096"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85044859013&partnerID=MN8TOARS}, DOI={10.1111/2041-210X.13002}, abstractNote={Abstract Traditional surveys used to create species distribution maps and estimate ecological relationships are expensive and time consuming. Citizen science offers a way to collect a massive amount of data at negligible cost and has been shown to be a useful supplement to traditional analyses. However, there remains a need to conduct formal surveys to firmly establish ecological relationships and trends. In this paper, we investigate the use of auxiliary (e.g. citizen science) data as a guide to designing more efficient ecological surveys. Our aim is to explore the use of opportunistic data to inform spatial survey design through a novel objective function that minimizes misclassification rate (i.e. false positives and false negatives) of the estimated occupancy maps. We use an initial occupancy estimate from auxiliary data as the prior in a Bayesian spatial occupancy model, and an efficient posterior approximation that accounts for spatial dependence, covariate effects, and imperfect detection in an exchange algorithm to search for the optimal set of sampling locations to minimize misclassification rate. We examine the optimal design as a function of the detection rate and quality of the citizen‐science data, and compare this optimal design with several common ad hoc designs via an extensive simulation study. 
We then apply our method to eBird data for the brown‐headed nuthatch in the Southeast US. We argue that planning a survey with the use of auxiliary data improves estimation accuracy and may significantly reduce the costs of sampling.}, number={6}, journal={METHODS IN ECOLOGY AND EVOLUTION}, author={Reich, Brian J. and Pacifici, Krishna and Stallings, Jonathan W.}, year={2018}, month={Jun}, pages={1626–1637} } @article{reich_haran_2018, title={Precision maps for public health}, volume={555}, ISSN={0028-0836 1476-4687}, url={http://dx.doi.org/10.1038/D41586-018-02096-W}, DOI={10.1038/D41586-018-02096-W}, abstractNote={Researchers have produced high-resolution maps of childhood growth failure and educational attainment across Africa between 2000 and 2015, to assess progress and guide policy decisions in public health.}, number={7694}, journal={Nature}, publisher={Springer Science and Business Media LLC}, author={Reich, Brian J. and Haran, Murali}, year={2018}, month={Mar}, pages={32–33} } @article{tsai_leung_mchale_floyd_reich_2018, title={Relationships between urban green land cover and human health at different spatial resolutions}, volume={22}, ISSN={1083-8155 1573-1642}, url={http://dx.doi.org/10.1007/s11252-018-0813-3}, DOI={10.1007/s11252-018-0813-3}, number={2}, journal={Urban Ecosystems}, publisher={Springer Nature}, author={Tsai, Wei-Lun and Leung, Yu-Fai and McHale, Melissa R. and Floyd, Myron F. and Reich, Brian J.}, year={2018}, month={Nov}, pages={315–324} } @article{kang_reich_staicu_2018, title={Scalar-on-image regression via the soft-thresholded Gaussian process}, volume={105}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asx075}, abstractNote={This work concerns spatial variable selection for scalar-on-image regression. 
We propose a new class of Bayesian nonparametric models and develop an efficient posterior computational algorithm. The proposed soft-thresholded Gaussian process provides large prior support over the class of piecewise-smooth, sparse, and continuous spatially varying regression coefficient functions. In addition, under some mild regularity conditions the soft-thresholded Gaussian process prior leads to the posterior consistency for parameter estimation and variable selection for scalar-on-image regression, even when the number of predictors is larger than the sample size. The proposed method is compared to alternatives via simulation and applied to an electroencephalography study of alcoholism.}, number={1}, journal={BIOMETRIKA}, author={Kang, Jian and Reich, Brian J. and Staicu, Ana-Maria}, year={2018}, month={Mar}, pages={165–184} } @article{grantham_reich_liu_chang_2018, title={Spatial regression with an informatively missing covariate: Application to mapping fine particulate matter}, volume={29}, ISSN={1180-4009}, url={http://dx.doi.org/10.1002/ENV.2499}, DOI={10.1002/ENV.2499}, abstractNote={The United States Environmental Protection Agency has established a large network of stations to monitor fine particulate matter of <2.5 µm (PM 2.5 ) that is known to be harmful to human health. Unfortunately, the network has limited spatial coverage, and stations often only measure PM 2.5 every few days. Satellite‐measured aerosol optical depth (AOD) is a low‐cost surrogate with greater spatiotemporal coverage, and spatial regression models have established that including AOD as a covariate improves the spatial interpolation of PM 2.5 . However, AOD is often missing, and our analysis reveals that the conditions that lead to missing AOD are also conducive to high AOD. Therefore, naïve interpolation that ignores informative missingness may lead to bias. We propose a joint hierarchical model for PM 2.5 and AOD that accounts for informatively missing AOD. 
We conduct a simulation study of the effects of ignoring informative missingness in the covariate and evaluate the performance of the proposed model. We apply the method to map daily PM 2.5 in the Southeastern United States. Our analysis reveals statistically significant informative missingness and relationships between PM 2.5 and AOD in many seasons after accounting for meteorological and land‐use variables.}, number={4}, journal={Environmetrics}, publisher={Wiley}, author={Grantham, Neal S. and Reich, Brian J. and Liu, Yang and Chang, Howard H.}, year={2018}, month={Apr}, pages={e2499} } @article{janko_irish_reich_peterson_doctor_mwandagalirwa_likwela_tshefu_meshnick_emch_2018, title={The links between agriculture, Anopheles mosquitoes, and malaria risk in children younger than 5 years in the Democratic Republic of the Congo: a population-based, cross-sectional, spatial study}, volume={2}, ISSN={2542-5196}, url={http://dx.doi.org/10.1016/S2542-5196(18)30009-3}, DOI={10.1016/S2542-5196(18)30009-3}, abstractNote={Background: The relationship between agriculture, Anopheles mosquitoes, and malaria in Africa is not fully understood, but it is important for malaria control as countries consider expanding agricultural projects to address population growth and food demand. Therefore, we aimed to assess the effect of agriculture on Anopheles biting behaviour and malaria risk in children in rural areas of the Democratic Republic of the Congo (DR Congo). Methods: We did a population-based, cross-sectional, spatial study of rural children (<5 years) in the DR Congo. We used information about the presence of malaria parasites in each child, as determined by PCR analysis of dried-blood spots from the 2013–14 DR Congo Demographic and Health Survey (DHS). We also used data from the DHS, a longitudinal entomological study, and available land cover and climate data to evaluate the relationships between agriculture, Anopheles biting behaviour, and malaria prevalence. 
Satellite imagery was used to measure the percentage of agricultural land cover around DHS villages and Anopheles sites. Anopheles biting behaviour was assessed by Human Landing Catch. We used probit regression to assess the relationship between agriculture and the probability of malaria infection, as well as the relationship between agriculture and the probability that a mosquito was caught biting indoors. Findings: Between Aug 13, 2013, and Feb 13, 2014, a total of 9790 dried-blood spots were obtained from the DHS, of which 4612 participants were included in this study. Falciparum malaria infection prevalence in rural children was 38·7% (95% uncertainty interval [UI] 37·3–40·0). Increasing exposure to agriculture was associated with increasing malaria risk with a high posterior probability (estimate 0·07, 95% UI −0·04 to 0·17; posterior probability [estimate >0]=0·89), with the probability of malaria infection increased between 0·2% (95% UI −0·1 to 3·4) and 2·6% (–1·5 to 6·6) given a 15% increase in agricultural cover, depending on other risk factors. The models predicted that large increases in agricultural cover (from 0% to 75%) increase the probability of infection by as much as 13·1% (95% UI −7·3 to 28·9). Increased risk might be due to Anopheles gambiae sensu lato, whose probability of biting indoors increased between 11·3% (95% UI −15·3 to 25·6) and 19·7% (–12·1 to 35·9) with a 15% increase in agriculture. Interpretation: Malaria control programmes must consider the possibility of increased risk due to expanding agriculture. 
Governments considering initiating large-scale agricultural projects should therefore also consider accompanying additional malaria control measures. Funding: National Institutes of Health, National Science Foundation, Bill & Melinda Gates Foundation, President's Malaria Initiative, and Royster Society of Fellows at the University of North Carolina at Chapel Hill.}, number={2}, journal={The Lancet Planetary Health}, publisher={Elsevier BV}, author={Janko, Mark M and Irish, Seth R and Reich, Brian J and Peterson, Marc and Doctor, Stephanie M and Mwandagalirwa, Melchior Kashamuka and Likwela, Joris L and Tshefu, Antoinette K and Meshnick, Steven R and Emch, Michael E}, year={2018}, month={Feb}, pages={e74–e82} } @article{li_guindani_reich_bondell_vannucci_2017, title={A Bayesian mixture model for clustering and selection of feature occurrence rates under mean constraints}, volume={10}, ISSN={["1932-1872"]}, DOI={10.1002/sam.11350}, abstractNote={In this paper, we consider the problem of modeling a matrix of count data, where multiple features are observed as counts over a number of samples. Due to the nature of the data generating mechanism, such data are often characterized by a high number of zeros and overdispersion. In order to take into account the skewness and heterogeneity of the data, some type of normalization and regularization is necessary for conducting inference on the occurrences of features across samples. We propose a zero‐inflated Poisson mixture modeling framework that incorporates a model‐based normalization through prior distributions with mean constraints, as well as a feature selection mechanism, which allows us to identify a parsimonious set of discriminatory features, and simultaneously cluster the samples into homogenous groups. 
We show how our approach improves on the accuracy of the clustering with respect to more standard approaches for the analysis of count data, by means of a simulation study and an application to a bag‐of‐words benchmark data set, where the features are represented by the frequencies of occurrence of each word.}, number={6}, journal={STATISTICAL ANALYSIS AND DATA MINING}, author={Li, Qiwei and Guindani, Michele and Reich, Brian J. and Bondell, Howard D. and Vannucci, Marina}, year={2017}, month={Dec}, pages={393–409} } @article{morris_reich_thibaud_cooley_2017, title={A Space-Time Skew-t Model for Threshold Exceedances}, volume={73}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12644}, abstractNote={To assess the compliance of air quality regulations, the Environmental Protection Agency (EPA) must know if a site exceeds a pre-specified level. In the case of ozone, the level for compliance is fixed at 75 parts per billion, which is high, but not extreme at all locations. We present a new space-time model for threshold exceedances based on the skew-t process. Our method incorporates a random partition to permit long-distance asymptotic independence while allowing for sites that are near one another to be asymptotically dependent, and we incorporate thresholding to allow the tails of the data to speak for themselves. We also introduce a transformed AR(1) time-series to allow for temporal dependence. Finally, our model allows for high-dimensional Bayesian inference that is comparable in computation time to traditional geostatistical methods for large data sets. We apply our method to an ozone analysis for July 2005, and find that our model improves over both Gaussian and max-stable methods in terms of predicting exceedances of a high level.}, number={3}, journal={BIOMETRICS}, author={Morris, Samuel A. and Reich, Brian J. 
and Thibaud, Emeric and Cooley, Daniel}, year={2017}, month={Sep}, pages={749–758} } @article{kaufeld_fuentes_reich_herring_shaw_terres_2017, title={A multivariate dynamic spatial factor model for speciated pollutants and adverse birth outcomes}, volume={14}, number={9}, journal={International Journal of Environmental Research and Public Health}, author={Kaufeld, K. A. and Fuentes, M. and Reich, B. J. and Herring, A. H. and Shaw, G. M. and Terres, M. A.}, year={2017} } @article{morris_reich_pacifici_lei_2017, title={A spatial model for rare binary events}, volume={24}, ISSN={["1573-3009"]}, DOI={10.1007/s10651-017-0385-z}, number={4}, journal={ENVIRONMENTAL AND ECOLOGICAL STATISTICS}, author={Morris, Samuel A. and Reich, Brian J. and Pacifici, Krishna and Lei, Yuancai}, year={2017}, month={Dec}, pages={485–504} } @article{wootten_terando_reich_boyles_semazzi_2017, title={Characterizing Sources of Uncertainty from Global Climate Models and Downscaling Techniques}, volume={56}, ISSN={["1558-8432"]}, DOI={10.1175/jamc-d-17-0087.1}, abstractNote={Abstract In recent years, climate model experiments have been increasingly oriented toward providing information that can support local and regional adaptation to the expected impacts of anthropogenic climate change. This shift has magnified the importance of downscaling as a means to translate coarse-scale global climate model (GCM) output to a finer scale that more closely matches the scale of interest. Applying this technique, however, introduces a new source of uncertainty into any resulting climate model ensemble. Here a method is presented, on the basis of a previously established variance decomposition method, to partition and quantify the uncertainty in climate model ensembles that is attributable to downscaling. The method is applied to the southeastern United States using five downscaled datasets that represent both statistical and dynamical downscaling techniques. 
The combined ensemble is highly fragmented, in that only a small portion of the complete set of downscaled GCMs and emission scenarios is typically available. The results indicate that the uncertainty attributable to downscaling approaches ~20% for large areas of the Southeast for precipitation and ~30% for extreme heat days (>35°C) in the Appalachian Mountains. However, attributable quantities are significantly lower for time periods when the full ensemble is considered but only a subsample of all models is available, suggesting that overconfidence could be a serious problem in studies that employ a single set of downscaled GCMs. This article concludes with recommendations to advance the design of climate model experiments so that the uncertainty that accrues when downscaling is employed is more fully and systematically considered.}, number={12}, journal={JOURNAL OF APPLIED METEOROLOGY AND CLIMATOLOGY}, author={Wootten, A. and Terando, A. and Reich, B. J. and Boyles, R. P. and Semazzi, F.}, year={2017}, month={Dec}, pages={3245–3262} } @article{wilson_reich_nolte_spero_hubbell_rappold_2017, title={Climate change impacts on projections of excess mortality at 2030 using spatially varying ozone-temperature risk surfaces}, volume={27}, ISSN={["1559-064X"]}, DOI={10.1038/jes.2016.14}, abstractNote={We project the change in ozone-related mortality burden attributable to changes in climate between a historical (1995-2005) and near-future (2025-2035) time period while incorporating a non-linear and synergistic effect of ozone and temperature on mortality. We simulate air quality from climate projections varying only biogenic emissions and holding anthropogenic emissions constant, thus attributing changes in ozone only to changes in climate and independent of changes in air pollutant emissions. We estimate non-linear, spatially varying, ozone-temperature risk surfaces for 94 US urban areas using observed data. 
Using the risk surfaces and climate projections we estimate daily mortality attributable to ozone exceeding 40 p.p.b. (moderate level) and 75 p.p.b. (US ozone NAAQS) for each time period. The average increases in city-specific median April-October ozone and temperature between time periods are 1.02 p.p.b. and 1.94 °F; however, the results varied by region. Increases in ozone because of climate change result in an increase in ozone mortality burden. Mortality attributed to ozone exceeding 40 p.p.b. increases by 7.7% (1.6-14.2%). Mortality attributed to ozone exceeding 75 p.p.b. increases by 14.2% (1.6-28.9%). The absolute increase in excess ozone mortality is larger for changes in moderate ozone levels, reflecting the larger number of days with moderate ozone levels.}, number={1}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Wilson, Ander and Reich, Brian J. and Nolte, Christopher G. and Spero, Tanya L. and Hubbell, Bryan and Rappold, Ana G.}, year={2017}, pages={118–124} } @article{cabral_zhang_chi_reich_dickey_lebeau_2017, title={Correlating Local Chemistry and Local Cation Displacements in the Relaxor Ferroelectric PMN}, volume={23}, ISSN={1431-9276 1435-8115}, url={http://dx.doi.org/10.1017/S1431927617008741}, DOI={10.1017/S1431927617008741}, abstractNote={Relaxor ferroelectrics are a unique class of materials that can be identified by their high dielectric constants, low hysteresis, large electrostrictive strains}, number={S1}, journal={Microscopy and Microanalysis}, publisher={Cambridge University Press (CUP)}, author={Cabral, Matthew J. and Zhang, Shujun and Chi, Jocelyn and Reich, Brian J. and Dickey, Elizabeth C. 
and LeBeau, James M.}, year={2017}, month={Jul}, pages={1616–1617} } @article{li_bucholz_peterson_reich_russ_brenner_2017, title={How predictable is plastic damage at the atomic scale?}, volume={110}, ISSN={0003-6951 1077-3118}, url={http://dx.doi.org/10.1063/1.4977420}, DOI={10.1063/1.4977420}, abstractNote={The title of this letter implies two questions: To what degree is plastic damage inherently predictable at the atomic scale, and can this predictability be quantified? We answer these questions by combining image analysis with molecular dynamics (MD) simulation to quantify similarities between atomic structures of plastic damage in a database of strained copper bi-crystals. We show that a manifold of different outcomes can originate ostensibly from the same initial structure, but that with this approach complex plastic damage within this manifold can be statistically connected to the initial structure. Not only does this work introduce a powerful approach for analyzing MD simulations of a complex plastic damage but also provides a much needed and critical framework for analyzing and organizing atomic-scale microstructural databases.}, number={9}, journal={Applied Physics Letters}, publisher={AIP Publishing}, author={Li, D. and Bucholz, E. W. and Peterson, G. and Reich, B. J. and Russ, J. C. and Brenner, D. W.}, year={2017}, month={Feb}, pages={091902} } @article{pacifici_reich_miller_gardner_stauffer_singh_mckerrow_collazo_2017, title={Integrating multiple data sources in species distribution modeling: a framework for data fusion}, volume={98}, ISSN={["1939-9170"]}, DOI={10.1002/ecy.1710}, abstractNote={The last decade has seen a dramatic increase in the use of species distribution models (SDMs) to characterize patterns of species' occurrence and abundance. Efforts to parameterize SDMs often create a tension between the quality and quantity of data available to fit models. 
Estimation methods that integrate both standardized and non-standardized data types offer a potential solution to the tradeoff between data quality and quantity. Recently several authors have developed approaches for jointly modeling two sources of data (one of high quality and one of lesser quality). We extend their work by allowing for explicit spatial autocorrelation in occurrence and detection error using a Multivariate Conditional Autoregressive (MVCAR) model and develop three models that share information in a less direct manner resulting in more robust performance when the auxiliary data is of lesser quality. We describe these three new approaches ("Shared," "Correlation," "Covariates") for combining data sources and show their use in a case study of the Brown-headed Nuthatch in the Southeastern U.S. and through simulations. All three of the approaches which used the second data source improved out-of-sample predictions relative to a single data source ("Single"). When information in the second data source is of high quality, the Shared model performs the best, but the Correlation and Covariates model also perform well. When the information quality in the second data source is of lesser quality, the Correlation and Covariates model performed better suggesting they are robust alternatives when little is known about auxiliary data collected opportunistically or through citizen scientists. Methods that allow for both data types to be used will maximize the useful information available for estimating species distributions.}, number={3}, journal={ECOLOGY}, author={Pacifici, Krishna and Reich, Brian J. and Miller, David A. W. 
and Gardner, Beth and Stauffer, Glenn and Singh, Susheela and McKerrow, Alexa and Collazo, Jaime A.}, year={2017}, month={Mar}, pages={840–850} } @article{farjat_reich_guinness_whetten_mckeand_isik_2017, title={Optimal seed deployment under climate change using spatial models: Application to loblolly pine in the Southeastern US}, volume={112}, DOI={10.1080/01621459.2017.1292179}, abstractNote={Provenance tests are a common tool in forestry designed to identify superior genotypes for planting at specific locations. The trials are replicated experiments established with seed from parent trees collected from different regions and grown at several locations. In this work, a Bayesian spatial approach is developed for modeling the expected relative performance of seed sources using climate variables as predictors associated with the origin of seed source and the planting site. The proposed modeling technique accounts for the spatial dependence in the data and introduces a separable Matérn covariance structure that provides a flexible means to estimate effects associated with the origin and planting site locations. The statistical model was used to develop a quantitative tool for seed deployment aimed to identify the location of superior performing seed sources that could be suitable for a specific planting site under a given climate scenario. Cross-validation results indicate that the proposed spatial models provide superior predictive ability compared to multiple linear regression methods in unobserved locations. The general trend of performance predictions based on future climate scenarios suggests an optimal assisted migration of loblolly pine seed sources from southern and warmer regions to northern and colder areas in the southern USA. Supplementary materials for this article are available online.}, number={519}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Farjat, A. and Reich, Brian and Guinness, J. 
and Whetten, Ross and McKeand, Steven and Isik, Fikret}, year={2017}, pages={909–920} } @article{peterson_li_reich_brenner_2017, title={Spatial prediction of crystalline defects observed in molecular dynamic simulations of plastic damage}, volume={44}, ISSN={["1360-0532"]}, DOI={10.1080/02664763.2016.1221915}, abstractNote={Molecular dynamic computer simulation is an essential tool in materials science to study atomic properties of materials in extreme environments and guide development of new materials. We propose a statistical analysis to emulate simulation output with the ultimate goal of efficiently approximating the computationally intensive simulation. We compare several spatial regression approaches including conditional autoregression (CAR), discrete wavelets transform (DWT), and principle components analysis (PCA). The methods are applied to simulation of copper atoms with twin wall and dislocation loop defects, under varying tilt tension angles. We find that CAR and DWT yield accurate results but fail to capture extreme defects, yet PCA better captures defect structure.}, number={10}, journal={JOURNAL OF APPLIED STATISTICS}, author={Peterson, Geoffrey Colin L. and Li, Dong and Reich, Brian J. and Brenner, Donald}, year={2017}, pages={1761–1784} } @article{storlie_reich_rust_ticknor_bonnie_montoya_michalak_2017, title={Spatiotemporal Modeling of Node Temperatures in Supercomputers}, volume={112}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2016.1195271}, abstractNote={Los Alamos National Laboratory is home to many large supercomputing clusters. These clusters require an enormous amount of power (∼500–2000 kW each), and most of this energy is converted into heat. Thus, cooling the components of the supercomputer becomes a critical and expensive endeavor. Recently, a project was initiated to investigate the effect that changes to the cooling system in a machine room had on three large machines that were housed there. 
Coupled with this goal was the aim to develop a general good-practice for characterizing the effect of cooling changes and monitoring machine node temperatures in this and other machine rooms. This article focuses on the statistical approach used to quantify the effect that several cooling changes to the room had on the temperatures of the individual nodes of the computers. The largest cluster in the room has 1600 nodes that run a variety of jobs during general use. Since extreme temperatures are important, a Normal distribution plus generalized Pareto distribution for the upper tail is used to model the marginal distribution, along with a Gaussian process copula to account for spatio-temporal dependence. A Gaussian Markov random field (GMRF) model is used to model the spatial effects on the node temperatures as the cooling changes take place. This model is then used to assess the condition of the node temperatures after each change to the room. The analysis approach was used to uncover the cause of a problematic episode of overheating nodes on one of the supercomputing clusters. This same approach can easily be applied to monitor and investigate cooling systems at other data centers, as well. Supplementary materials for this article are available online.}, number={517}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Storlie, Curtis B. and Reich, Brian J. and Rust, William N. and Ticknor, Lawrence O. and Bonnie, Amanda M. and Montoya, Andrew J. and Michalak, Sarah E.}, year={2017}, pages={92–108} } @article{li_reich_brenner_2017, title={Statistical and image analysis for characterizing simulated atomic-scale damage in crystals}, volume={135}, ISSN={["1879-0801"]}, DOI={10.1016/j.commatsci.2017.03.054}, abstractNote={While molecular dynamics simulations have been used for decades to study structure and formation mechanisms of plastic damage in crystals, the analytical tools needed to characterize collections of plastic defects have been limited. 
Here we demonstrate the use of two methods, spatial cross-correlations (CC) and Linear Discriminate Analysis (LDA), to analyze and compare plastic damage profiles among molecular dynamics simulations in which damage was created by straining bi-crystals containing symmetric tilt grain boundaries with different tilt angles. Two potentials were used, one representing Cu and one representing Ag, and two coarse-grained descriptors for different types of crystal damage were used, averaged central symmetry parameters (CSP) and atomic hydrostatic stress (HS). We find that in general the CSP is a more accurate descriptor than HS for both analysis methods, and for data base sizes of about 30 or more simulations per tilt angle, the LDA does considerably better in predicting angle and material than the CC method. For example, at the largest data base size of 50 simulations per tilt angle and using the average CSP values, the LDA predicts the exact initial tilt angle and material type for 92% of the simulations, while the CC approach drops to 58%. If the average HS is used instead of the average CSP, the LDA and CC predictions drop to 63% and 32%, respectively. These results point to a number of possible applications of this method, for example in quantifying how the range of damage for a set of strained systems may depend on strain rate or temperature, or quantifying similarities between complex damage from processes such as indentation and energetic ion bombardment.}, journal={COMPUTATIONAL MATERIALS SCIENCE}, author={Li, D. and Reich, B. J. and Brenner, D. 
W.}, year={2017}, month={Jul}, pages={119–126} } @inbook{terando_reich_pacifici_costanza_mckerrow_collazo_2017, title={Uncertainty Quantification and Propagation for Projections of Extremes in Monthly Area Burned Under Climate Change: A Case Study in the Coastal Plain of Georgia, USA}, volume={223}, ISBN={0}, ISSN={2328-8779}, url={http://dx.doi.org/10.1002/9781119028116.ch16}, DOI={10.1002/9781119028116.ch16}, abstractNote={Human-caused climate change is predicted to affect the frequency of hazard-linked extremes. Unusually large wildfires are a type of extreme event that is constrained by climate and can be a hazard to society but also an important ecological disturbance. This chapter focuses on changes in the frequency of extreme monthly area burned by wildfires for the end of the 21st century for a wildfire-prone region in the southeast United States. Predicting changes in area burned is complicated by the large and varied uncertainties in how the climate will change and in the models used to predict those changes. The chapter characterizes and quantifies multiple sources of uncertainty and propagate the expanded prediction intervals of future area burned. It illustrates that while accounting for multiple sources of uncertainty in global change science problems is a difficult task, it will be necessary in order to properly assess the risk of increased exposure to these society-relevant events.}, booktitle={NATURAL HAZARD UNCERTAINTY ASSESSMENT: MODELING AND DECISION SUPPORT}, publisher={John Wiley & Sons, Inc.}, author={Terando, Adam J. 
and Reich, Brian and Pacifici, Krishna and Costanza, Jennifer and McKerrow, Alexa and Collazo, Jaime A.}, year={2017}, pages={245–256} } @article{li_reich_brenner_2017b, title={Using spatial cross-correlation image analysis to characterize the influence of strain rate on plastic damage in molecular dynamics simulations}, volume={25}, number={7}, journal={Modelling and Simulation in Materials Science and Engineering}, author={Li, D. and Reich, B. J. and Brenner, D. W.}, year={2017} } @article{parker_reich_eidsvik_2016, title={A Fused Lasso Approach to Nonstationary Spatial Covariance Estimation}, volume={21}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-016-0251-8}, number={3}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Parker, Ryan J. and Reich, Brian J. and Eidsvik, Jo}, year={2016}, month={Sep}, pages={569–587} } @article{shaby_reich_cooley_kaufman_2016, title={A MARKOV-SWITCHING MODEL FOR HEAT WAVES}, volume={10}, ISSN={["1932-6157"]}, DOI={10.1214/15-aoas873}, abstractNote={Heat waves merit careful study because they inflict severe economic and societal damage. We use an intuitive, informal working definition of a heat wave—a persistent event in the tail of the temperature distribution—to motivate an interpretable latent state extreme value model. A latent variable with dependence in time indicates membership in the heat wave state. The strength of the temporal dependence of the latent variable controls the frequency and persistence of heat waves. Within each heat wave, temperatures are modeled using extreme value distributions, with extremal dependence across time accomplished through an extreme value Markov model. One important virtue of interpretability is that model parameters directly translate into quantities of interest for risk management, so that questions like whether heat waves are becoming longer, more severe or more frequent are easily answered by querying an appropriate fitted model. 
We demonstrate the latent state model on two recent, calamitous, examples: the European heat wave of 2003 and the Russian heat wave of 2010.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, author={Shaby, Benjamin A. and Reich, Brian J. and Cooley, Daniel and Kaufman, Cari G.}, year={2016}, month={Mar}, pages={74–93} } @article{balderama_gardner_reich_2016, title={A spatial-temporal double-hurdle model for extremely over-dispersed avian count data}, volume={18}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2016.05.001}, abstractNote={Several wind energy facilities are currently being planned for offshore Atlantic waters of the United States. However, relatively little is known about the distribution, abundance and spatio-temporal variability of marine birds in their offshore habitats and it is becoming increasingly necessary to accurately characterize these demographic parameters before assessing the influence of factors such as offshore energy development on populations. Thus, we incorporate a multi-scale approach to develop models for the space-time distribution and abundance of marine birds to identify potential high-use areas in need of further study. With data taken from past and ongoing survey efforts, we provide relative abundance and density estimates for marine birds over a wide geographical area during multiple years. Due to the excessive amount of zeros as well as extremely large counts exhibited in the data, a double-hurdle model is formulated that includes a negative binomial and a generalized Pareto distribution mixture. Spatial heterogeneity is modeled using a conditional auto-regressive (CAR) prior, and a Fourier basis was used for seasonal variation. 
We demonstrate our model by creating probability maps that show areas of high-abundance and aggregation for twenty-four species of marine bird.}, journal={SPATIAL STATISTICS}, author={Balderama, Earvin and Gardner, Beth and Reich, Brian J.}, year={2016}, month={Nov}, pages={263–275} } @article{guan_laber_reich_2016, title={Bayesian nonparametric estimation for dynamic treatment regimes with sequential transition times comment}, volume={111}, number={515}, journal={Journal of the American Statistical Association}, author={Guan, Q. and Laber, E. B. and Reich, B. J.}, year={2016}, pages={936–942} } @article{guan_laber_reich_2016b, title={Comment}, volume={111}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2016.1200911}, DOI={10.1080/01621459.2016.1200911}, abstractNote={Material change: a universe of ideas for the new school year Gary Williams}, number={515}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Guan, Qian and Laber, Eric B. and Reich, Brian J.}, year={2016}, month={Jul}, pages={936–942} } @article{russell_cooley_porter_reich_heald_2016, title={DATA MINING TO INVESTIGATE THE METEOROLOGICAL DRIVERS FOR EXTREME GROUND LEVEL OZONE EVENTS}, volume={10}, ISSN={["1932-6157"]}, DOI={10.1214/16-aoas954}, abstractNote={This project aims to explore which combinations of meteorological conditions are associated with extreme ground level ozone conditions. Our approach focuses only on the tail by optimizing the tail dependence between the ozone response and functions of meteorological covariates. Since there is a long list of possible meteorological covariates, the space of possible models cannot be explored completely. Consequently, we perform data mining within the model selection context, employing an automated model search procedure. 
Our study is unique among extremes applications, as optimizing tail dependence has not previously been attempted, and it presents new challenges, such as requiring a smooth threshold. We present a simulation study which shows that the method can detect complicated conditions leading to extreme responses and resists overfitting. We apply the method to ozone data for Atlanta and Charlotte and find similar meteorological drivers for these two Southeastern US cities. We identify several covariates which help to differentiate the meteorological conditions which lead to extreme ozone levels from those which lead to merely high levels.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Russell, Brook T. and Cooley, Daniel S. and Porter, William C. and Reich, Brian J. and Heald, Colette L.}, year={2016}, month={Sep}, pages={1673–1698} } @article{pacifici_reich_dorazio_conroy_2016, title={Occupancy estimation for rare species using a spatially-adaptive sampling design}, volume={7}, ISSN={["2041-2096"]}, DOI={10.1111/2041-210x.12499}, abstractNote={Summary Spatially clustered populations create unique challenges for conservation monitoring programmes. Advances in methodology typically are focused on either the design or the modelling stage of the study but do not involve integration of both. We integrate adaptive cluster sampling and spatial occupancy modelling by developing two models to handle the dependence induced by cluster sampling. We compare these models to scenarios using simple random sampling and traditional occupancy models via simulation and data collected on a rare plant species, Tamarix ramosissima , found in China. Our simulations show a marked improvement in confidence interval coverage for the new models combined with cluster sampling compared to simple random sampling and traditional occupancy models, with greatest improvement in the presence of low detection probability and spatial correlation in occupancy. 
Accounting for the design using the simple cluster random‐effects model reduces bias considerably, and full spatial modelling reduces bias further, especially for large n when the spatial covariance parameters can be estimated reliably. Both new models build on the strength of occupancy modelling and adaptive sampling and perform at least as well, and often better, than occupancy modelling alone. We believe our approach is unique and potentially useful for a variety of studies directed at patchily distributed, clustered or rare species exhibiting spatial variation.}, number={3}, journal={METHODS IN ECOLOGY AND EVOLUTION}, author={Pacifici, Krishna and Reich, Brian J. and Dorazio, Robert M. and Conroy, Michael J.}, year={2016}, month={Mar}, pages={285–293} } @article{reich_2016, title={Quantile regression for epidemiological applications}, journal={Handbook of spatial epidemiology}, author={Reich, B. J.}, year={2016}, pages={239–249} } @article{tsai_floyd_leung_mchale_reich_2016, title={Urban Vegetative Cover Fragmentation in the US Associations With Physical Activity and BMI}, volume={50}, ISSN={["1873-2607"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84961144121&partnerID=MN8TOARS}, DOI={10.1016/j.amepre.2015.09.022}, abstractNote={Introduction Urban vegetative cover provides a range of ecosystem services including contributions to human health and well-being. Urbanization exerts tremendous pressure on this natural resource, causing fragmentation and loss of urban greenspace. This study aimed to examine associations between vegetative cover fragmentation and physical activity and BMI at the county scale in the U.S. metropolitan statistical areas greater than 1 million in population. Methods National Land Cover Database 2006 and Behavioral Risk Factor Surveillance System 2008 provided land cover and human health data, respectively. Analyses were performed in 2013 at the county scale where the health data were reported. 
Spearman rank correlation and stepwise and hierarchical regression models were applied to estimate relationships between land cover and health variables. Results After controlling for median household income and race, greater forest edge density (β=0.272, p<0.05) and larger size of herbaceous patches (β=0.261, p<0.01) were associated with a higher percentage of participation in physical activity within counties. More connections between forest and developed area (β=0.37, p<0.01) and greater edge density of shrubland (β=0.646, p<0.001) were positively associated with a higher percentage of normal BMI (<25) within counties. Conclusions Forest land cover and some degree of fragmentation are associated with population physical activity. Future studies should examine how built environments and varying land cover configurations influence physical activity and weight status.}, number={4}, journal={AMERICAN JOURNAL OF PREVENTIVE MEDICINE}, author={Tsai, Wei-Lun and Floyd, Myron F. and Leung, Yu-Fai and McHale, Melissa R. and Reich, Brian J.}, year={2016}, month={Apr}, pages={509–517} } @article{fancher_han_levin_page_reich_smith_wilson_jones_2016, title={Use of Bayesian Inference in Crystallographic Structure Refinement via Full Diffraction Profile Analysis}, volume={6}, ISSN={2045-2322}, url={http://dx.doi.org/10.1038/srep31625}, DOI={10.1038/srep31625}, abstractNote={Abstract A Bayesian inference method for refining crystallographic structures is presented. The distribution of model parameters is stochastically sampled using Markov chain Monte Carlo. Posterior probability distributions are constructed for all model parameters to properly quantify uncertainty by appropriately modeling the heteroskedasticity and correlation of the error structure. The proposed method is demonstrated by analyzing a National Institute of Standards and Technology silicon standard reference material. 
The results obtained by Bayesian inference are compared with those determined by Rietveld refinement. Posterior probability distributions of model parameters provide both estimates and uncertainties. The new method better estimates the true uncertainties in the model as compared to the Rietveld method.}, number={1}, journal={Scientific Reports}, publisher={Springer Science and Business Media LLC}, author={Fancher, Chris M. and Han, Zhen and Levin, Igor and Page, Katharine and Reich, Brian J. and Smith, Ralph C. and Wilson, Alyson G. and Jones, Jacob L.}, year={2016}, month={Aug}, pages={31625} } @article{parker_reich_sain_2015, title={A Multiresolution Approach to Estimating the Value Added by Regional Climate Models}, volume={28}, ISSN={["1520-0442"]}, DOI={10.1175/jcli-d-14-00557.1}, abstractNote={Abstract Climate models have emerged as an essential tool for studying the earth’s climate. Global models are computationally expensive, and so a relatively coarse spatial resolution must be used within the model. This hinders direct application for many impacts studies that require regional and local climate information. A regional model with boundary conditions taken from the global model achieves a finer spatial scale for local analysis. In this paper the authors propose a new method for assessing the value added by these higher-resolution models, and they demonstrate the method within the context of regional climate models (RCMs) from the North American Regional Climate Change Assessment Program (NARCCAP) project. This spectral approach using the discrete cosine transformation (DCT) is based on characterizing the joint relationship between observations, coarser-scale models, and higher-resolution models to identify how the finer scales add value over the coarser output. The joint relationship is computed by estimating the covariance of the data sources at different spatial scales with a Bayesian hierarchical model. 
Using this model the authors can then estimate the value added by each data source over the others. For the NARCCAP data, they find that the higher-resolution models add value starting with low wavenumbers corresponding to features 550 km apart (or 11 total 50-km grid boxes per cycle) all the way down to higher wavenumbers at 150 km apart (3 grid boxes per cycle).}, number={22}, journal={JOURNAL OF CLIMATE}, author={Parker, Ryan J. and Reich, Brian J. and Sain, Stephan R.}, year={2015}, month={Nov}, pages={8873–8887} } @article{schnell_bandyopadhyay_reich_nunn_2015, title={A marginal cure rate proportional hazards model for spatial survival data}, volume={64}, ISSN={["1467-9876"]}, DOI={10.1111/rssc.12098}, abstractNote={Dental studies often produce spatially referenced multivariate time-to-event data, such as the time until tooth loss due to periodontal disease. These data are used to identify risk factors that are associated with tooth loss, and to predict outcomes for an individual patient. The rate of spatial referencing can vary with various tooth locations. In addition, these event time data are heavily censored, mostly because a certain proportion of teeth in the population are not expected to experience failure and can be considered 'cured'. We assume a proportional hazards model with a surviving fraction to model these clustered correlated data and account for dependence between nearby teeth by using spatial frailties which are modelled as linear combinations of positive stable random effects. This model permits predictions (conditioned on spatial frailties) that account for the survival status of nearby teeth and simultaneously preserves the proportional hazards relationship marginally over the random effects for the susceptible teeth, allowing for interpretable estimates of the effects of risk factors on tooth loss. 
We explore the potential of this model via simulation studies and application to a real data set obtained from a private periodontal practice, and we illustrate its advantages over other competing models to identify important risk factors for tooth loss and to predict the remaining lifespan of a patient's teeth.}, number={4}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C-APPLIED STATISTICS}, author={Schnell, Patrick and Bandyopadhyay, Dipankar and Reich, Brian J. and Nunn, Martha}, year={2015}, month={Aug}, pages={673–691} } @article{chang_warren_darrow_reich_waller_2015, title={Assessment of critical exposure and outcome windows in time-to-event analysis with application to air pollution and preterm birth study}, volume={16}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxu060}, abstractNote={In reproductive epidemiology, there is a growing interest to examine associations between air pollution exposure during pregnancy and the risk of preterm birth (PTB). One important research objective is to identify critical periods of exposure and estimate the associated effects at different stages of pregnancy. However, population studies have reported inconsistent findings. This may be due to limitations from the standard analytic approach of treating PTB as a binary outcome without considering time-varying exposures together over the course of pregnancy. To address this research gap, we present a Bayesian hierarchical model for conducting a comprehensive examination of gestational air pollution exposure by estimating the joint effects of weekly exposures during different vulnerable periods. Our model also treats PTB as a time-to-event outcome to address the challenge of different exposure lengths among ongoing pregnancies. 
The proposed model is applied to a dataset of geocoded birth records in the Atlanta metropolitan area between 1999–2005 to examine the risk of PTB associated with gestational exposure to ambient fine particulate matter $<2.5\,\mu$m in aerodynamic diameter (PM$_{2.5}$). We find positive associations between PM$_{2.5}$ exposure during early and mid-pregnancy, and evidence that associations are stronger for PTBs occurring around week 30.}, number={3}, journal={BIOSTATISTICS}, author={Chang, Howard H. and Warren, Joshua L. and Darrow, Lyndsey A. and Reich, Brian J. and Waller, Lance A.}, year={2015}, month={Jul}, pages={509–521} } @article{stephenson_shaby_reich_sullivan_2015, title={Estimating Spatially Varying Severity Thresholds of a Forest Fire Danger Rating System Using Max-Stable Extreme-Event Modeling}, volume={54}, ISSN={["1558-8432"]}, DOI={10.1175/jamc-d-14-0041.1}, abstractNote={Abstract Fire danger indices are used in many countries to estimate the potential fire danger and to issue warnings to local regions. The McArthur fire danger rating system is used in Australia. The McArthur forest fire danger index (FFDI) uses only meteorological elements. It combines information on wind speed, temperature, relative humidity, and recent rainfall to produce a weather index of fire potential. This index is converted into fire danger categories to serve as warnings to the local population and to estimate potential fire-suppression difficulty. FFDI values above the threshold of 75 are rated as extreme. The spatial behavior of large values of the FFDI is modeled to investigate whether a varying threshold across space may serve as a better guide for determining the onset of elevated fire danger. The authors modify and apply a statistical method that was recently developed for spatial extreme events, using a “max-stable” process to model FFDI data at approximately 17 000 data sites. 
The method that is described here produces a quantile map that can be employed as a spatially varying fire danger threshold. It is found that a spatially varying threshold may serve to more accurately represent high fire danger, and an adjustment is proposed that varies by local government area. Temporal change was also investigated, and evidence was found of a recent increase in extreme fire danger in southwestern Australia.}, number={2}, journal={JOURNAL OF APPLIED METEOROLOGY AND CLIMATOLOGY}, author={Stephenson, Alec G. and Shaby, Benjamin A. and Reich, Brian J. and Sullivan, Andrew L.}, year={2015}, month={Feb}, pages={395–407} } @article{sun_reich_cai_guindani_schwartzman_2015, title={False discovery control in large-scale spatial multiple testing}, volume={77}, ISSN={["1467-9868"]}, DOI={10.1111/rssb.12064}, abstractNote={This article develops a unified theoretical and computational framework for false discovery control in multiple testing of spatial signals. We consider both point-wise and cluster-wise spatial analyses, and derive oracle procedures which optimally control the false discovery rate, false discovery exceedance and false cluster rate, respectively. A data-driven finite approximation strategy is developed to mimic the oracle procedures on a continuous spatial domain. Our multiple testing procedures are asymptotically valid and can be effectively implemented using Bayesian computational algorithms for analysis of large spatial data sets. Numerical results show that the proposed procedures lead to more accurate error control and better power performance than conventional methods. We demonstrate our methods for analyzing the time trends in tropospheric ozone in eastern US.}, number={1}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Sun, Wenguang and Reich, Brian J. and Cai, T. 
Tony and Guindani, Michele and Schwartzman, Armin}, year={2015}, month={Jan}, pages={59–83} } @article{grantham_reich_pacifici_laber_menninger_henley_barberán_leff_fierer_dunn_2015, title={Fungi Identify the Geographic Origin of Dust Samples}, volume={10}, ISSN={1932-6203}, url={http://dx.doi.org/10.1371/journal.pone.0122605}, DOI={10.1371/journal.pone.0122605}, abstractNote={There is a long history of archaeologists and forensic scientists using pollen found in a dust sample to identify its geographic origin or history. Such palynological approaches have important limitations as they require time-consuming identification of pollen grains, a priori knowledge of plant species distributions, and a sufficient diversity of pollen types to permit spatial or temporal identification. We demonstrate an alternative approach based on DNA sequencing analyses of the fungal diversity found in dust samples. Using nearly 1,000 dust samples collected from across the continental U.S., our analyses identify up to 40,000 fungal taxa from these samples, many of which exhibit a high degree of geographic endemism. We develop a statistical learning algorithm via discriminant analysis that exploits this geographic endemicity in the fungal diversity to correctly identify samples to within a few hundred kilometers of their geographic origin with high probability. In addition, our statistical approach provides a measure of certainty for each prediction, in contrast with current palynology methods that are almost always based on expert opinion and devoid of statistical inference. Fungal taxa found in dust samples can therefore be used to identify the origin of that dust and, more importantly, we can quantify our degree of certainty that a sample originated in a particular place. 
This work opens up a new approach to forensic biology that could be used by scientists to identify the origin of dust or soil samples found on objects, clothing, or archaeological artifacts.}, number={4}, journal={PLOS ONE}, publisher={Public Library of Science (PLoS)}, author={Grantham, Neal S. and Reich, Brian J. and Pacifici, Krishna and Laber, Eric B. and Menninger, Holly L. and Henley, Jessica B. and Barberán, Albert and Leff, Jonathan W. and Fierer, Noah and Dunn, Robert R.}, editor={Rokas, Antonis}, year={2015}, month={Apr}, pages={e0122605} } @article{kao_reich_storlie_anderson_2015, title={Malware Detection Using Nonparametric Bayesian Clustering and Classification Techniques}, volume={57}, ISSN={["1537-2723"]}, DOI={10.1080/00401706.2014.958916}, abstractNote={Computer security requires statistical methods to quickly and accurately flag malicious programs. This article proposes a nonparametric Bayesian approach for classifying programs as benign or malicious and simultaneously clustering malicious programs. The analysis is based on the dynamic trace (DT) of instructions under the first-order Markov assumption. Each row of the trace's transition matrix is modeled using the Dirichlet process mixture (DPM) model. The DPM model clusters programs within each class (malicious or benign), and produces the posterior probability of being a malware which is used for classification. The novelty of the model is using this clustering algorithm to improve the classification accuracy. The simulation study shows that the DPM model outperforms the elastic net logistic (ENL) regression and the support vector machine (SVM) in classification performance under most of the scenarios, and also outperforms the spectral clustering method for grouping similar malware. In an analysis of real malicious and benign programs, the DPM model gives significantly better classification performance than the ENL model, and competitive results to the SVM. 
More importantly, the DPM model identifies clusters of programs during the classification procedure which is useful for reverse engineering.}, number={4}, journal={TECHNOMETRICS}, author={Kao, Yimin and Reich, Brian and Storlie, Curtis and Anderson, Blake}, year={2015}, month={Oct}, pages={535–546} } @article{farjat_isik_reich_whetten_mckeand_2015, title={Modeling Climate Change Effects on the Height Growth of Loblolly Pine}, volume={61}, ISSN={0015-749X}, url={http://dx.doi.org/10.5849/forsci.14-075}, DOI={10.5849/forsci.14-075}, abstractNote={We present a statistical model to predict the effects of climate change on the height growth of loblolly pine (Pinus taeda L.) families in the southeastern United States. Provenance-progeny trials were used for assessing the response of loblolly pine seed sources to environmental change. Ordinary least squares, ridge regression, and LASSO regression were used to develop height growth prediction models. The approach integrates both genetic and environmental effects and is meant to overcome the critical limitations of population response function and transfer function methods by making full use of data from provenance trials. Prediction models were tested using a hypothetical future climate scenario with 5% decrease in precipitation and 0.5° C increase in maximum and minimum temperatures, relative to historical average values. Under this scenario, local families from the coastal plains of Georgia, Florida, and South Carolina showed the highest performance relative to the current climate in their native environments. As these seed sources were moved to colder northern and inland regions from their origin, we observed declines in their height growth. Similarly, the climatic change scenario suggested that performance of northern seed sources declined significantly when they were moved to more southern warmer regions. 
The statistical model can be used as a quantitative tool to model the effect of climatic variables on the performance of loblolly pine seed sources and may help to develop sound breeding deployment strategies.}, number={4}, journal={Forest Science}, publisher={Oxford University Press (OUP)}, author={Farjat, Alfredo E. and Isik, Fikret and Reich, Brian J. and Whetten, Ross W. and McKeand, Steven E.}, year={2015}, month={Aug}, pages={703–715} } @article{smith_reich_herring_langlois_fuentes_2015, title={Multilevel quantile function modeling with application to birth outcomes}, volume={71}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12294}, abstractNote={Summary Infants born preterm or small for gestational age have elevated rates of morbidity and mortality. Using birth certificate records in Texas from 2002 to 2004 and Environmental Protection Agency air pollution estimates, we relate the quantile functions of birth weight and gestational age to ozone exposure and multiple predictors, including parental age, race, and education level. We introduce a semi-parametric Bayesian quantile approach that models the full quantile function rather than just a few quantile levels. Our multilevel quantile function model establishes relationships between birth weight and the predictors separately for each week of gestational age and between gestational age and the predictors separately across Texas Public Health Regions. We permit these relationships to vary nonlinearly across gestational age, spatial domain and quantile level and we unite them in a hierarchical model via a basis expansion on the regression coefficients that preserves interpretability. Very low birth weight is a primary concern, so we leverage extreme value theory to supplement our model in the tail of the distribution. Gestational ages are recorded in completed weeks of gestation (integer-valued), so we present methodology for modeling quantile functions of discrete response data. 
In a simulation study we show that pooling information across gestational age and quantile level substantially reduces MSE of predictor effects. We find that ozone is negatively associated with the lower tail of gestational age in south Texas and across the distribution of birth weight for high gestational ages. Our methods are available in the R package BSquare.}, number={2}, journal={BIOMETRICS}, author={Smith, Luke B. and Reich, Brian J. and Herring, Amy H. and Langlois, Peter H. and Fuentes, Montserrat}, year={2015}, month={Jun}, pages={508–519} } @article{coleman_martin_reich_2015, title={Multiple window discrete scan statistic for higher-order Markovian sequences}, volume={42}, ISSN={["1360-0532"]}, DOI={10.1080/02664763.2015.1005061}, abstractNote={Accurate and efficient methods to detect unusual clusters of abnormal activity are needed in many fields such as medicine and business. Often the size of clusters is unknown; hence, multiple (variable) window scan statistics are used to identify clusters using a set of different potential cluster sizes. We give an efficient method to compute the exact distribution of multiple window discrete scan statistics for higher-order, multi-state Markovian sequences. We define a Markov chain to efficiently keep track of probabilities needed to compute p-values for the statistic. The state space of the Markov chain is set up by a criterion developed to identify strings that are associated with observing the specified values of the statistic. Using our algorithm, we identify cases where the available approximations do not perform well. We demonstrate our methods by detecting unusual clusters of made free throw shots by National Basketball Association players during the 2009–2010 regular season.}, number={8}, journal={JOURNAL OF APPLIED STATISTICS}, author={Coleman, Deidra A. and Martin, Donald E. K. 
and Reich, Brian J.}, year={2015}, month={Aug}, pages={1690–1705} } @article{reich_porter_2015, title={Partially supervised spatiotemporal clustering for burglary crime series identification}, volume={178}, ISSN={["1467-985X"]}, DOI={10.1111/rssa.12076}, abstractNote={Summary Statistical clustering of criminal events can be used by crime analysts to create lists of potential suspects for an unsolved crime, to identify groups of crimes that may have been committed by the same individuals or group of individuals, for offender profiling and for predicting future events. We propose a Bayesian model-based clustering approach for criminal events. Our approach is semisupervised, because the offender is known for a subset of the events, and utilizes spatiotemporal crime locations as well as crime features describing the offender's modus operandi. The hierarchical model naturally handles complex features that are often seen in crime data, including missing data, interval-censored event times and a mix of discrete and continuous variables. In addition, our Bayesian model produces posterior clustering probabilities which allow analysts to act on model output only as warranted. We illustrate the approach by using a large data set of burglaries in 2009–2010 in Baltimore County, Maryland.}, number={2}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A-STATISTICS IN SOCIETY}, author={Reich, Brian J. and Porter, Michael D.}, year={2015}, month={Feb}, pages={465–480} } @article{smith_fuentes_gordon-larsen_reich_2015, title={QUANTILE REGRESSION FOR MIXED MODELS WITH AN APPLICATION TO EXAMINE BLOOD PRESSURE TRENDS IN CHINA}, volume={9}, ISSN={["1941-7330"]}, DOI={10.1214/15-aoas841}, abstractNote={Cardiometabolic diseases have substantially increased in China in the past 20 years and blood pressure is a primary modifiable risk factor. 
Using data from the China Health and Nutrition Survey, we examine blood pressure trends in China from 1991 to 2009, with a concentration on age cohorts and urbanicity. Very large values of blood pressure are of interest, so we model the conditional quantile functions of systolic and diastolic blood pressure. This allows the covariate effects in the middle of the distribution to vary from those in the upper tail, the focal point of our analysis. We join the distributions of systolic and diastolic blood pressure using a copula, which permits the relationships between the covariates and the two responses to share information and enables probabilistic statements about systolic and diastolic blood pressure jointly. Our copula maintains the marginal distributions of the group quantile effects while accounting for within-subject dependence, enabling inference at the population and subject levels. Our population-level regression effects change across quantile level, year and blood pressure type, providing a rich environment for inference. To our knowledge, this is the first quantile function model to explicitly model within-subject autocorrelation and is the first quantile function approach that simultaneously models multivariate conditional response. We find that the association between high blood pressure and living in an urban area has evolved from positive to negative, with the strongest changes occurring in the upper tail. The increase in urbanization over the last twenty years coupled with the transition from the positive association between urbanization and blood pressure in earlier years to a more uniform association with urbanization suggests increasing blood pressure over time throughout China, even in less urbanized areas. Our methods are available in the R package BSquare.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Smith, Luke B. 
and Fuentes, Montserrat and Gordon-Larsen, Penny and Reich, Brian J.}, year={2015}, month={Sep}, pages={1226–1246} } @article{reich_fuentes_2015, title={Spatial Bayesian Nonparametric Methods}, ISBN={["978-3-319-19517-9"]}, DOI={10.1007/978-3-319-19518-6_17}, journal={NONPARAMETRIC BAYESIAN INFERENCE IN BIOSTATISTICS}, author={Reich, Brian James and Fuentes, Montserrat}, year={2015}, pages={347–357} } @article{vock_reich_fuentes_dominici_2015, title={Spatial Variable Selection Methods for Investigating Acute Health Effects of Fine Particulate Matter Components}, volume={71}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12254}, abstractNote={Summary Multi-site time series studies have reported evidence of an association between short term exposure to particulate matter (PM) and adverse health effects, but the effect size varies across the United States. Variability in the effect may partially be due to differing community level exposure and health characteristics, but also due to the chemical composition of PM which is known to vary greatly by location and time. The objective of this article is to identify particularly harmful components of this chemical mixture. Because of the large number of highly-correlated components, we must incorporate some regularization into a statistical model. We assume that, at each spatial location, the regression coefficients come from a mixture model with the flavor of stochastic search variable selection, but utilize a copula to share information about variable inclusion and effect magnitude across locations. The model differs from current spatial variable selection techniques by accommodating both local and global variable selection. The model is used to study the association between fine PM (PM 2.5m) components, measured at 115 counties nationally over the period 2000–2008, and cardiovascular emergency room admissions among Medicare patients.}, number={1}, journal={BIOMETRICS}, author={Vock, Laura F. Boehm and Reich, Brian J. 
and Fuentes, Montserrat and Dominici, Francesca}, year={2015}, month={Mar}, pages={167–177} } @article{reich_shaby_cooley_2014, title={A Hierarchical Model for Serially-Dependent Extremes: A Study of Heat Waves in the Western US}, volume={19}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-013-0161-y}, number={1}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Reich, Brian J. and Shaby, Benjamin A. and Cooley, Daniel}, year={2014}, month={Mar}, pages={119–135} } @article{reich_chang_foley_2014, title={A Spectral Method for Spatial Downscaling}, volume={70}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12196}, abstractNote={Summary Complex computer models play a crucial role in air quality research. These models are used to evaluate potential regulatory impacts of emission control strategies and to estimate air quality in areas without monitoring data. For both of these purposes, it is important to calibrate model output with monitoring data to adjust for model biases and improve spatial prediction. In this article, we propose a new spectral method to study and exploit complex relationships between model output and monitoring data. Spectral methods allow us to estimate the relationship between model output and monitoring data separately at different spatial scales, and to use model output for prediction only at the appropriate scales. The proposed method is computationally efficient and can be implemented using standard software. We apply the method to compare Community Multiscale Air Quality (CMAQ) model output with ozone measurements in the United States in July 2005. We find that CMAQ captures large‐scale spatial trends, but has low correlation with the monitoring data at small spatial scales.}, number={4}, journal={BIOMETRICS}, author={Reich, Brian J. and Chang, Howard H. 
and Foley, Kristen M.}, year={2014}, month={Dec}, pages={932–942} } @article{reich_gardner_2014, title={A spatial capture-recapture model for territorial species}, volume={25}, DOI={10.1002/env.2317}, abstractNote={Advances in field techniques have led to an increase in spatially referenced capture–recapture data to estimate a species' population size as well as other demographic parameters and patterns of space usage. Statistical models for these data have assumed that the number of individuals in the population and their spatial locations follow a homogeneous Poisson point process model, which implies that the individuals are uniformly and independently distributed over the spatial domain of interest. In many applications, there is reason to question independence, for example, when species display territorial behavior. In this paper, we propose a new statistical model, which allows for dependence between locations to account for avoidance or territorial behavior. We show via a simulation study that accounting for this can improve population size estimates. The method is illustrated using a case study of small mammal trapping data to estimate avoidance and population density of adult female field voles (Microtus agrestis) in Northern England. Copyright © 2014 John Wiley & Sons, Ltd.}, number={8}, journal={Environmetrics}, author={Reich, Brian and Gardner, B.}, year={2014}, pages={630–637} } @article{wilson_reich_2014, title={Confounder Selection via Penalized Credible Regions}, volume={70}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12203}, abstractNote={When estimating the effect of an exposure or treatment on an outcome it is important to select the proper subset of confounding variables to include in the model. Including too many covariates increases mean square error on the effect of interest while not including confounding variables biases the exposure effect estimate. We propose a decision-theoretic approach to confounder selection and effect estimation. 
We first estimate the full standard Bayesian regression model and then post-process the posterior distribution with a loss function that penalizes models omitting important confounders. Our method can be fit easily with existing software and in many situations without the use of Markov chain Monte Carlo methods, resulting in computation on the order of the least squares solution. We prove that the proposed estimator has attractive asymptotic properties. In a simulation study we show that our method outperforms existing methods. We demonstrate our method by estimating the effect of fine particulate matter (PM2.5) exposure on birth weight in Mecklenburg County, North Carolina.}, number={4}, journal={BIOMETRICS}, author={Wilson, Ander and Reich, Brian J.}, year={2014}, month={Dec}, pages={852–861} } @article{eidsvik_shaby_reich_wheeler_niemi_2014, title={Estimation and Prediction in Spatial Models With Block Composite Likelihoods}, volume={23}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2012.760460}, abstractNote={Abstract This article develops a block composite likelihood for estimation and prediction in large spatial datasets. The composite likelihood (CL) is constructed from the joint densities of pairs of adjacent spatial blocks. This allows large datasets to be split into many smaller datasets, each of which can be evaluated separately, and combined through a simple summation. Estimates for unknown parameters are obtained by maximizing the block CL function. In addition, a new method for optimal spatial prediction under the block CL is presented. Asymptotic variances for both parameter estimates and predictions are computed using Godambe sandwich matrices. The approach considerably improves computational efficiency, and the composite structure obviates the need to load entire datasets into memory at once, completely avoiding memory limitations imposed by massive datasets. 
Moreover, computing time can be reduced even further by distributing the operations using parallel computing. A simulation study shows that CL estimates and predictions, as well as their corresponding asymptotic confidence intervals, are competitive with those based on the full likelihood. The procedure is demonstrated on one dataset from the mining industry and one dataset of satellite retrievals. The real-data examples show that the block composite results tend to outperform two competitors; the predictive process model and fixed-rank kriging. Supplementary materials for this article are available online on the journal web site. Key Words: Gaussian process; GPU; Large datasets; Parallel computing; Spatial statistics. SUPPLEMENTARY MATERIALS Appendix: Score function and Hessian. Datasets, CPU, and GPU examples of code. ACKNOWLEDGMENTS We thank the Statistical and Applied Mathematical Sciences Institute (SAMSI) for support during the program on space–time analysis (2009–2010). We also thank NVIDIA for supporting us with graphics cards. Rana Gruber provided the joints data, while Noel Cressie and Gardar Johannesson made the TCO data acquired by NASA available to us. Brian Reich was supported by National Science Foundation grant number 1107046.}, number={2}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Eidsvik, Jo and Shaby, Benjamin A. and Reich, Brian J. and Wheeler, Matthew and Niemi, Jarad}, year={2014}, month={Jun}, pages={295–315} } @article{wilson_reif_reich_2014, title={Hierarchical Dose-Response Modeling for High-Throughput Toxicity Screening of Environmental Chemicals}, volume={70}, ISSN={["1541-0420"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84895891991&partnerID=MN8TOARS}, DOI={10.1111/biom.12114}, abstractNote={Summary High‐throughput screening (HTS) of environmental chemicals is used to identify chemicals with high potential for adverse human health and environmental effects from among the thousands of untested chemicals. 
Predicting physiologically relevant activity with HTS data requires estimating the response of a large number of chemicals across a battery of screening assays based on sparse dose–response data for each chemical‐assay combination. Many standard dose–response methods are inadequate because they treat each curve separately and under‐perform when there are as few as 6–10 observations per curve. We propose a semiparametric Bayesian model that borrows strength across chemicals and assays. Our method directly parametrizes the efficacy and potency of the chemicals as well as the probability of response. We use the ToxCast data from the U.S. Environmental Protection Agency (EPA) as motivation. We demonstrate that our hierarchical method provides more accurate estimates of the probability of response, efficacy, and potency than separate curve estimation in a simulation study. We use our semiparametric method to compare the efficacy of chemicals in the ToxCast data to well‐characterized reference chemicals on estrogen receptor (ERα) and peroxisome proliferator‐activated receptor (PPARγ) assays, then estimate the probability that other chemicals are active at lower concentrations than the reference chemicals.}, number={1}, journal={BIOMETRICS}, publisher={Wiley-Blackwell}, author={Wilson, Ander and Reif, David M. and Reich, Brian J.}, year={2014}, month={Mar}, pages={237–246} } @article{wilson_rappold_neas_reich_2014, title={MODELING THE EFFECT OF TEMPERATURE ON OZONE-RELATED MORTALITY}, volume={8}, ISSN={["1932-6157"]}, DOI={10.1214/14-aoas754}, abstractNote={Climate change is expected to alter the distribution of ambient ozone levels and temperatures which, in turn, may impact public health. Much research has focused on the effect of short-term ozone exposures on mortality and morbidity while controlling for temperature as a confounder, but less is known about the joint effects of ozone and temperature. 
The extent of the health effects of changing ozone levels and temperatures will depend on whether these effects are additive or synergistic. In this paper we propose a spatial, semi-parametric model to estimate the joint ozone-temperature risk surfaces in 95 US urban areas. Our methodology restricts the ozone-temperature risk surfaces to be monotone in ozone and allows for both nonadditive and nonlinear effects of ozone and temperature. We use data from the National Mortality and Morbidity Air Pollution Study (NMMAPS) and show that the proposed model fits the data better than additive linear and nonlinear models. We then examine the synergistic effect of ozone and temperature both nationally and locally and find evidence of a nonlinear ozone effect and an ozone-temperature interaction at higher temperatures and ozone concentrations.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Wilson, Ander and Rappold, Ana G. and Neas, Lucas M. and Reich, Brian J.}, year={2014}, month={Sep}, pages={1728–1749} } @article{reich_chang_strickland_2014, title={Spatial health effects analysis with uncertain residential locations}, volume={23}, ISSN={["1477-0334"]}, DOI={10.1177/0962280212447151}, abstractNote={Spatial epidemiology has benefited greatly from advances in geographic information system technology, which permits extensive study of associations between various health responses and a wide array of socio-economic and environmental factors. However, many spatial epidemiological datasets have missing values for a substantial proportion of spatial variables, such as the census tract of residence of study participants. The standard approach is to discard these observations and analyze only complete observations. In this article, we propose a new hierarchical Bayesian spatial model to handle missing observation locations. 
Our model utilizes all available information to learn about the missing locations and propagates uncertainty about the missing locations throughout the model. We show via a simulation study that this method can lead to more efficient epidemiological analysis. The method is applied to a study of the relationship between fine particulate matter and birth outcomes in southeast Georgia, where we find smaller posterior variance for most parameters using our missing data model compared to the standard complete case model.}, number={2}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Reich, Brian J. and Chang, Howard H. and Strickland, Matthew J.}, year={2014}, month={Apr}, pages={156–168} } @article{wang_reich_lim_2013, title={A Bayesian approach to probabilistic streamflow forecasts}, volume={15}, ISSN={["1465-1734"]}, DOI={10.2166/hydro.2012.080}, abstractNote={One-month-ahead streamflow forecasting is important for water utilities to manage water resources such as irrigation water usage and hydropower generation. While deterministic streamflow forecasts have been utilized extensively in research and practice, ensemble streamflow forecasts and probabilistic information are gaining more attention. This study aims to examine a multivariate linear Bayesian regression approach to provide probabilistic streamflow forecasts by incorporating gridded precipitation forecasts from climate models and lagged monthly streamflow data. Principal component analysis is applied to reduce the size of the regression model. A Markov Chain Monte Carlo (MCMC) algorithm is used to sample from the posterior distribution of model parameters. The proposed approach is tested on gauge data acquired during 1961–2000 in North Carolina. 
Results reveal that the proposed method is a promising alternative forecasting technique and that it performs well for probabilistic streamflow forecasts.}, number={2}, journal={JOURNAL OF HYDROINFORMATICS}, author={Wang, Hui and Reich, Brian and Lim, Yeo Howe}, year={2013}, pages={381–391} } @article{reich_bandyopadhyay_bondell_2013, title={A Nonparametric Spatial Model for Periodontal Data With Nonrandom Missingness}, volume={108}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2013.795487}, abstractNote={Periodontal disease progression is often quantified by clinical attachment level (CAL) defined as the distance down a tooth's root that is detached from the surrounding bone. Measured at 6 locations per tooth throughout the mouth (excluding the molars), it gives rise to a dependent data set-up. These data are often reduced to a one-number summary, such as the whole mouth average or the number of observations greater than a threshold, to be used as the response in a regression to identify important covariates related to the current state of a subject's periodontal health. Rather than a simple one-number summary, we set forward to analyze all available CAL data for each subject, exploiting the presence of spatial dependence, non-stationarity, and non-normality. Also, many subjects have a considerable proportion of missing teeth which cannot be considered missing at random because periodontal disease is the leading cause of adult tooth loss. Under a Bayesian paradigm, we propose a nonparametric flexible spatial (joint) model of observed CAL and the location of missing tooth via kernel convolution methods, incorporating the aforementioned features of CAL data under a unified framework. 
Application of this methodology to a data set recording the periodontal health of an African-American population, as well as simulation studies reveal the gain in model fit and inference, and provides a new perspective into unraveling covariate-response relationships in presence of complexities posed by these data.}, number={503}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Reich, Brian J. and Bandyopadhyay, Dipankar and Bondell, Howard D.}, year={2013}, month={Sep}, pages={820–831} } @article{chang_reich_miranda_2013, title={A spatial time-to-event approach for estimating associations between air pollution and preterm birth}, volume={62}, journal={Journal of the Royal Statistical Society. Series C, Applied Statistics}, author={Chang, H. H. and Reich, B. J. and Miranda, M. L.}, year={2013}, pages={167–179} } @article{storlie_reich_helton_swiler_sallaberry_2013, title={Analysis of computationally demanding models with continuous and categorical inputs}, volume={113}, ISSN={["1879-0836"]}, DOI={10.1016/j.ress.2012.11.018}, abstractNote={The analysis of many physical and engineering problems involves running complex computational models (e.g., simulation models and computer codes). With problems of this type, it is important to understand the relationships between the input (whose values are often imprecisely known) and the output variables, and to characterize the uncertainty in the output. Often, some of the input variables are categorical in nature (e.g., pointer variables to alternative models or different types of material, etc.). A computational model that sufficiently represents reality is often very costly in terms of run time. When the models are computationally demanding, meta-model approaches to their analysis have been shown to be very useful. However, the most popular meta-models for computational computer models do not explicitly allow for categorical input variables. 
In this case, categorical inputs are simply ordered in some way and treated as continuous variables in the estimation of a meta-model. In many cases, this can lead to undesirable and misleading results. In this paper, two meta-models based on functional ANOVA decomposition are presented that explicitly allow for an appropriate treatment of categorical inputs. The effectiveness of the presented meta-models in the analysis of models with continuous and categorical inputs is illustrated with several test cases and also with results from a real analysis.}, journal={RELIABILITY ENGINEERING & SYSTEM SAFETY}, author={Storlie, Curtis B. and Reich, Brian J. and Helton, Jon C. and Swiler, Laura P. and Sallaberry, Cedric J.}, year={2013}, month={May}, pages={30–41} } @article{reich_smith_2013, title={Bayesian Quantile Regression for Censored Data}, volume={69}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12053}, abstractNote={Abstract Summary In this paper we propose a semiparametric quantile regression model for censored survival data. Quantile regression permits covariates to affect survival differently at different stages in the follow‐up period, thus providing a comprehensive study of the survival distribution. We take a semiparametric approach, representing the quantile process as a linear combination of basis functions. The basis functions are chosen so that the prior for the quantile process is centered on a simple location‐scale model, but flexible enough to accommodate a wide range of quantile processes. We show in a simulation study that this approach is competitive with existing methods. The method is illustrated using data from a drug treatment study, where we find that the Bayesian model often gives smaller measures of uncertainty than its competitors, and thus identifies more significant effects.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. 
and Smith, Luke B.}, year={2013}, month={Sep}, pages={651–660} } @article{boehm_reich_bandyopadhyay_2013, title={Bridging Conditional and Marginal Inference for Spatially Referenced Binary Data}, volume={69}, ISSN={["0006-341X"]}, DOI={10.1111/biom.12027}, abstractNote={Abstract Summary Spatially referenced binary data are common in epidemiology and public health. Owing to its elegant log‐odds interpretation of the regression coefficients, a natural model for these data is logistic regression. To account for missing confounding variables that might exhibit a spatial pattern (say, socioeconomic, biological, or environmental conditions), it is customary to include a Gaussian spatial random effect. Conditioned on the spatial random effect, the coefficients may be interpreted as log odds ratios. However, marginally over the random effects, the coefficients no longer preserve the log‐odds interpretation, and the estimates are hard to interpret and generalize to other spatial regions. To resolve this issue, we propose a new spatial random effect distribution through a copula framework which ensures that the regression coefficients maintain the log‐odds interpretation both conditional on and marginally over the spatial random effects. We present simulations to assess the robustness of our approach to various random effects, and apply it to an interesting dataset assessing periodontal health of Gullah‐speaking African Americans. The proposed methodology is flexible enough to handle areal or geo‐statistical datasets, and hierarchical models with multiple random intercepts.}, number={2}, journal={BIOMETRICS}, author={Boehm, Laura and Reich, Brian J. 
and Bandyopadhyay, Dipankar}, year={2013}, month={Jun}, pages={545–554} } @article{mannshardt_sucic_jiao_dominici_frey_reich_fuentes_2013, title={Comparing exposure metrics for the effects of fine particulate matter on emergency hospital admissions}, volume={23}, ISSN={["1559-064X"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84886725530&partnerID=MN8TOARS}, DOI={10.1038/jes.2013.39}, abstractNote={A crucial step in an epidemiological study of the effects of air pollution is to accurately quantify exposure of the population. In this paper, we investigate the sensitivity of the health effects estimates associated with short-term exposure to fine particulate matter with respect to three potential metrics for daily exposure: ambient monitor data, estimated values from a deterministic atmospheric chemistry model, and stochastic daily average human exposure simulation output. Each of these metrics has strengths and weaknesses when estimating the association between daily changes in ambient exposure to fine particulate matter and daily emergency hospital admissions. Monitor data is readily available, but is incomplete over space and time. The atmospheric chemistry model output is spatially and temporally complete but may be less accurate than monitor data. The stochastic human exposure estimates account for human activity patterns and variability in pollutant concentration across microenvironments, but requires extensive input information and computation time. To compare these metrics, we consider a case study of the association between fine particulate matter and emergency hospital admissions for respiratory cases for the Medicare population across three counties in New York. Of particular interest is to quantify the impact and/or benefit to using the stochastic human exposure output to measure ambient exposure to fine particulate matter. 
Results indicate that the stochastic human exposure simulation output indicates approximately the same increase in the relative risk associated with emergency admissions as using a chemistry model or monitoring data as exposure metrics. However, the stochastic human exposure simulation output and the atmospheric chemistry model both bring additional information, which helps to reduce the uncertainty in our estimated risk.}, number={6}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Mannshardt, Elizabeth and Sucic, Katarina and Jiao, Wan and Dominici, Francesca and Frey, H. Christopher and Reich, Brian and Fuentes, Montserrat}, year={2013}, pages={627–636} } @article{reich_porter_2013, title={DISCUSSION OF "ESTIMATING THE HISTORICAL AND FUTURE PROBABILITIES OF LARGE TERRORIST EVENTS" BY AARON CLAUSET AND RYAN WOODARD}, volume={7}, ISSN={["1932-6157"]}, DOI={10.1214/13-aoas614b}, abstractNote={We congratulate the authors on this well-written and thought-provoking paper. They address the problem of estimating the probability of a large (and rare) terrorist attack by modeling the tail of the attack size distribution. Recognizing the importance of incorporating uncertainty, their approach uses bootstrap resampling to obtain a set of parameter estimates for the tail distribution from which estimates for the probability of the rare event can be made. The wide range for the estimated probability of a 9/11-sized attack (90% interval [0.182, 0.669]) illustrates the need to account for uncertainty in such a problem. The authors also recognize that the choice of tail model can have a large impact on the probability estimates. Using multiple tail models (power law, stretched exponential and log-normal), they estimate that the probability of a 9/11-sized attack over a 40-year period (or, more specifically, in 13,274 events) ranges from around 11-35%. We thought it would be interesting to compare the results of the authors' analysis with a more classical 
extreme value analysis [Coles (2001), de Haan and Ferreira (2006)] using a generalized Pareto distribution (GPD).The GPD distribution has three parameters: lower bound μ, scale σ and shape ξ .If Y ∼ GPD(μ, σ, ξ), then Y 's cumulative density function is}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. and Porter, Michael D.}, year={2013}, month={Dec}, pages={1871–1875} } @article{reich_cooley_foley_napelenok_shaby_2013, title={EXTREME VALUE ANALYSIS FOR EVALUATING OZONE CONTROL STRATEGIES}, volume={7}, ISSN={["1932-6157"]}, DOI={10.1214/13-aoas628}, abstractNote={Tropospheric ozone is one of six criteria pollutants regulated by the US EPA, and has been linked to respiratory and cardiovascular endpoints and adverse effects on vegetation and ecosystems. Regional photochemical models have been developed to study the impacts of emission reductions on ozone levels. The standard approach is to run the deterministic model under new emission levels and attribute the change in ozone concentration to the emission control strategy. However, running the deterministic model requires substantial computing time, and this approach does not provide a measure of uncertainty for the change in ozone levels. Recently, a reduced form model (RFM) has been proposed to approximate the complex model as a simple function of a few relevant inputs. In this paper, we develop a new statistical approach to make full use of the RFM to study the effects of various control strategies on the probability and magnitude of extreme ozone events. We fuse the model output with monitoring data to calibrate the RFM by modeling the conditional distribution of monitoring data given the RFM using a combination of flexible semiparametric quantile regression for the center of the distribution where data are abundant and a parametric extreme value distribution for the tail where data are sparse. 
Selected parameters in the conditional distribution are allowed to vary by the RFM value and the spatial location. Also, due to the simplicity of the RFM, we are able to embed the RFM in our Bayesian hierarchical framework to obtain a full posterior for the model input parameters, and propagate this uncertainty to the estimation of the effects of the control strategies. We use the new framework to evaluate three potential control strategies, and find that reducing mobile-source emissions has a larger impact than reducing point-source emissions or a combination of several emission sources.}, number={2}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian and Cooley, Daniel and Foley, Kristen and Napelenok, Sergey and Shaby, Benjamin}, year={2013}, month={Jun}, pages={739–762} } @article{fuentes_reich_2013, title={Multivariate spatial nonparametric modelling via kernel processes mixing}, volume={23}, number={1}, journal={Statistica Sinica}, author={Fuentes, M. and Reich, B.}, year={2013}, pages={75–97} } @article{fuentes_henry_reich_2013, title={Nonparametric spatial models for extremes: application to extreme temperature data}, volume={16}, ISSN={["1572-915X"]}, DOI={10.1007/s10687-012-0154-1}, abstractNote={Estimating the probability of extreme temperature events is difficult because of limited records across time and the need to extrapolate the distributions of these events, as opposed to just the mean, to locations where observations are not available. Another related issue is the need to characterize the uncertainty in the estimated probability of extreme events at different locations. Although the tools for statistical modeling of univariate extremes are well-developed, extending these tools to model spatial extreme data is an active area of research. In this paper, in order to make inference about spatial extreme events, we introduce a new nonparametric model for extremes. 
We present a Dirichlet-based copula model that is a flexible alternative to parametric copula models such as the normal and t-copula. The proposed modelling approach is fitted using a Bayesian framework that allow us to take into account different sources of uncertainty in the data and models. We apply our methods to annual maximum temperature values in the east-south-central United States.}, number={1}, journal={EXTREMES}, author={Fuentes, Montserrat and Henry, John and Reich, Brian}, year={2013}, month={Mar}, pages={75–101} } @article{reich_shaby_2012, title={A HIERARCHICAL MAX-STABLE SPATIAL MODEL FOR EXTREME PRECIPITATION}, volume={6}, ISSN={["1932-6157"]}, DOI={10.1214/12-aoas591}, abstractNote={Extreme environmental phenomena such as major precipitation events manifestly exhibit spatial dependence. Max-stable processes are a class of asymptotically-justified models that are capable of representing spatial dependence among extreme values. While these models satisfy modeling requirements, they are limited in their utility because their corresponding joint likelihoods are unknown for more than a trivial number of spatial locations, preventing, in particular, Bayesian analyses. In this paper, we propose a new random effects model to account for spatial dependence. We show that our specification of the random effect distribution leads to a max-stable process that has the popular Gaussian extreme value process (GEVP) as a limiting case. The proposed model is used to analyze the yearly maximum precipitation from a regional climate model.}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. 
and Shaby, Benjamin A.}, year={2012}, month={Dec}, pages={1430–1451} } @article{foley_reich_napelenok_2012, title={Bayesian Analysis of a Reduced-Form Air Quality Model}, volume={46}, ISSN={["0013-936X"]}, DOI={10.1021/es300666e}, abstractNote={Numerical air quality models are being used for assessing emission control strategies for improving ambient pollution levels across the globe. This paper applies probabilistic modeling to evaluate the effectiveness of emission reduction scenarios aimed at lowering ground-level ozone concentrations. A Bayesian hierarchical model is used to combine air quality model output and monitoring data in order to characterize the impact of emissions reductions while accounting for different degrees of uncertainty in the modeled emissions inputs. The probabilistic model predictions are weighted based on population density in order to better quantify the societal benefits/disbenefits of four hypothetical emission reduction scenarios in which domain-wide NOx emissions from various sectors are reduced individually and then simultaneously. Cross validation analysis shows the statistical model performs well compared to observed ozone levels. Accounting for the variability and uncertainty in the emissions and atmospheric systems being modeled is shown to impact how emission reduction scenarios would be ranked, compared to standard methodology.}, number={14}, journal={ENVIRONMENTAL SCIENCE & TECHNOLOGY}, author={Foley, Kristen M. and Reich, Brian J. and Napelenok, Sergey L.}, year={2012}, month={Jul}, pages={7604–7611} } @article{shaby_reich_2012, title={Bayesian spatial extreme value analysis to assess the changing risk of concurrent high temperatures across large portions of European cropland}, volume={23}, ISSN={["1099-095X"]}, DOI={10.1002/env.2178}, abstractNote={There is strong evidence that extremely high temperatures are detrimental to the yield and quality of many economically and socially critical crops. 
Fortunately, the most deleterious conditions for agriculture occur rarely. We wish to assess the risk of the catastrophic scenario in which large areas of croplands experience extreme heat stress during the same growing season. Applying a hierarchical Bayesian spatial extreme value model that allows the distribution of extreme temperatures to change in time both marginally and in spatial coherence, we examine whether the risk of widespread extremely high temperatures across agricultural land in Europe has increased over the last century. Copyright © 2012 John Wiley & Sons, Ltd.}, number={8}, journal={ENVIRONMETRICS}, author={Shaby, Benjamin A. and Reich, Brian J.}, year={2012}, month={Dec}, pages={638–648} } @article{chang_reich_miranda_2012, title={Chang et al. Respond to "Environmental Exposures and Preterm Birth"}, volume={175}, ISSN={["1476-6256"]}, DOI={10.1093/aje/kwr406}, number={2}, journal={AMERICAN JOURNAL OF EPIDEMIOLOGY}, author={Chang, Howard H. and Reich, Brian J. and Miranda, Marie Lynn}, year={2012}, month={Jan}, pages={111–112} } @article{modlin_fuentes_reich_2012, title={Circular conditional autoregressive modeling of vector fields}, volume={23}, ISSN={["1180-4009"]}, DOI={10.1002/env.1133}, abstractNote={As hurricanes approach landfall, there are several hazards for which coastal populations must be prepared. Damaging winds, torrential rains, and tornadoes play havoc with both the coast and inland areas; but, the biggest seaside menace to life and property is the storm surge. Wind fields are used as the primary forcing for the numerical forecasts of the coastal ocean response to hurricane force winds, such as the height of the storm surge and the degree of coastal flooding. Unfortunately, developments in deterministic modeling of these forcings have been hindered by computational expenses. In this paper, we present a multivariate spatial model for vector fields, that we apply to hurricane winds. 
We parameterize the wind vector at each site in polar coordinates and specify a circular conditional autoregressive model for the vector direction, and a spatial CAR model for speed. We apply our framework for vector fields to hurricane surface wind fields for Hurricane Floyd of 1999 and compare our circular conditional autoregressive model to prior methods that decompose wind speed and direction into its N‐S and W‐E cardinal components. Copyright © 2011 John Wiley & Sons, Ltd.}, number={1}, journal={ENVIRONMETRICS}, author={Modlin, Danny and Fuentes, Montserrat and Reich, Brian}, year={2012}, month={Feb}, pages={46–53} } @article{bondell_reich_2012, title={Consistent High-Dimensional Bayesian Variable Selection via Penalized Credible Regions}, volume={107}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2012.716344}, abstractNote={For high-dimensional data, particularly when the number of predictors greatly exceeds the sample size, selection of relevant predictors for regression is a challenging problem. Methods such as sure screening, forward selection, or penalized regressions are commonly used. Bayesian variable selection methods place prior distributions on the parameters along with a prior over model space, or equivalently, a mixture prior on the parameters having mass at zero. Since exhaustive enumeration is not feasible, posterior model probabilities are often obtained via long MCMC runs. The chosen model can depend heavily on various choices for priors and also posterior thresholds. Alternatively, we propose a conjugate prior only on the full model parameters and use sparse solutions within posterior credible regions to perform selection. These posterior credible regions often have closed-form representations, and it is shown that these sparse solutions can be computed via existing algorithms. The approach is shown to outperform common methods in the high-dimensional setting, particularly under correlation. 
By searching for a sparse solution within a joint credible region, consistent model selection is established. Furthermore, it is shown that, under certain conditions, the use of marginal credible intervals can give consistent selection up to the case where the dimension grows exponentially in the sample size. The proposed approach successfully accomplishes variable selection in the high-dimensional setting, while avoiding pitfalls that plague typical Bayesian variable selection methods.}, number={500}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Bondell, Howard D. and Reich, Brian J.}, year={2012}, month={Dec}, pages={1610–1624} }
% NOTE: citation key of the next entry normalized -- the exported key contained a space ("..._davison_et al._2012"), which BibTeX cannot parse as a key. The truncated author list now ends with the standard "and others" token instead of a literal "et al." pseudo-author.
@article{cooley_sain_gabda_towe_wadsworth_tawn_segers_shaby_reich_davison_et_al._2012, title={Discussion of "Statistical modeling of spatial extremes" by A. C. Davison, S. A. Padoan and M. Ribatet}, volume={27}, number={2}, journal={Statistical Science}, author={Cooley, D. and Sain, S. R. and Gabda, D. and Towe, R. and Wadsworth, J. and Tawn, J. and Segers, J. and Shaby, B. and Reich, B. J. and Davison, A. C. and others}, year={2012}, pages={187–201} } @article{porter_reich_2012, title={Evaluating temporally weighted kernel density methods for predicting the next event location in a series}, volume={18}, ISSN={1947-5683 1947-5691}, url={http://dx.doi.org/10.1080/19475683.2012.691904}, DOI={10.1080/19475683.2012.691904}, abstractNote={One aspect of tactical crime or terrorism analysis is predicting the location of the next event in a series. The objective of this article is to present a methodology to identify the optimal parameters and to test the performance of temporally weighted kernel density estimation models for predicting the next event in a criminal or terrorist event series. By placing event series in a space–time point pattern framework, the next event prediction models are shown to be based on estimating a conditional spatial density function. 
We use temporal weights that indicate how much influence past events have toward predicting future event locations, which can also incorporate uncertainty in the event timing. Results of applying this methodology to crime series in Baltimore County, MD, indicate that performance can vary greatly by crime type and little by series length and is fairly robust to choice of bandwidth.}, number={3}, journal={Annals of GIS}, publisher={Informa UK Limited}, author={Porter, Michael D. and Reich, Brian J.}, year={2012}, month={Sep}, pages={225–240} } @article{hayashi_hayashi_reich_lee_sachdeva_mizoguchi_2012, title={Functional data analysis of mandibular movement using third-degree b-spline basis functions and self-modeling regression}, volume={71}, ISSN={1344-0241 1878-1837}, url={http://dx.doi.org/10.1016/j.odw.2011.11.001}, DOI={10.1016/j.odw.2011.11.001}, abstractNote={The purposes of this study were (1) to establish a new method for analyzing the movement of an incisor point on the mandible as mandibular movement and for analyzing noisy mandibular finite helical axis (FHA) parameters, and (2) to apply this new method in a clinical situation. The subjects were patients with anterior crossbite who were scheduled to receive orthognathic surgery. Chewing movement was measured by an opto-electronic motion-analysis system that can detect mandibular movement in space. The population average curves (trajectories) of the incisor point and the position vector of the FHA during chewing were calculated using third-degree b-spline basis functions and self-modeling regression (SEMOR). Although this study focused on the use of a new statistical model for assessing mandibular movement, the results demonstrated the effectiveness of the combination of the FHA and an additional, supplementary scientific expression of movement, the trajectory of an incisor point. Furthermore, the effectiveness of this new method was demonstrated in a clinical situation. 
The results of this study demonstrated that the calculation of population average curves is effective for clarifying the characteristics of functional data, such as in mandibular movement.}, number={1}, journal={Orthodontic Waves}, publisher={Informa UK Limited}, author={Hayashi, Kazuo and Hayashi, Meiri and Reich, Brian and Lee, Seung-Pyo and Sachdeva, Arjun U.C. and Mizoguchi, Itaru}, year={2012}, month={Mar}, pages={17–25} } @article{reich_fuentes_2012, title={Nonparametric Bayesian models for a spatial covariance}, volume={9}, ISSN={["1572-3127"]}, DOI={10.1016/j.stamet.2011.01.007}, abstractNote={A crucial step in the analysis of spatial data is to estimate the spatial correlation function that determines the relationship between a spatial process at two locations. The standard approach to selecting the appropriate correlation function is to use prior knowledge or exploratory analysis, such as a variogram analysis, to select the correct parametric correlation function. Rather than selecting a particular parametric correlation function, we treat the covariance function as an unknown function to be estimated from the data. We propose a flexible prior for the correlation function to provide robustness to the choice of correlation function. We specify the prior for the correlation function using spectral methods and the Dirichlet process prior, which is a common prior for an unknown distribution function. Our model does not require Gaussian data or spatial locations on a regular grid. The approach is demonstrated using a simulation study as well as an analysis of California air pollution data.}, number={1-2}, journal={STATISTICAL METHODOLOGY}, author={Reich, Brian J. and Fuentes, Montserrat}, year={2012}, pages={265–274} } @article{reich_2012, title={Spatiotemporal quantile regression for detecting distributional changes in environmental processes}, volume={61}, journal={Journal of the Royal Statistical Society. Series C, Applied Statistics}, author={Reich, B. 
J.}, year={2012}, pages={535–553} } @article{reich_kalendra_storlie_bondell_fuentes_2012, title={Variable selection for high dimensional Bayesian density estimation: application to human exposure simulation}, volume={61}, journal={Journal of the Royal Statistical Society. Series C, Applied Statistics}, author={Reich, B. J. and Kalendra, E. and Storlie, C. B. and Bondell, H. D. and Fuentes, M.}, year={2012}, pages={47–66} } @article{reich_bondell_2011, title={A Spatial Dirichlet Process Mixture Model for Clustering Population Genetics Data}, volume={67}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2010.01484.x}, abstractNote={Summary Identifying homogeneous groups of individuals is an important problem in population genetics. Recently, several methods have been proposed that exploit spatial information to improve clustering algorithms. In this article, we develop a Bayesian clustering algorithm based on the Dirichlet process prior that uses both genetic and spatial information to classify individuals into homogeneous clusters for further study. We study the performance of our method using a simulation study and use our model to cluster wolverines in Western Montana using microsatellite data.}, number={2}, journal={BIOMETRICS}, author={Reich, Brian J. and Bondell, Howard D.}, year={2011}, month={Jun}, pages={381–390} } @article{bandyopadhyay_reich_slate_2011, title={A spatial beta-binomial model for clustered count data on dental caries}, volume={20}, ISSN={["1477-0334"]}, DOI={10.1177/0962280210372453}, abstractNote={One of the most important indicators of dental caries prevalence is the total count of decayed, missing or filled surfaces in a tooth. These count data are often clustered in nature (several count responses clustered within a subject), over-dispersed as well as spatially referenced (a diseased tooth might be positively influencing the decay process of a set of neighbouring teeth). 
In this article, we develop a multivariate spatial betabinomial (BB) model for these data that accommodates both over-dispersion as well as latent spatial associations. Using a Bayesian paradigm, the re-parameterised marginal mean (as well as variance) under the BB framework are modelled using a regression on subject/tooth-specific co-variables and a conditionally autoregressive prior that models the latent spatial process. The necessity of exploiting spatial associations to model count data arising in dental caries research is demonstrated using a small simulation study. Real data confirms that our spatial BB model provides a superior estimation and model fit as compared to other sub-models that do not consider modelling spatial associations.}, number={2}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Bandyopadhyay, Dipankar and Reich, Brian J. and Slate, Elizabeth H.}, year={2011}, month={Apr}, pages={85–102} } @article{reich_fuentes_dunson_2011, title={Bayesian Spatial Quantile Regression}, volume={106}, ISSN={["1537-274X"]}, DOI={10.1198/jasa.2010.ap09237}, abstractNote={Tropospheric ozone is one of the six criteria pollutants regulated by the United States Environmental Protection Agency under the Clean Air Act and has been linked with several adverse health effects, including mortality. Due to the strong dependence on weather conditions, ozone may be sensitive to climate change and there is great interest in studying the potential effect of climate change on ozone, and how this change may affect public health. In this paper we develop a Bayesian spatial model to predict ozone under different meteorological conditions, and use this model to study spatial and temporal trends and to forecast ozone concentrations under different climate scenarios. We develop a spatial quantile regression model that does not assume normality and allows the covariates to affect the entire conditional distribution, rather than just the mean. 
The conditional distribution is allowed to vary from site-to-site and is smoothed with a spatial prior. For extremely large datasets our model is computationally infeasible, and we develop an approximate method. We apply the approximate version of our model to summer ozone from 1997-2005 in the Eastern U.S., and use deterministic climate models to project ozone under future climate conditions. Our analysis suggests that holding all other factors fixed, an increase in daily average temperature will lead to the largest increase in ozone in the Industrial Midwest and Northeast.}, number={493}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Reich, Brian J. and Fuentes, Montserrat and Dunson, David B.}, year={2011}, month={Mar}, pages={6–20} } @article{pati_reich_dunson_2011, title={Bayesian geostatistical modelling with informative sampling locations}, volume={98}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asq067}, abstractNote={We consider geostatistical models that allow the locations at which data are collected to be informative about the outcomes. A Bayesian approach is proposed, which models the locations using a log Gaussian Cox process, while modelling the outcomes conditionally on the locations as Gaussian with a Gaussian process spatial random effect and adjustment for the location intensity process. We prove posterior propriety under an improper prior on the parameter controlling the degree of informative sampling, demonstrating that the data are informative. In addition, we show that the density of the locations and mean function of the outcome process can be estimated consistently under mild assumptions. The methods show significant evidence of informative sampling when applied to ozone data over Eastern U.S.A.}, number={1}, journal={BIOMETRIKA}, author={Pati, D. and Reich, B. J. and Dunson, D. 
B.}, year={2011}, month={Mar}, pages={35–48} } @article{reich_haran_2011, title={Guest Editors' Introduction to the Special Issue on "Computer Models and Spatial Statistics for Environmental Science"}, volume={16}, ISSN={["1085-7117"]}, DOI={10.1007/s13253-011-0071-9}, abstractNote={The December 2011 issue of the Journal of Agricultural, Biological, and Environmental Statistics is on the topic “Computer models and spatial statistics for environmental science.” This is a topic of great interest as the study of complex environmental phenomena increasingly relies on deterministic computer models. These models, for example regional climate models or rainfall-runoff simulators, are mathematical models that describe the evolution in time of a physical process. Usually, they consist of complex differential or partial differential equations that are not solvable in closed form. Hence, these are typically solved using numerical techniques, yielding deterministic predictions of a process. In this special issue, researchers tackle several important statistical problems that arise in the analysis of computer model output, for example calibrating model output with observed data, comparing and combining output from several computer models and physical observations, and building statistical emulators for computer models to predict the outcome of the models for new sets of input conditions. An important contribution of statisticians in the analysis of deterministic models is to quantify uncertainty in inferences and predictions in rigorous fashion. Uncertainty quantification is of great interest, especially as information from complex computer models and messy observational data is used for decision making. There are several types of uncertainty, including (1) parametric uncertainty in the model’s inputs or tuning parameters and (2) structural uncertainty in the mathematical equations that define the model. 
In “First-Order Emulator Inference for Parameters in Nonlinear Mechanistic Models”, Mevin B. Hooten, William B. Leeds, Jerome Fiechter, and Christopher K. Wikle provide a computationally-efficient method for quantifying parametric uncertainty. They approximate the complicated computer model with a more tractable statistical model, and use}, number={4}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Reich, Brian J. and Haran, Murali}, year={2011}, month={Dec}, pages={451–452} } @article{havard_reich_bean_chaix_2011, title={Social inequalities in residential exposure to road traffic noise: An environmental justice analysis based on the RECORD Cohort Study}, volume={68}, ISSN={["1470-7926"]}, DOI={10.1136/oem.2010.060640}, abstractNote={Objectives To explore social inequalities in residential exposure to road traffic noise in an urban area. Methods Environmental injustice in road traffic noise exposure was investigated in Paris, France, using the RECORD Cohort Study (n=2130) and modelled noise data. Associations were assessed by estimating noise exposure within the local area around participants' residence, considering various socioeconomic variables defined at both individual and neighbourhood level, and comparing different regression models attempting or not to control for spatial autocorrelation in noise levels. Results After individual-level adjustment, participants' noise exposure increased with neighbourhood educational level and dwelling value but also with proportion of non-French citizens, suggesting seemingly contradictory findings. However, when country of citizenship was defined according to its human development level, noise exposure in fact increased and decreased with the proportions of citizens from advantaged and disadvantaged countries, respectively. 
These findings were consistent with those reported for the other socioeconomic characteristics, suggesting higher road traffic noise exposure in advantaged neighbourhoods. Substantial collinearity between neighbourhood explanatory variables and spatial random effects caused identifiability problems that prevented successful control for spatial autocorrelation. Conclusions Contrary to previous literature, this study shows that people living in advantaged neighbourhoods were more exposed to road traffic noise in their residential environment than their deprived counterparts. This case study demonstrates the need to systematically perform sensitivity analyses with multiple socioeconomic characteristics to avoid incorrect inferences about an environmental injustice situation and the complexity of effectively controlling for spatial autocorrelation when fixed and random components of the model are correlated.}, number={5}, journal={OCCUPATIONAL AND ENVIRONMENTAL MEDICINE}, author={Havard, Sabrina and Reich, Brian J. and Bean, Kathy and Chaix, Basile}, year={2011}, month={May}, pages={366–374} } @article{reich_bondell_li_2011, title={Sufficient Dimension Reduction via Bayesian Mixture Modeling}, volume={67}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2010.01501.x}, abstractNote={Dimension reduction is central to an analysis of data with many predictors. Sufficient dimension reduction aims to identify the smallest possible number of linear combinations of the predictors, called the sufficient predictors, that retain all of the information in the predictors about the response distribution. In this article, we propose a Bayesian solution for sufficient dimension reduction. We directly model the response density in terms of the sufficient predictors using a finite mixture model. This approach is computationally efficient and offers a unified framework to handle categorical predictors, missing predictors, and Bayesian variable selection. 
We illustrate the method using both a simulation study and an analysis of an HIV data set.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Bondell, Howard D. and Li, Lexin}, year={2011}, month={Sep}, pages={886–895} } @article{storlie_bondell_reich_zhang_2011, title={Surface estimation, variable selection, and the nonparametric oracle property}, volume={21}, number={2}, journal={Statistica Sinica}, author={Storlie, C. B. and Bondell, H. D. and Reich, B. J. and Zhang, H. H.}, year={2011}, pages={679–705} } @article{reich_bandyopadhyay_2010, title={A LATENT FACTOR MODEL FOR SPATIAL DATA WITH INFORMATIVE MISSINGNESS}, volume={4}, ISSN={["1932-6157"]}, DOI={10.1214/09-aoas278}, abstractNote={A large amount of data is typically collected during a periodontal exam. Analyzing these data poses several challenges. Several types of measurements are taken at many locations throughout the mouth. These spatially-referenced data are a mix of binary and continuous responses, making joint modeling difficult. Also, most patients have missing teeth. Periodontal disease is a leading cause of tooth loss, so it is likely that the number and location of missing teeth informs about the patient's periodontal health. In this paper we develop a multivariate spatial framework for these data which jointly models the binary and continuous responses as a function of a single latent spatial process representing general periodontal health. We also use the latent spatial process to model the location of missing teeth. We show using simulated and real data that exploiting spatial associations and jointly modeling the responses and locations of missing teeth mitigates the problems presented by these data.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. 
and Bandyopadhyay, Dipankar}, year={2010}, month={Mar}, pages={439–459} } @article{storlie_bondell_reich_2010, title={A Locally Adaptive Penalty for Estimation of Functions With Varying Roughness}, volume={19}, ISSN={["1537-2715"]}, DOI={10.1198/jcgs.2010.09020}, abstractNote={We propose a new regularization method called Loco-Spline for nonparametric function estimation. Loco-Spline uses a penalty which is data driven and locally adaptive. This allows for more flexible estimation of the function in regions of the domain where it has more curvature, without over fitting in regions that have little curvature. This methodology is also transferred into higher dimensions via the Smoothing Spline ANOVA framework. General conditions for optimal MSE rate of convergence are given and the Loco-Spline is shown to achieve this rate. In our simulation study, the Loco-Spline substantially outperforms the traditional smoothing spline and the locally adaptive kernel smoother. Code to fit Loco-Spline models is included with the Supplemental Materials for this article which are available online.}, number={3}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Storlie, Curtis B. and Bondell, Howard D. and Reich, Brian J.}, year={2010}, month={Sep}, pages={569–589} } @article{hodges_reich_2010, title={Adding Spatially-Correlated Errors Can Mess Up the Fixed Effect You Love}, volume={64}, ISSN={["0003-1305"]}, DOI={10.1198/tast.2010.10052}, abstractNote={Many statisticians have had the experience of fitting a linear model with uncorrelated errors, then adding a spatially-correlated error term (random effect) and finding that the estimates of the fixed-effect coefficients have changed substantially. 
We show that adding a spatially-correlated error term to a linear model is equivalent to adding a saturated collection of canonical regressors, the coefficients of which are shrunk toward zero, where the spatial map determines both the canonical regressors and the relative extent of the coefficients' shrinkage. Adding a spatially-correlated error term can also be seen as inflating the error variances associated with specific contrasts of the data, where the spatial map determines the contrasts and the extent of error-variance inflation. We show how to avoid this spatial confounding by restricting the spatial random effect to the orthogonal complement (residual space) of the fixed effects, which we call restricted spatial regression. We consider five proposed interpretations of spatial confounding and draw implications about what, if anything, one should do about it. In doing so, we debunk the common belief that adding a spatially-correlated random effect adjusts fixed-effect estimates for spatially-structured missing covariates. This article has supplementary material online.}, number={4}, journal={AMERICAN STATISTICIAN}, author={Hodges, James S. and Reich, Brian J.}, year={2010}, month={Nov}, pages={325–334} } @article{reich_fuentes_herring_evenson_2010, title={Bayesian Variable Selection for Multivariate Spatially Varying Coefficient Regression}, volume={66}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2009.01333.x}, abstractNote={Summary Physical activity has many well‐documented health benefits for cardiovascular fitness and weight control. For pregnant women, the American College of Obstetricians and Gynecologists currently recommends 30 minutes of moderate exercise on most, if not all, days; however, very few pregnant women achieve this level of activity. Traditionally, studies have focused on examining individual or interpersonal factors to identify predictors of physical activity. 
There is a renewed interest in whether characteristics of the physical environment in which we live and work may also influence physical activity levels. We consider one of the first studies of pregnant women that examines the impact of characteristics of the built environment on physical activity levels. Using a socioecologic framework, we study the associations between physical activity and several factors including personal characteristics, meteorological/air quality variables, and neighborhood characteristics for pregnant women in four counties of North Carolina. We simultaneously analyze six types of physical activity and investigate cross‐dependencies between these activity types. Exploratory analysis suggests that the associations are different in different regions. Therefore, we use a multivariate regression model with spatially varying regression coefficients. This model includes a regression parameter for each covariate at each spatial location. For our data with many predictors, some form of dimension reduction is clearly needed. We introduce a Bayesian variable selection procedure to identify subsets of important variables. Our stochastic search algorithm determines the probabilities that each covariate's effect is null, non‐null but constant across space, and spatially varying. We found that individual‐level covariates had a greater influence on women's activity levels than neighborhood environmental characteristics, and some individual‐level covariates had spatially varying associations with the activity levels of pregnant women.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Fuentes, Montserrat and Herring, Amy H. 
and Evenson, Kelly R.}, year={2010}, month={Sep}, pages={772–782} } @article{hayashi_mizoguchi_lee_reich_2010, title={Development of a novel statistical model for mandibular kinematics}, volume={32}, ISSN={["1350-4533"]}, DOI={10.1016/j.medengphy.2010.04.005}, abstractNote={The purpose of this study was to establish a new statistical method for the analysis of masticatory function. The subjects were patients with anterior crossbite who had received orthognathic surgery. Chewing movement was measured by means of an opto-electronic motion-analysis system. This movement was compared with similar movement in control patients. We sought to develop a statistical model to predict the population average curves of the chewing cycles. In this study, the mandibular incisor point was used as a target point of jaw movement. The combination of a spline function with random coefficients and self-modeling regression (SEMOR) extended to three dimensions was used to predict population average curves for each group. Unquestionably, significant differences were present in some areas. The present modeling method that uses the combination of a spline function and SEMOR is one of the best ways to eliminate subjective estimation with regard to predicting representative chewing cycles.}, number={5}, journal={MEDICAL ENGINEERING \& PHYSICS}, author={Hayashi, Kazuo and Mizoguchi, Itaru and Lee, Seung-Pyo and Reich, Brian}, year={2010}, month={Jun}, pages={423–428} } @article{reich_bondell_wang_2010, title={Flexible Bayesian quantile regression for independent and clustered data}, volume={11}, ISSN={["1465-4644"]}, DOI={10.1093/biostatistics/kxp049}, abstractNote={Quantile regression has emerged as a useful supplement to ordinary mean regression. Traditional frequentist quantile regression makes very minimal assumptions on the form of the error distribution and thus is able to accommodate nonnormal errors, which are common in many applications. 
However, inference for these models is challenging, particularly for clustered or censored data. A Bayesian approach enables exact inference and is well suited to incorporate clustered, missing, or censored data. In this paper, we propose a flexible Bayesian quantile regression model. We assume that the error distribution is an infinite mixture of Gaussian densities subject to a stochastic constraint that enables inference on the quantile of interest. This method outperforms the traditional frequentist method under a wide array of simulated data models. We extend the proposed approach to analyze clustered data. Here, we differentiate between and develop conditional and marginal models for clustered data. We apply our methods to analyze a multipatient apnea duration data set.}, number={2}, journal={BIOSTATISTICS}, author={Reich, Brian J. and Bondell, Howard D. and Wang, Huixia J.}, year={2010}, month={Apr}, pages={337–352} } @article{bondell_reich_wang_2010, title={Noncrossing quantile regression curve estimation}, volume={97}, ISSN={["0006-3444"]}, DOI={10.1093/biomet/asq048}, abstractNote={Since quantile regression curves are estimated individually, the quantile curves can cross, leading to an invalid distribution for the response. A simple constrained version of quantile regression is proposed to avoid the crossing problem for both linear and nonparametric quantile curves. A simulation study and a reanalysis of tropical cyclone intensity data shows the usefulness of the procedure. Asymptotic properties of the estimator are equivalent to the typical approach under standard conditions, and the proposed estimator reduces to the classical one if there is no crossing. The performance of the constrained estimator has shown significant improvement by adding smoothing and stability across the quantile levels.}, number={4}, journal={BIOMETRIKA}, author={Bondell, Howard D. and Reich, Brian J. 
and Wang, Huixia}, year={2010}, month={Dec}, pages={825–838} } @article{hayashi_reich_delong_lee_mizoguchi_2009, title={A novel statistical model for mandibular helical axis analysis}, volume={36}, ISSN={["1365-2842"]}, DOI={10.1111/j.1365-2842.2008.01890.x}, abstractNote={Summary The purpose of this study was to establish a new statistical method for the analysis of noisy mandibular helical axis parameters, especially the position vector of the finite helical axis (FHA). The subjects were children with anterior cross‐bite who had received orthodontic treatment. Maximum mouth‐opening was measured by means of an opto‐electronic motion analysis system. These movements were compared with similar movement in the same group after treatment of their anterior cross‐bite. Each curve of FHA position vectors was modelled as a spline function with random coefficients. To determine the optimal number of knots, two criteria were used: deviance information criteria (DIC) and mean squared prediction error (MSE). We were interested in estimating a typical curve for a population. Self‐modelling regression (SEMOR) was extended to three dimensions to model groups of three‐dimensional curves. Each curve was modelled as a spline function using nine knots. Population average curves were created using SEMOR. This study provided detailed information about jaw movement for comparing cross‐bite to normal occlusion by calculating the population mean curves of the position vector of the FHA. Our results suggested that the two population mean curves for the position vector of the FHA were significantly different in the closing phase. The combination of a spline function with random coefficients and SEMOR extended to three dimensions can be used not only for FHA analysis but also for the analysis of other jaw movements.}, number={2}, journal={JOURNAL OF ORAL REHABILITATION}, author={Hayashi, K. and Reich, B. and Delong, R. and Lee, S. -P. 
and Mizoguchi, I.}, year={2009}, month={Feb}, pages={102–109} } @article{reich_fuentes_burke_2009, title={Analysis of the effects of ultrafine particulate matter while accounting for human exposure}, volume={20}, ISSN={["1099-095X"]}, DOI={10.1002/env.915}, abstractNote={Abstract Particulate matter (PM) has been associated with mortality in several epidemiological studies. The US EPA currently regulates PM 10 and PM 2.5 (mass concentration of particles with diameter less than 10 and 2.5 µm, respectively), but it is not clear which size of particles are most responsible for adverse health outcomes. A current hypothesis is that ultrafine particles with diameter less than 0.1 µm are particularly harmful because their small size allows them to deeply penetrate the lungs. This paper investigates the association between exposure to particles of varying diameter and daily mortality. We propose a new dynamic factor analysis model to relate the ambient concentrations of several sizes of particles with diameters ranging from 0.01 to 0.40 µm with mortality. We introduce a Bayesian model that converts ambient concentrations into simulated personal exposure using the EPA's Stochastic Human Exposure and Dose Simulator, and relates simulated exposure with mortality. Using new data from Fresno, CA, we find that the 4‐day lag of particles with diameter between 0.02 and 0.08 µm is associated with mortality. This is consistent with the small particles hypothesis. Copyright © 2008 John Wiley & Sons, Ltd.}, number={2}, journal={ENVIRONMETRICS}, author={Reich, Brian J. and Fuentes, Montserrat and Burke, Janet}, year={2009}, month={Mar}, pages={131–146} } @article{bandyopadhyay_reich_slate_2009, title={Bayesian modeling of multivariate spatial binary data with applications to dental caries}, volume={28}, ISSN={["1097-0258"]}, DOI={10.1002/sim.3647}, abstractNote={Abstract Dental research gives rise to data with potentially complex correlation structure. 
Assessments of dental caries yield a binary outcome indicating the presence or absence of caries experience for each surface of each tooth in a subject's mouth. In addition to this nesting, caries outcome exhibit spatial structure among neighboring teeth. We develop a Bayesian multivariate model for spatial binary data using random effects autologistic regression that controls for the correlation within tooth surfaces and spatial correlation among neighboring teeth. Using a sample from a clinical study conducted at the Medical University of South Carolina, we compare this autologistic model with covariates to alternative models to demonstrate the improvement in predictions and also to assess the effects of covariates on caries experience. Copyright © 2009 John Wiley & Sons, Ltd.}, number={28}, journal={STATISTICS IN MEDICINE}, author={Bandyopadhyay, Dipankar and Reich, Brian J. and Slate, Elizabeth H.}, year={2009}, month={Dec}, pages={3492–3508} } @article{costalonga_batas_reich_2009, title={Effects of Toll-like receptor 4 on Porphyromonas gingivalis-induced bone loss in mice}, volume={44}, ISSN={["1600-0765"]}, DOI={10.1111/j.1600-0765.2008.01152.x}, abstractNote={Toll-like receptor 4 (TLR-4)/myeloid differentiation protein-2 complex ligation by lipopolysaccharide induces production of pro-inflammatory cytokines and co-stimulatory molecules on antigen presenting cells. The aim of this study was to determine the role of the TLR-4 in bone loss-resistant C57BL mice and in bone loss-susceptible BALB/c mice after infection with Porphyromonas gingivalis. The BALB/c and C57BL/10 mice, either normal or TLR-4 deficient, were infected or sham-infected orally four times, at 4 day intervals, with 10(9) colony forming units of P. gingivalis. At 47 days, defleshed jaws were stained and photographed in a standardized position. 
We measured the surface area of the root trunk to assess the alveolar bone loss. Porphyromonas gingivalis-infected wild-type BALB/c mice lost 13.8% more bone than P. gingivalis-infected wild-type C57BL/10 mice. In contrast, P. gingivalis-infected TLR-4-deficient C57BL/10 mice lost 12.7% more bone than P. gingivalis-infected TLR-4-deficient BALB/c mice. Porphyromonas gingivalis-infected wild-type C57BL/6 and TLR-2 knockout C57BL/6 mice had similar bone levels to sham-infected control mice. Toll-like receptor 4 is protective for C57BL/10 but detrimental to BALB/c mice, since its absence allowed C57BL/10 but not BALB/c mice to lose alveolar bone. Toll-like receptor 2 does not contribute to this protection in genetically similar C57BL/6 mice.}, number={4}, journal={JOURNAL OF PERIODONTAL RESEARCH}, author={Costalonga, M. and Batas, L. and Reich, B. J.}, year={2009}, month={Aug}, pages={537–542} } @article{choi_reich_fuentes_davis_2009, title={Multivariate Spatial-Temporal Modeling and Prediction of Speciated Fine Particles}, volume={3}, ISSN={1559-8608 1559-8616}, url={http://dx.doi.org/10.1080/15598608.2009.10411933}, DOI={10.1080/15598608.2009.10411933}, abstractNote={Fine particulate matter (PM(2.5)) is an atmospheric pollutant that has been linked to serious health problems, including mortality. PM(2.5) is a mixture of pollutants, and it has five main components: sulfate, nitrate, total carbonaceous mass, ammonium, and crustal material. These components have complex spatial-temporal dependency and cross dependency structures. It is important to gain insight and better understanding about the spatial-temporal distribution of each component of the total PM(2.5) mass, and also to estimate how the composition of PM(2.5) might change with space and time, by spatially interpolating speciated PM(2.5). 
This type of analysis is needed to conduct spatial-temporal epidemiological studies of the association of these pollutants and adverse health effect. We introduce a multivariate spatial-temporal model for speciated PM(2.5). We propose a Bayesian hierarchical framework with spatiotemporally varying coefficients. In addition, a linear model of coregionalization is developed to account for spatial and temporal dependency structures for each component as well as the associations among the components. We also introduce a statistical framework to combine different sources of data, which accounts for bias and measurement error. We apply our framework to speciated PM(2.5) data in the United States for the year 2004. Our study shows that sulfate concentrations are the highest during the summer while nitrate concentrations are the highest during the winter. The results also show total carbonaceous mass.}, number={2}, journal={Journal of Statistical Theory and Practice}, publisher={Springer Science and Business Media LLC}, author={Choi, Jungsoon and Reich, Brian J. and Fuentes, Montserrat and Davis, Jerry M.}, year={2009}, month={Jun}, pages={407–418} } @article{bondell_reich_2009, title={Simultaneous Factor Selection and Collapsing Levels in ANOVA}, volume={65}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2008.01061.x}, abstractNote={When performing an analysis of variance, the investigator often has two main goals: to determine which of the factors have a significant effect on the response, and to detect differences among the levels of the significant factors. Level comparisons are done via a post-hoc analysis based on pairwise differences. This article proposes a novel constrained regression approach to simultaneously accomplish both goals via shrinkage within a single automated procedure. The form of this shrinkage has the ability to collapse levels within a factor by setting their effects to be equal, while also achieving factor selection by zeroing out entire factors. 
Using this approach also leads to the identification of a structure within each factor, as levels can be automatically collapsed to form groups. In contrast to the traditional pairwise comparison methods, these groups are necessarily nonoverlapping so that the results are interpretable in terms of distinct subsets of levels. The proposed procedure is shown to have the oracle property in that asymptotically it performs as well as if the exact structure were known beforehand. A simulation and real data examples show the strong performance of the method.}, number={1}, journal={BIOMETRICS}, author={Bondell, Howard D. and Reich, Brian J.}, year={2009}, month={Mar}, pages={169–177} } @article{choi_fuentes_reich_2009, title={Spatial-temporal association between fine particulate matter and daily mortality}, volume={53}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2008.05.018}, abstractNote={Fine particulate matter (PM(2.5)) is a mixture of pollutants that has been linked to serious health problems, including premature mortality. Since the chemical composition of PM(2.5) varies across space and time, the association between PM(2.5) and mortality could also change with space and season. In this work we develop and implement a statistical multi-stage Bayesian framework that provides a very broad, flexible approach to studying the spatiotemporal associations between mortality and population exposure to daily PM(2.5) mass, while accounting for different sources of uncertainty. In stage 1, we map ambient PM(2.5) air concentrations using all available monitoring data (IMPROVE and FRM) and an air quality model (CMAQ) at different spatial and temporal scales. In stage 2, we examine the spatial temporal relationships between the health end-points and the exposures to PM(2.5) by introducing a spatial-temporal generalized Poisson regression model. We adjust for time-varying confounders, such as seasonal trends. 
A common seasonal trends model is to use a fixed number of basis functions to account for these confounders, but the results can be sensitive to the number of basis functions. In this study, the number of the basis functions is treated as an unknown parameter in our Bayesian model and we use a space-time stochastic search variable selection approach. We apply our methods to a data set in North Carolina for the year 2001.}, number={8}, journal={COMPUTATIONAL STATISTICS \& DATA ANALYSIS}, author={Choi, Jungsoon and Fuentes, Montserrat and Reich, Brian J.}, year={2009}, month={Jun}, pages={2989–3000} } @article{reich_storlie_bondell_2009, title={Variable Selection in Bayesian Smoothing Spline ANOVA Models: Application to Deterministic Computer Codes}, volume={51}, ISSN={["1537-2723"]}, DOI={10.1198/TECH.2009.0013}, abstractNote={With many predictors, choosing an appropriate subset of the covariates is a crucial, and difficult, step in nonparametric regression. We propose a Bayesian nonparametric regression model for curve-fitting and variable selection. We use the smoothing spline ANOVA framework to decompose the regression function into interpretable main effect and interaction functions. Stochastic search variable selection via MCMC sampling is used to search for models that fit the data well. Also, we show that variable selection is highly-sensitive to hyperparameter choice and develop a technique to select hyperparameters that control the long-run false positive rate. The method is used to build an emulator for a complex computer model for two-phase fluid flow.}, number={2}, journal={TECHNOMETRICS}, author={Reich, Brian J. and Storlie, Curtis B. 
and Bondell, Howard D.}, year={2009}, month={May}, pages={110–120} } @article{reich_hodges_2008, title={Identification of the variance components in the general two-variance linear model}, volume={138}, ISSN={["0378-3758"]}, DOI={10.1016/j.jspi.2007.05.046}, abstractNote={Bayesian analyses frequently employ two-stage hierarchical models involving two-variance parameters: one controlling measurement error and the other controlling the degree of smoothing implied by the model's higher level. These analyses can be hampered by poorly identified variances which may lead to difficulty in computing and in choosing reference priors for these parameters. In this paper, we introduce the class of two-variance hierarchical linear models and characterize the aspects of these models that lead to well-identified or poorly identified variances. These ideas are illustrated with a spatial analysis of a periodontal data set and examined in some generality for specific two-variance models including the conditionally autoregressive (CAR) and one-way random effect models. We also connect this theory with other constrained regression methods and suggest a diagnostic that can be used to search for missing spatially varying fixed effects in the CAR model.}, number={6}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Reich, Brian J. and Hodges, James S.}, year={2008}, month={Jul}, pages={1592–1604} } @article{reich_hodges_2008b, title={Modeling longitudinal spatial periodontal data: A spatially adaptive model with tools for specifying priors and checking fit}, volume={64}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2007.00956.x}, abstractNote={Summary Attachment loss (AL), the distance down a tooth's root that is no longer attached to surrounding bone by periodontal ligament, is a common measure of periodontal disease. In this article, we develop a spatiotemporal model to monitor the progression of AL. 
Our model is an extension of the conditionally autoregressive (CAR) prior, which spatially smooths estimates toward their neighbors. However, because AL often exhibits a burst of large values in space and time, we develop a nonstationary spatiotemporal CAR model that allows the degree of spatial and temporal smoothing to vary in different regions of the mouth. To do this, we assign each AL measurement site its own set of variance parameters and spatially smooth the variances with spatial priors. We propose a heuristic to measure the complexity of the site‐specific variances, and use it to select priors that ensure parameters in the model are well identified. In data from a clinical trial, this model improves the fit compared to the usual dynamic CAR model for 90 of 99 patients' AL measurements.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Hodges, James S.}, year={2008}, month={Sep}, pages={790–799} } @article{fuentes_reich_lee_2008, title={SPATIAL-TEMPORAL MESOSCALE MODELING OF RAINFALL INTENSITY USING GAGE AND RADAR DATA}, volume={2}, ISSN={["1932-6157"]}, DOI={10.1214/08-AOAS166}, abstractNote={Gridded estimated rainfall intensity values at very high spatial and temporal resolution levels are needed as main inputs for weather prediction models to obtain accurate precipitation forecasts, and to verify the performance of precipitation forecast models. These gridded rainfall fields are also the main driver for hydrological models that forecast flash floods, and they are essential for disaster prediction associated with heavy rain. Rainfall information can be obtained from rain gages that provide relatively accurate estimates of the actual rainfall values at point-referenced locations, but they do not characterize well enough the spatial and temporal structure of the rainfall fields. Doppler radar data offer better spatial and temporal coverage, but Doppler radar measures effective radar reflectivity (Ze) rather than rainfall rate (R). 
Thus, rainfall estimates from radar data suffer from various uncertainties due to their measuring principle and the conversion from Ze to R. We introduce a framework to combine radar reflectivity and gage data, by writing the different sources of rainfall information in terms of an underlying unobservable spatial temporal process with the true rainfall values. We use spatial logistic regression to model the probability of rain for both sources of data in terms of the latent true rainfall process. We characterize the different sources of bias and error in the gage and radar data and we estimate the true rainfall intensity with its posterior predictive distribution, conditioning on the observed data. Our model allows for nonstationary and asymmetry in the spatio-temporal dependency structure of the rainfall process, and allows the temporal evolution of the rainfall process to depend on the motions of rain fields, and the spatial correlation to depend on geographic features. We apply our methods to estimate rainfall intensity every 10 minutes, in a subdomain over South Korea with a spatial resolution of 1 km by 1 km.}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Fuentes, Montserrat and Reich, Brian and Lee, Gyuwon}, year={2008}, month={Dec}, pages={1148–1169} } @article{bondell_reich_2008, title={Simultaneous regression shrinkage, variable selection, and supervised clustering of predictors with OSCAR}, volume={64}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2007.00843.x}, abstractNote={Summary Variable selection can be challenging, particularly in situations with a large number of predictors with possibly high correlations, such as gene expression data. In this article, a new method called the OSCAR (octagonal shrinkage and clustering algorithm for regression) is proposed to simultaneously select variables while grouping them into predictive clusters. 
In addition to improving prediction accuracy and interpretation, these resulting groups can then be investigated further to discover what contributes to the group having a similar behavior. The technique is based on penalized least squares with a geometrically intuitive penalty function that shrinks some coefficients to exactly zero. Additionally, this penalty yields exact equality of some coefficients, encouraging correlated predictors that have a similar effect on the response to form predictive clusters represented by a single coefficient. The proposed procedure is shown to compare favorably to the existing shrinkage and variable selection techniques in terms of both prediction error and model complexity, while yielding the additional grouping information.}, number={1}, journal={BIOMETRICS}, author={Bondell, Howard D. and Reich, Brian J.}, year={2008}, month={Mar}, pages={115–123} } @article{reich_fuentes_2007, title={A MULTIVARIATE SEMIPARAMETRIC BAYESIAN SPATIAL MODELING FRAMEWORK FOR HURRICANE SURFACE WIND FIELDS}, volume={1}, ISSN={["1932-6157"]}, DOI={10.1214/07-AOAS108}, abstractNote={Storm surge, the onshore rush of sea water caused by the high winds and low pressure associated with a hurricane, can compound the effects of inland flooding caused by rainfall, leading to loss of property and loss of life for residents of coastal areas. Numerical ocean models are essential for creating storm surge forecasts for coastal areas. These models are driven primarily by the surface wind forcings. Currently, the gridded wind fields used by ocean models are specified by deterministic formulas that are based on the central pressure and location of the storm center. While these equations incorporate important physical knowledge about the structure of hurricane surface wind fields, they cannot always capture the asymmetric and dynamic nature of a hurricane. 
A new Bayesian multivariate spatial statistical modeling framework is introduced combining data with physical knowledge about the wind fields to improve the estimation of the wind vectors. Many spatial models assume the data follow a Gaussian distribution. However, this may be overly-restrictive for wind fields data which often display erratic behavior, such as sudden changes in time or space. In this paper we develop a semiparametric multivariate spatial model for these data. Our model builds on the stick-breaking prior, which is frequently used in Bayesian modeling to capture uncertainty in the parametric form of an outcome. The stick-breaking prior is extended to the spatial setting by assigning each location a different, unknown distribution, and smoothing the distributions in space with a series of kernel functions. This semiparametric spatial model is shown to improve prediction compared to usual Bayesian Kriging methods for the wind field of Hurricane Ivan.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. and Fuentes, Montserrat}, year={2007}, month={Jun}, pages={249–264} } @article{reich_hodges_carlin_reich_2006, title={A spatial analysis of basketball shot chart data}, volume={60}, ISSN={["1537-2731"]}, DOI={10.1198/000313006X90305}, abstractNote={Basketball coaches at all levels use shot charts to study shot locations and outcomes for their own teams as well as upcoming opponents. Shot charts are simple plots of the location and result of each shot taken during a game. Although shot chart data are rapidly increasing in richness and availability, most coaches still use them purely as descriptive summaries. However, a team's ability to defend a certain player could potentially be improved by using shot data to make inferences about the player's tendencies and abilities. This article develops hierarchical spatial models for shot-chart data, which allow for spatially varying effects of covariates. 
Our spatial models permit differential smoothing of the fitted surface in two spatial directions, which naturally correspond to polar coordinates: distance to the basket and angle from the line connecting the two baskets. We illustrate our approach using the 2003–2004 shot chart data for Minnesota Timberwolves guard Sam Cassell.}, number={1}, journal={AMERICAN STATISTICIAN}, author={Reich, BJ and Hodges, JS and Carlin, BP and Reich, AM}, year={2006}, month={Feb}, pages={3–12} } @article{reich_hodges_zadnik_2006, title={Effects of residual smoothing on the posterior of the fixed effects in disease-mapping models}, volume={62}, DOI={10.1111/j.1541-0420.2006.00617.x}, number={4}, journal={Biometrics}, author={Reich, Brian and Hodges, J. S. and Zadnik, V.}, year={2006}, pages={1197–1206} } @article{lemmonds_mooney_reich_hatsukami_2004, title={Characteristics of cigarette smokers seeking treatment for cessation versus reduction}, volume={29}, ISSN={0306-4603}, url={http://dx.doi.org/10.1016/j.addbeh.2003.08.049}, DOI={10.1016/j.addbeh.2003.08.049}, abstractNote={Comparisons were made between cigarette smokers seeking treatment to quit smoking and cigarette smokers seeking treatment to reduce the number of cigarettes they smoke. Potential subjects were recruited from the local metropolitan area by advertisement in the local media. A total of 665 cigarette smokers telephoned our clinic to seek treatment for smoking cessation and 565 cigarette smokers telephoned to seek treatment to gradually reduce the number of cigarettes they smoke but not quit smoking. Potential subjects were instructed to call the clinic to find out additional information about the studies, and while on the telephone they were asked questions pertaining to tobacco use and health status. 
The results show that the two populations are similar in many respects with the following exceptions: smokers seeking treatment to reduce cigarette use tend to smoke more cigarettes per day, are less motivated to quit, make fewer quit attempts, drink more alcoholic beverages per day, and have more health problems (Ps<.05). These results indicate that cigarette smokers seeking treatment for smoking reduction but not cessation may be more dependent smokers who experience more medical disorders.}, number={2}, journal={Addictive Behaviors}, publisher={Elsevier BV}, author={Lemmonds, Charlotte A. and Mooney, Marc and Reich, Brian and Hatsukami, Dorothy}, year={2004}, month={Feb}, pages={357–364} } @article{allen_brintnell_hatsukami_reich_2004, title={Energy intake and physical activity during short-term smoking cessation in postmenopausal women}, volume={29}, ISSN={0306-4603}, url={http://dx.doi.org/10.1016/j.addbeh.2004.02.041}, DOI={10.1016/j.addbeh.2004.02.041}, abstractNote={This study assessed the effect of short-term (2-week) smoking abstinence on weight gain, energy intake, and physical activity in 60 postmenopausal women. Participants were stratified by their use of hormone replacement therapy (HRT; currently taking/not taking) and then randomized to abstinence or continued smoking for 2 weeks. The 30 abstainers gained a mean of 1.28 kg, compared with a 0.54 kg loss for the 30 continued smokers (P=.002). The abstainers also reported a significantly greater increase than did the smokers in total kilocalorie and in carbohydrate consumption for both weeks. There were no changes in physical activity and HRT effect. 
These are the first published findings on caloric intake and weight gain during smoking abstinence in postmenopausal women—an understudied population in the smoking cessation literature.}, number={5}, journal={Addictive Behaviors}, publisher={Elsevier BV}, author={Allen, Sharon S and Brintnell, Dawn M and Hatsukami, Dorothy and Reich, Brian}, year={2004}, month={Jul}, pages={947–951} }