% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{simafranca_willoughby_o'neil_farr_reich_giertych_johnson_pascolini-campbell_2024,
  title = {Modeling wildland fire burn severity in {California} using a spatial {Super Learner} approach},
  volume = {3},
  issn = {1573-3009},
  doi = {10.1007/s10651-024-00601-1},
  journal = {Environmental and Ecological Statistics},
  author = {Simafranca, Nicholas and Willoughby, Bryant and O'Neil, Erin and Farr, Sophie and Reich, Brian J. and Giertych, Naomi and Johnson, Margaret C. and Pascolini-Campbell, Madeleine A.},
  year = {2024},
  month = mar,
}

@article{yang_ruiz-suarez_reich_guan_rappold_2023,
  title = {A Data-Fusion Approach to Assessing the Contribution of Wildland Fire Smoke to Fine Particulate Matter in {California}},
  volume = {15},
  issn = {2072-4292},
  url = {https://www.mdpi.com/2072-4292/15/17/4246},
  doi = {10.3390/rs15174246},
  abstract = {The escalating frequency and severity of global wildfires necessitate an in-depth understanding and monitoring of wildfire smoke impacts, specifically its contribution to fine particulate matter (PM2.5). We propose a data-fusion method to study wildfire contribution to PM2.5 using satellite-derived smoke plume indicators and PM2.5 monitoring data. Our study incorporates two types of monitoring data, the high-quality but sparse Air Quality System (AQS) stations and the abundant but less accurate PurpleAir (PA) sensors that are gaining popularity among citizen scientists. We propose a multi-resolution spatiotemporal model specified in the spectral domain to calibrate the PA sensors against accurate AQS measurements, and leverage the two networks to estimate wildfire contribution to PM2.5 in California in 2020 and 2021. A Bayesian approach is taken to incorporate all uncertainties and our prior intuition that the dependence between networks, as well as the accuracy of PA network, vary by frequency.
We find that 1% to 3% increase in PM2.5 concentration due to wildfire smoke, and that leveraging PA sensors improves accuracy.},
  number = {17},
  journal = {Remote Sensing},
  author = {Yang, Hongjian and Ruiz-Suarez, Sofia and Reich, Brian J. and Guan, Yawen and Rappold, Ana G.},
  year = {2023},
  month = sep,
}

% The exported title was ALL CAPS; it is stored in Title Case with the proper noun brace-protected.
@article{abba_williams_reich_2023,
  title = {A Penalized Complexity Prior for Deep {Bayesian} Transfer Learning with Application to Materials Informatics},
  volume = {17},
  issn = {1941-7330},
  doi = {10.1214/23-AOAS1759},
  abstract = {A key task in the emerging field of materials informatics is to use machine learning to predict a material's properties and functions. A fast and accurate predictive model allows researchers to more efficiently identify or construct a material with desirable properties. As in many fields, deep learning is one of the state-of-the art approaches, but fully training a deep learning model is not always feasible in materials informatics due to limitations on data availability, computational resources, and time. Accordingly, there is a critical need in the application of deep learning to materials informatics problems to develop efficient transfer learning algorithms. The Bayesian framework is natural for transfer learning because the model trained from the source data can be encoded in the prior distribution for the target task of interest. However, the Bayesian perspective on transfer learning is relatively unaccounted for in the literature, and is complicated for deep learning because the parameter space is large and the interpretations of individual parameters are unclear. Therefore, rather than subjective prior distributions for individual parameters, we propose a new Bayesian transfer learning approach based on the penalized complexity prior on the Kullback-Leibler divergence between the predictive models of the source and target tasks.
We show via simulations that the proposed method outperforms other transfer learning methods across a variety of settings. The new method is then applied to a predictive materials science problem where we show improved precision for estimating the band gap of a material based on its structural properties.},
  number = {4},
  journal = {Annals of Applied Statistics},
  author = {Abba, Mohamed A. and Williams, Jonathan P. and Reich, Brian J.},
  year = {2023},
  month = dec,
  pages = {3241--3256},
}

% The resolver URL that only repeated the DOI was dropped; the doi field is kept.
@article{werthmann_joode_cuffney_reich_soto-martinez_corrales-vargas_palomo-cordero_penaloza-castanedac_hoppin_2023,
  title = {A cross-sectional analysis of medical conditions and environmental factors associated with fractional exhaled nitric oxide ({FeNO}) in women and children from the {ISA} birth cohort, {Costa Rica}},
  volume = {233},
  issn = {1096-0953},
  doi = {10.1016/j.envres.2023.116449},
  abstract = {Fractional exhaled nitric oxide (FeNO) is a marker of airway inflammation. Elevated FeNO has been associated with environmental exposures, however, studies from tropical countries are limited. Using data from the Infants' Environmental Health Study (ISA) birth cohort, we evaluated medical conditions and environmental exposures' association with elevated FeNO. We performed a cross-sectional analysis of 277 women and 293 8-year old children who participated in the 8-year post-partum visit in 2019. We measured FeNO and collected information on medical conditions and environmental exposures including smoke from waste burning, work in banana plantations, and home pesticide use. We defined elevated FeNO as >25 ppb for women and >20 ppb for children. To evaluate factors associated with elevated FeNO, we used logistic regression models adjusted for obesity in women and unadjusted in children. Overall elevated FeNO was common (20% of women, 13% of children).
Rhinitis diagnosis was significantly associated with elevated FeNO in both women (odds ratio (OR): 3.67 95% Confidence Interval (CI): 1.81,7.35) and children (OR: 8.18 95%CI: 3.15, 21.22); wheeze was associated with elevated FeNO in women (OR: 4.50 95% CI: 2.25, 8.99). Environmental exposures were associated with elevated FeNO, but not significantly. Waste burning was associated with elevated FeNO in both women (OR: 1.58 95%CI 0.68, 4.15) and children (OR: 2.49 95%CI:0.82, 10.79). Para-occupational pesticide exposures were associated with elevated FeNO in women and children. For women, having a partner working in agriculture was associated with elevated FeNO (OR: 1.61 95%CI:0.77, 3.58) and for children, maternal work in agriculture was associated with elevated FeNO. (OR 2.08 95%CI 0.86, 4.67) CONCLUSION: Rhinitis and wheeze were associated with elevated FeNO in this rural, agricultural population. Smoke from waste burning as well as para-occupational pesticide exposure may contribute to elevated FeNO in rural communities.},
  journal = {Environmental Research},
  author = {Werthmann, Derek and van Wendel de Joode, Berna and Cuffney, Michael T. and Reich, Brian J. and Soto-Martinez, Manuel E. and Corrales-Vargas, Andrea and Palomo-Cordero, Luis and Penaloza-Castaneda, Jorge and Hoppin, Jane A.},
  year = {2023},
  month = sep,
}
% NOTE(review): in the entry that closes just above, two author names were normalised:
% `Joode, Berna van Wendel` (particles parsed as given names) became `van Wendel de Joode, Berna`,
% and the trailing `c` on `Penaloza-Castanedac` (presumably a fused footnote marker) was removed.
% Confirm both spellings against the publisher record.

@article{majumder_reich_2023,
  title = {A deep learning synthetic likelihood approximation of a non-stationary spatial model for extreme streamflow forecasting},
  volume = {55},
  issn = {2211-6753},
  doi = {10.1016/j.spasta.2023.100755},
  abstract = {Extreme streamflow is a key indicator of flood risk, and quantifying the changes in its distribution under non-stationary climate conditions is key to mitigating the impact of flooding events. We propose a non-stationary process mixture model (NPMM) for annual streamflow maxima over the central US (CUS) which uses downscaled climate model precipitation projections to forecast extremal streamflow.
Spatial dependence for the model is specified as a convex combination of transformed Gaussian and max-stable processes, indexed by a weight parameter which identifies the asymptotic regime of the process. The weight parameter is modeled as a function of the annual precipitation for each of the two hydrologic regions within the CUS, introducing spatio-temporal non-stationarity within the model. The NPMM is flexible with desirable tail dependence properties, but yields an intractable likelihood. To address this, we embed a neural network within a density regression model which is used to learn a synthetic likelihood function using simulations from the NPMM with different parameter settings. Our model is fitted using observational data for 1972--2021, and inference carried out in a Bayesian framework. The two regions within the CUS are estimated to be in different asymptotic regimes based on the posterior distribution of the weight parameter. Annual streamflow maxima estimates based on global climate models for two representative climate pathway scenarios suggest an overall increase in the frequency and magnitude of extreme streamflow for 2006-2035 compared to the historical period of 1972-2005.},
  journal = {Spatial Statistics},
  author = {Majumder, Reetam and Reich, Brian J.},
  year = {2023},
  month = jun,
}

% The exported abstract was the bare placeholder word "Abstract" and has been dropped.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{long_reich_staicu_meitzen_2023,
  title = {A nonparametric test of group distributional differences for hierarchically clustered functional data},
  volume = {3},
  issn = {1541-0420},
  doi = {10.1111/biom.13846},
  journal = {Biometrics},
  author = {Long, Alexander S. and Reich, Brian J.
and Staicu, Ana-Maria and Meitzen, John},
  year = {2023},
  month = mar,
}

% Placeholder abstract ("Abstract") and the resolver URL duplicating the DOI were dropped.
% NOTE(review): authors are stored as initials only; replace with full given names when available.
@article{awasthi_archfield_reich_sankarasubramanian_2023,
  title = {Beyond Simple Trend Tests: Detecting Significant Changes in Design-Flood Quantiles},
  volume = {50},
  issn = {1944-8007},
  doi = {10.1029/2023GL103438},
  number = {13},
  journal = {Geophysical Research Letters},
  author = {Awasthi, C. and Archfield, S. A. and Reich, B. J. and Sankarasubramanian, A.},
  year = {2023},
  month = jul,
}

% The title previously spelled "Koppen" without the umlaut that the abstract uses; it is now written
% with a LaTeX accent so it works under both classic BibTeX and Biber.
@article{burgener_hyland_reich_scotese_2023,
  title = {{Cretaceous} climates: Mapping paleo-{K{\"o}ppen} climatic zones using a {Bayesian} statistical analysis of lithologic, paleontologic, and geochemical proxies},
  volume = {613},
  issn = {1872-616X},
  doi = {10.1016/j.palaeo.2022.111373},
  abstract = {The Cretaceous Period (145 to 66 Ma) was a prolonged warmhouse to hothouse period characterized by high atmospheric CO2 conditions, elevated surface temperatures, and an enhanced global hydrologic cycle. It provides a case study for understanding how a hothouse climate system operates, and is an analog for future anthropogenic climate change scenarios. This study presents new quantitative temperature and precipitation proxy datasets for nine key Cretaceous time slices (Berriasian/Valanginian, Hauterivian/Barremian, Aptian, Albian, Cenomanian, Turonian, Coniacian/Santonian, Campanian, Maastrichtian), and a new geostatistical analysis technique that utilizes Markov Chain Monte Carlo algorithm and Bayesian hierarchical models to generate high resolution, quantitative global paleoclimate reconstructions from these proxy datasets, with associated uncertainties. Using these paleoclimate reconstructions, paleo-Köppen (-Geiger) climate zone maps are produced that provide new insights into the changing spatial and temporal climate patterns during the Cretaceous.
These new paleoclimate reconstructions and paleo-Köppen climate maps provide new insight into the timing of the initiation of the Early Cretaceous equatorial humid belt over Gondwana and reveal temporal shifts in the width of the subtropical arid belts from the Early to mid- to Late Cretaceous. A comparison of these proxy-based reconstructions and model simulations of Cretaceous climate reveal continued proxy/model differences. In addition, the methodology developed for this study can be applied to other time periods, providing a framework for better understanding ancient climate, environments, and ecosystems.},
  journal = {Palaeogeography, Palaeoclimatology, Palaeoecology},
  author = {Burgener, Landon and Hyland, Ethan and Reich, Brian J. and Scotese, Christopher},
  year = {2023},
  month = mar,
}

% The resolver URL that only repeated the DOI was dropped; the doi field is kept.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{hector_reich_2023,
  title = {Distributed Inference for Spatial Extremes Modeling in High Dimensions},
  volume = {4},
  issn = {1537-274X},
  doi = {10.1080/01621459.2023.2186886},
  abstract = {Extreme environmental events frequently exhibit spatial and temporal dependence. These data are often modeled using max stable processes (MSPs). MSPs are computationally prohibitive to fit for as few as a dozen observations, with supposed computationally-efficient approaches like the composite likelihood remaining computationally burdensome with a few hundred observations. In this paper, we propose a spatial partitioning approach based on local modeling of subsets of the spatial domain that delivers computationally and statistically efficient inference. Marginal and dependence parameters of the MSP are estimated locally on subsets of observations using censored pairwise composite likelihood, and combined using a modified generalized method of moments procedure. The proposed distributed approach is extended to estimate spatially varying coefficient models to deliver computationally efficient modeling of spatial variation in marginal parameters.
We demonstrate consistency and asymptotic normality of estimators, and show empirically that our approach leads to a surprising reduction in bias of parameter estimates over a full data approach. We illustrate the flexibility and practicability of our approach through simulations and the analysis of streamflow data from the U.S. Geological Survey.},
  journal = {Journal of the American Statistical Association},
  publisher = {Taylor \& Francis},
  author = {Hector, Emily C. and Reich, Brian J.},
  year = {2023},
  month = apr,
}

% Placeholder abstract ("Abstract") dropped. The doubled initial in `Reich, Brian J. J.` was reduced
% to `Brian J.`, the form used for this author in the other entries of this file.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{sahoo_guinness_reich_2023,
  title = {Estimating atmospheric motion winds from satellite image data using space-time drift models},
  volume = {7},
  issn = {1099-095X},
  doi = {10.1002/env.2818},
  journal = {Environmetrics},
  author = {Sahoo, Indranil and Guinness, Joseph and Reich, Brian J.},
  year = {2023},
  month = jul,
}

% NOTE(review): citation key changed -- the exported key contained a space (`..._wang_et al._2023`),
% which BibTeX cannot parse as a single key; the space is now an underscore. Update any citations.
% The truncated author list uses the `and others` token instead of a literal `et al.` name.
@article{mohottige_davenport_bhavsar_schappe_lyn_maxson_johnson_planey_mcelroy_wang_et_al._2023,
  title = {Residential Structural Racism and Prevalence of Chronic Health Conditions},
  volume = {6},
  issn = {2574-3805},
  doi = {10.1001/jamanetworkopen.2023.48914},
  abstract = {Importance: Studies elucidating determinants of residential neighborhood–level health inequities are needed.},
  number = {12},
  journal = {{JAMA} Network Open},
  author = {Mohottige, Dinushika and Davenport, Clemontina A. and Bhavsar, Nrupen and Schappe, Tyler and Lyn, Michelle J. and Maxson, Pamela and Johnson, Fred and Planey, Arrianna M. and Mcelroy, Lisa M. and Wang, Virginia and others},
  year = {2023},
  month = dec,
}

% Placeholder abstract ("Abstract") dropped.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{yanchenko_bondell_reich_2023,
  title = {Spatial regression modeling via the {R2D2} framework},
  volume = {10},
  issn = {1099-095X},
  doi = {10.1002/env.2829},
  journal = {Environmetrics},
  author = {Yanchenko, Eric and Bondell, Howard D.
and Reich, Brian J.},
  year = {2023},
  month = oct,
}

@article{nag_sun_reich_2023,
  title = {Spatio-temporal {DeepKriging} for interpolation and probabilistic forecasting},
  volume = {57},
  issn = {2211-6753},
  doi = {10.1016/j.spasta.2023.100773},
  abstract = {Gaussian processes (GP) and Kriging are widely used in traditional spatio-temporal modelling and prediction. These techniques typically presuppose that the data are observed from a stationary GP with a parametric covariance structure. However, processes in real-world applications often exhibit non-Gaussianity and nonstationarity. Moreover, likelihood-based inference for GPs is computationally expensive and thus prohibitive for large datasets. In this paper, we propose a deep neural network (DNN) based two-stage model for spatio-temporal interpolation and forecasting. Interpolation is performed in the first step, which utilizes a dependent DNN with the embedding layer constructed with spatio-temporal basis functions. For the second stage, we use Long-Short Term Memory (LSTM) and convolutional LSTM to forecast future observations at a given location. We adopt the quantile-based loss function in the DNN to provide probabilistic forecasting. Compared to Kriging, the proposed method does not require specifying covariance functions or making stationarity assumptions and is computationally efficient. Therefore, it is suitable for large-scale prediction of complex spatio-temporal processes.
We apply our method to monthly PM2.5 data at more than 200,000 space–time locations from January 1999 to December 2022 for fast imputation of missing values and forecasts with uncertainties.},
  journal = {Spatial Statistics},
  author = {Nag, Pratik and Sun, Ying and Reich, Brian J.},
  year = {2023},
  month = oct,
}

% The exported title was ALL CAPS; it is stored in Title Case with the pollutant name brace-protected.
@article{larsen_yang_reich_rappold_2022,
  title = {A Spatial Causal Analysis of Wildland Fire-Contributed {PM2.5} Using Numerical Model Output},
  volume = {16},
  issn = {1941-7330},
  doi = {10.1214/22-AOAS1610},
  abstract = {Wildland fire smoke contains hazardous levels of fine particulate matter (PM2.5), a pollutant shown to adversely effect health. Estimating fire attributable PM2.5 concentrations is key to quantifying the impact on air quality and subsequent health burden. This is a challenging problem since only total PM2.5 is measured at monitoring stations and both fire-attributable PM2.5 and PM2.5 from all other sources are correlated in space and time. We propose a framework for estimating fire-contributed PM2.5 and PM2.5 from all other sources using a novel causal inference framework and bias-adjusted chemical model representations of PM2.5 under counterfactual scenarios. The chemical model representation of PM2.5 for this analysis is simulated using Community Multiscale Air Quality Modeling System (CMAQ), run with and without fire emissions across the contiguous U.S. for the 2008-2012 wildfire seasons. The CMAQ output is calibrated with observations from monitoring sites for the same spatial domain and time period. We use a Bayesian model that accounts for spatial variation to estimate the effect of wildland fires on PM2.5 and state assumptions under which the estimate has a valid causal interpretation. Our results include estimates of the contributions of wildfire smoke to PM2.5 for the contiguous U.S.
Additionally, we compute the health burden associated with the PM2.5 attributable to wildfire smoke.},
  number = {4},
  journal = {Annals of Applied Statistics},
  author = {Larsen, Alexandra and Yang, Shu and Reich, Brian J. and Rappold, Ana G.},
  year = {2022},
  month = dec,
  pages = {2714--2731},
}

% The resolver URL that only repeated the DOI was dropped; the doi field is kept.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{trostle_corzo_reich_machado_2022,
  title = {A discrete-time survival model for porcine epidemic diarrhoea virus},
  volume = {10},
  issn = {1865-1682},
  doi = {10.1111/tbed.14739},
  abstract = {Since the arrival of porcine epidemic diarrhea virus (PEDV) in the United States in 2013, elimination and control programs have had partial success. The dynamics of its spread are hard to quantify, though previous work has shown that local transmission and the transfer of pigs within production systems are most associated with the spread of PEDV. Our work relies on the history of PEDV infections in a region of the southeastern United States. This infection data is complemented by farm-level features and extensive industry data on the movement of both pigs and vehicles. We implement a discrete-time survival model and evaluate different approaches to modeling the local-transmission and network effects. We find strong evidence in that the local-transmission and pig-movement effects are associated with the spread of PEDV, even while controlling for seasonality, farm-level features, and the possible spread of disease by vehicles. Our fully Bayesian model permits full uncertainty quantification of these effects. Our farm-level out-of-sample predictions have a receiver-operating characteristic area under the curve (AUC) of 0.779 and a precision-recall AUC of 0.097. The quantification of these effects in a comprehensive model allows stakeholders to make more informed decisions about disease prevention efforts.},
  journal = {Transboundary and Emerging Diseases},
  author = {Trostle, Parker and Corzo, Cesar A. and Reich, Brian J.
and Machado, Gustavo},
  year = {2022},
  month = oct,
}

% The abstract began with the fused heading word "Abstract", which was removed.
% NOTE(review): the export lost an inline formula after "proposed prior is"; it has been restored
% as $1/x$ -- confirm against the published abstract.
@article{zhang_naughton_bondell_reich_2022,
  title = {{Bayesian} Regression Using a Prior on the Model Fit: The {R2-D2} Shrinkage Prior},
  volume = {117},
  issn = {1537-274X},
  doi = {10.1080/01621459.2020.1825449},
  abstract = {Prior distributions for high-dimensional linear regression require specifying a joint distribution for the unobserved regression coefficients, which is inherently difficult. We instead propose a new class of shrinkage priors for linear regression via specifying a prior first on the model fit, in particular, the coefficient of determination, and then distributing through to the coefficients in a novel way. The proposed method compares favorably to previous approaches in terms of both concentration around the origin and tail behavior, which leads to improved performance both in posterior contraction and in empirical performance. The limiting behavior of the proposed prior is $1/x$, both around the origin and in the tails. This behavior is optimal in the sense that it simultaneously lies on the boundary of being an improper prior both in the tails and around the origin. None of the existing shrinkage priors obtain this behavior in both regions simultaneously. We also demonstrate that our proposed prior leads to the same near-minimax posterior contraction rate as the spike-and-slab prior. Supplementary materials for this article are available online.},
  number = {538},
  journal = {Journal of the American Statistical Association},
  author = {Zhang, Yan Dora and Naughton, Brian P. and Bondell, Howard D. and Reich, Brian J.},
  year = {2022},
  month = apr,
  pages = {862--874},
}

@article{miller_reich_2022,
  title = {{Bayesian} spatial modeling using random {Fourier} frequencies},
  volume = {48},
  issn = {2211-6753},
  doi = {10.1016/j.spasta.2022.100598},
  abstract = {Spectral methods are important for both theory and computation in spatial data analysis.
When data lie on a grid, spectral approaches can take advantage of the discrete Fourier transform for fast computation. If data are not on a grid, then low-rank processes with Fourier basis functions may be sufficient approximations. However, deciding which basis functions to use is difficult and can depend on unknown parameters. Here, we introduce Bayesian Random Fourier Frequencies (BRFF), a fully Bayesian extension of the random Fourier features approach. BRFF treats the spectral frequencies as random parameters, which unlike fixed frequency approximations allows the frequencies to be data-adaptive and averages over uncertainty in frequency selection. We apply this method to non-gridded continuous, binary, and count data. We compare BRFF using simulated and observed data to another popular low-rank method, the predictive processes (PP) model. BRFF is faster than PP, and outperforms or matches the predictive performance of the PP model in settings with high numbers of observations.},
  journal = {Spatial Statistics},
  author = {Miller, Matthew J. and Reich, Brian J.},
  year = {2022},
  month = apr,
}
% NOTE(review): in the entry that closes just above, both authors were exported with a doubled
% middle initial ("J. J."); it was reduced to a single "J.", matching how Reich appears in the other
% entries of this file. Confirm the first author's initials.

% The exported title read "Spatial plus :" with straight quotes; the abstract of this same entry
% names the method "Spatial+", so the title now uses that form with TeX quotation marks.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{reich_yang_guan_2022,
  title = {Discussion on ``{Spatial+}: A novel approach to spatial confounding'' by {Dupont}, {Wood}, and {Augustin}},
  volume = {3},
  issn = {1541-0420},
  doi = {10.1111/biom.13651},
  abstract = {Congratulations to the authors for this thoughtful and timely contribution to the spatial confounding literature. The intuitive nature of the method and simplicity of the estimation procedure will surely make Spatial+ popular with practitioners, and the theoretical developments are a major advance for researchers in this area. There is much to discuss! We have formatted our discussion in two sections: in Section 2 we consider the assumptions and statistical properties of Spatial+, and in Section 3 we examine how Spatial+ fits in the wider literature on spatial causal inference.},
  journal = {Biometrics},
  author = {Reich, Brian J.
and Yang, Shu and Guan, Yawen},
  year = {2022},
  month = mar,
}

@article{giffin_gong_majumder_rappold_reich_yang_2022,
  title = {Estimating intervention effects on infectious disease control: The effect of community mobility reduction on {Coronavirus} spread},
  volume = {52},
  issn = {2211-6753},
  doi = {10.1016/j.spasta.2022.100711},
  abstract = {Understanding the effects of interventions, such as restrictions on community and large group gatherings, is critical to controlling the spread of COVID-19. Susceptible-Infectious-Recovered (SIR) models are traditionally used to forecast the infection rates but do not provide insights into the causal effects of interventions. We propose a spatiotemporal model that estimates the causal effect of changes in community mobility (intervention) on infection rates. Using an approximation to the SIR model and incorporating spatiotemporal dependence, the proposed model estimates a direct and indirect (spillover) effect of intervention. Under an interference and treatment ignorability assumption, this model is able to estimate causal intervention effects, and additionally allows for spatial interference between locations. Reductions in community mobility were measured by cell phone movement data. The results suggest that the reductions in mobility decrease Coronavirus cases 4 to 7 weeks after the intervention.},
  journal = {Spatial Statistics},
  author = {Giffin, Andrew and Gong, Wenlong and Majumder, Suman and Rappold, Ana G. and Reich, Brian J.
and Yang, Shu},
  year = {2022},
  month = dec,
}

% NOTE(review): citation key changed -- the exported key contained a space (`..._hoppin_et al._2022`),
% which BibTeX cannot parse as a single key; the space is now an underscore. Update any citations.
% Sentence breaks lost where structured-abstract headings were stripped have been restored.
@article{vargas_castaneda_liljedahl_mora_menezes-filho_smith_mergler_reich_giffin_hoppin_et_al._2022,
  title = {Exposure to common-use pesticides, manganese, lead, and thyroid function among pregnant women from the {Infants' Environmental Health} ({ISA}) study, {Costa Rica}},
  volume = {810},
  issn = {1879-1026},
  doi = {10.1016/j.scitotenv.2021.151288},
  abstract = {Pesticides and metals may disrupt thyroid function, which is key to fetal brain development. To evaluate if current-use pesticide exposures, lead and excess manganese alter free thyroxine (FT4), free triiodothyronine (FT3), and thyroid stimulating hormone (TSH) concentrations in pregnant women from the Infants' Environmental Health Study (ISA). At enrollment, we determined women's (n = 400) specific-gravity corrected urinary pesticide (μg/L) metabolite concentrations of mancozeb (ethylene thiourea (ETU)), pyrimethanil, thiabendazole, chlorpyrifos, synthetic pyrethroids, and 2,4-D. We also measured manganese hair (MnH) (μg/g) and blood (MnB) (μg/L), and blood lead (PbB) (μg/L) concentrations. To detect an immediate and late effect on thyroid homeostasis, we determined TSH, FT4 and FT3 in serum obtained at the same visit (n = 400), and about ten weeks afterwards (n = 245). We assessed associations between exposures and outcomes with linear regression and general additive models, Bayesian multivariate linear regression, and Bayesian kernel machine regression. About 80%, 94%, and 100% of the women had TSH, FT4, and FT3 within clinical reference ranges, respectively. Women with higher urinary ETU, and pyrimethanil-metabolites, had lower FT4: β = -0.79 (95%CI = -1.51, -0.08) and β = -0.29 (95%CI = -0.62, -0.03), respectively, for each tenfold increase in exposure. MnB was positively associated with FT4 (β = 0.04 (95%CI = 0.00, 0.07 per 1 μg/L increase), and women with high urinary pyrethroid-metabolite concentrations had decreased TSH (non-linear effects).
For the late-effect analysis, metabolites of pyrethroids and chlorpyrifos, as well as MnH, and PbB were associated decreased TSH, or increased FT4 and/or FT3. Mancozeb (ETU) and pyrimethanil may inhibit FT4 secretion (hypothyroidism-like effect), while chlorpyrifos, pyrethroids, MnB, MnH, PbB and Mn showed hyperthyroidism-like effects. Some effects on thyroid homeostasis seemed to be immediate (mancozeb (ETU), pyrimethanil, MnB), others delayed (chlorpyrifos, MnH, PbB), or both (pyrethroids), possibly reflecting different mechanisms of action.},
  journal = {Science of the Total Environment},
  author = {Vargas, Andrea Corrales and Castaneda, Jorge Penaloza and Liljedahl, Emelie Rietz and Mora, Ana Maria and Menezes-Filho, Jose Antonio and Smith, Donald R. and Mergler, Donna and Reich, Brian and Giffin, Andrew and Hoppin, Jane A. and others},
  year = {2022},
  month = mar,
}
% NOTE(review): in the entry that closes just above, the first two authors look like compound
% surnames split the wrong way (other entries in this file have `Corrales-Vargas, Andrea` and
% `Penaloza-Castaneda, Jorge`); left as exported -- confirm and normalise.

% Placeholder abstract ("Abstract") dropped. Initial-only author names were expanded to the full
% forms these same authors have in other entries of this file.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{giffin_reich_yang_rappold_2022,
  title = {Generalized propensity score approach to causal inference with spatial interference},
  volume = {9},
  issn = {1541-0420},
  doi = {10.1111/biom.13745},
  journal = {Biometrics},
  author = {Giffin, Andrew and Reich, Brian J. and Yang, Shu and Rappold, Ana G.},
  year = {2022},
  month = sep,
}

% Placeholder abstract ("Abstract") dropped.
@article{lan_reich_guinness_bandyopadhyay_ma_moeller_2022,
  title = {Geostatistical modeling of positive-definite matrices: An application to diffusion tensor imaging},
  volume = {78},
  issn = {1541-0420},
  doi = {10.1111/biom.13445},
  number = {2},
  journal = {Biometrics},
  author = {Lan, Zhou and Reich, Brian J. and Guinness, Joseph and Bandyopadhyay, Dipankar and Ma, Liangsuo and Moeller, F. Gerard},
  year = {2022},
  month = jun,
  pages = {548--559},
}

@article{majumder_guan_reich_saibaba_2022,
  title = {{Kryging}: geostatistical analysis of large-scale datasets using {Krylov} subspace methods},
  volume = {32},
  issn = {1573-1375},
  doi = {10.1007/s11222-022-10104-3},
  abstract = {Analyzing massive spatial datasets using a Gaussian process model poses computational challenges.
This is a problem prevailing heavily in applications such as environmental modeling, ecology, forestry and environmental health. We present a novel approximate inference methodology that uses profile likelihood and Krylov subspace methods to estimate the spatial covariance parameters and makes spatial predictions with uncertainty quantification for point-referenced spatial data. “Kryging” combines Kriging and Krylov subspace methods and applies for both observations on regular grid and irregularly spaced observations, and for any Gaussian process with a stationary isotropic (and certain geometrically anisotropic) covariance function, including the popular Matérn covariance family. We make use of the block Toeplitz structure with Toeplitz blocks of the covariance matrix and use fast Fourier transform methods to bypass the computational and memory bottlenecks of approximating log-determinant and matrix-vector products. We perform extensive simulation studies to show the effectiveness of our model by varying sample sizes, spatial parameter values and sampling designs. A real data application is also performed on a dataset consisting of land surface temperature readings taken by the MODIS satellite. Compared to existing methods, the proposed method performs satisfactorily with much less computation time and better scalability.},
  number = {5},
  journal = {Statistics and Computing},
  author = {Majumder, Suman and Guan, Yawen and Reich, Brian J. and Saibaba, Arvind K.},
  year = {2022},
  month = oct,
}

% NOTE(review): the title ends with a "(May, DOI, 2021)" suffix, which looks like a correction or
% erratum record for the DOI it quotes -- confirm, and consider recording that in a note field.
% NOTE(review): volume equals the month number and no issue/pages are given -- possibly an early-access placeholder; verify against the DOI.
@article{huberman_reich_bondell_2022,
  title = {Nonparametric conditional density estimation in a deep learning framework for short-term forecasting (May, 10.1007/s10651-021-00499-z, 2021)},
  volume = {8},
  issn = {1573-3009},
  doi = {10.1007/s10651-022-00543-6},
  journal = {Environmental and Ecological Statistics},
  author = {Huberman, David B. and Reich, Brian J.
and Bondell, Howard D.},
  year = {2022},
  month = aug,
}

% The exported title ended in a stray " ," after "Costa Rica"; the comma now sits where it
% separates the study name from the country, and proper nouns are brace-protected.
@article{giffin_hoppin_cordoba_solano-diaz_ruepert_penaloza-castaneda_lindh_reich_joode_2022,
  title = {Pyrimethanil and chlorpyrifos air concentrations and pregnant women's urinary metabolites in the {Infants' Environmental Health Study} ({ISA}), {Costa Rica}},
  volume = {166},
  issn = {1873-6750},
  doi = {10.1016/j.envint.2022.107328},
  abstract = {Only few studies have compared environmental pesticide air concentrations with specific urinary metabolites to evaluate pathways of exposure. Therefore, we compared pyrimethanil and chlorpyrifos concentrations in air with urinary 4-hydroxypyrimethanil (OHP, metabolite of pyrimethanil) and 3,5,6-trichloro-2-pyridinol (TCPy, metabolite of chlorpyrifos) among pregnant women from the Infant’s Environmental Health Study (ISA) in Matina County, Costa Rica. During pregnancy, we obtained repeat urinary samples from 448 women enrolled in the ISA study. We extrapolated pyrimethanil and chlorpyrifos concentrations measured with passive air samplers (PAS) (n = 48, from 12 schools), across space and time using a Bayesian spatiotemporal model. We subsequently compared these concentrations with urinary OHP and TCPy in 915 samples from 448 women, using separate mixed models and considering several covariables. A 10% increase in air pyrimethanil (ng/m3) was associated with a 5.7% (95% confidence interval (CI 4.6, 6.8) increase in OHP (μg/L). Women living further from banana plantations had lower OHP: −0.7% (95% CI −1.2, −0.3) for each 10% increase in distance (meters) as well as women who ate rice and beans ≥15 times a week −23% (95% CI −38, −4). In addition, each 1 ng/m3 increase in chlorpyrifos in air was associated with a 1.5% (95% CI 0.2, 2.8) increase in TCPy (μg/L), and women working in agriculture tended to have increased TCPy (21%, 95% CI −2, 49). The Bayesian spatiotemporal models were useful to estimate pyrimethanil and chlorpyrifos air concentrations across space and time.
Our results suggest inhalation of pyrimethanil and chlorpyrifos is a pathway of environmental exposure. PAS seems a useful technique to monitor environmental current-use pesticide exposures. For future studies, we recommend increasing the number of locations of environmental air measurements, obtaining all air and urine measurements during the same month, and, ideally, including dermal exposure estimates as well.}, journal={ENVIRONMENT INTERNATIONAL}, author={Giffin, Andrew and Hoppin, Jane A. and Cordoba, Leonel and Solano-Diaz, Karla and Ruepert, Clemens and Penaloza-Castaneda, Jorge and Lindh, Christian and Reich, Brian J. and Joode, Berna van Wendel}, year={2022}, month={Aug} } @article{islam_hoppin_mora_soto-martinez_cordoba gamboa_penaloza castaneda_reich_lindh_joode_2022, title={Respiratory and allergic outcomes among 5-year-old children exposed to pesticides}, volume={2}, ISSN={["1468-3296"]}, DOI={10.1136/thoraxjnl-2021-218068}, abstractNote={Background: Little is known about the effects of pesticides on children’s respiratory and allergic outcomes. We evaluated associations of prenatal and current pesticide exposures with respiratory and allergic outcomes in children from the Infants’ Environmental Health Study in Costa Rica.}, journal={THORAX}, author={Islam, Jessica Y. and Hoppin, Jane and Mora, Ana M. and Soto-Martinez, Manuel E. and Cordoba Gamboa, Leonel and Penaloza Castaneda, Jorge Ernesto and Reich, Brian and Lindh, Christian and Joode, Berna}, year={2022}, month={Feb} } @article{parsons_dawrs_nelson_norton_virdi_hasan_epperson_holst_chan_leos-barajas_et al._2022, title={Soil Properties and Moisture Synergistically Influence Nontuberculous Mycobacterial Prevalence in Natural Environments of Hawai'i}, volume={4}, ISSN={["1098-5336"]}, DOI={10.1128/aem.00018-22}, abstractNote={Nontuberculous mycobacteria (NTM) are ubiquitous in the environment, being found commonly in soils and natural bodies of freshwater. 
However, little is known about the environmental niches of NTM and how they relate to NTM prevalence in homes and other human-dominated areas.}, journal={APPLIED AND ENVIRONMENTAL MICROBIOLOGY}, author={Parsons, Arielle W. and Dawrs, Stephanie N. and Nelson, Stephen T. and Norton, Grant J. and Virdi, Ravleen and Hasan, Nabeeh A. and Epperson, L. Elaine and Holst, Brady and Chan, Edward D. and Leos-Barajas, Vianey and others}, year={2022}, month={Apr} } @article{guan_page_reich_ventrucci_yang_2022, title={Spectral adjustment for spatial confounding}, volume={12}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asac069}, abstractNote={Summary}, journal={BIOMETRIKA}, author={Guan, Yawen and Page, Garritt L. and Reich, Brian J. and Ventrucci, Massimo and Yang, Shu}, year={2022}, month={Dec} } @article{euan_sun_reich_2022, title={Statistical analysis of multi-day solar irradiance using a threshold time series model}, volume={1}, ISSN={["1099-095X"]}, DOI={10.1002/env.2716}, abstractNote={Abstract}, journal={ENVIRONMETRICS}, author={Euan, Carolina and Sun, Ying and Reich, Brian J.}, year={2022}, month={Jan} } @article{mao_martin_reich_2022, title={Valid Model-Free Spatial Prediction}, volume={12}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2022.2147531}, abstractNote={Predicting the response at an unobserved location is a fundamental problem in spatial statistics. Given the difficulty in modeling spatial dependence, especially in non-stationary cases, model-based prediction intervals are at risk of misspecification bias that can negatively affect their validity. Here we present a new approach for model-free spatial prediction based on the {\em conformal prediction} machinery. Our key observation is that spatial data can be treated as exactly or approximately exchangeable in a wide range of settings. 
For example, when the spatial locations are deterministic, we prove that the response values are, in a certain sense, locally approximately exchangeable for a broad class of spatial processes, and we develop a local spatial conformal prediction algorithm that yields valid prediction intervals without model assumptions. Numerical examples with both real and simulated data confirm that the proposed conformal prediction intervals are valid and generally more efficient than existing model-based procedures across a range of non-stationary and non-Gaussian settings.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Mao, Huiying and Martin, Ryan and Reich, Brian J.}, year={2022}, month={Dec} } @article{pease_pacifici_kays_reich_2022, title={What drives spatially varying ecological relationships in a wide-ranging species?}, volume={7}, ISSN={["1472-4642"]}, url={https://doi.org/10.1111/ddi.13594}, DOI={10.1111/ddi.13594}, abstractNote={Abstract}, journal={DIVERSITY AND DISTRIBUTIONS}, publisher={Wiley}, author={Pease, Brent S. and Pacifici, Krishna and Kays, Roland and Reich, Brian}, year={2022}, month={Jul} } @article{tian_reich_2021, title={A BAYESIAN SEMI-PARAMETRIC MIXTURE MODEL FOR BIVARIATE EXTREME VALUE ANALYSIS WITH APPLICATION TO PRECIPITATION FORECASTING}, volume={31}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202018.0420}, abstractNote={We propose a novel mixture Generalized Pareto (MIXGP) model to calibrate extreme precipitation forecasts. This model is able to describe the marginal distribution of observed precipitation and capture the dependence between climate forecasts and the observed precipitation under suitable conditions. In addition, the full range distribution of precipitation conditional on grid forecast ensembles can also be estimated. Unlike the classical Generalized Pareto distribution that can only model points over a hard threshold, our model takes the threshold as a latent parameter. 
Tail behavior of both univariate and bivariate models are studied. The utility of our model is evaluated in Monte Carlo simulation study and is applied to precipitation data for the US where it outperforms competing methods.}, number={3}, journal={STATISTICA SINICA}, author={Tian, Yuan and Reich, Brian J.}, year={2021}, month={Jul}, pages={1619–1641} } @article{reich_yang_guan_giffin_miller_rappold_2021, title={A Review of Spatial Causal Inference Methods for Environmental and Epidemiological Applications}, volume={5}, DOI={10.1111/insr.12452}, abstractNote={Summary}, journal={INTERNATIONAL STATISTICAL REVIEW}, author={Reich, Brian and Yang, Shu and Guan, Yawen and Giffin, Andrew B. and Miller, Matthew J. and Rappold, Ana}, year={2021} } @article{lan_reich_bandyopadhyay_2021, title={A spatial Bayesian semiparametric mixture model for positive definite matrices with applications in diffusion tensor imaging}, volume={49}, ISSN={["1708-945X"]}, DOI={10.1002/cjs.11601}, abstractNote={Abstract}, number={1}, journal={CANADIAN JOURNAL OF STATISTICS-REVUE CANADIENNE DE STATISTIQUE}, author={Lan, Zhou and Reich, Brian J. and Bandyopadhyay, Dipankar}, year={2021}, month={Mar}, pages={129–149} } @article{guan_reich_laber_2021, title={A spatiotemporal recommendation engine for malaria control}, volume={4}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxab010}, abstractNote={Summary}, journal={BIOSTATISTICS}, author={Guan, Qian and Reich, Brian J. and Laber, Eric B.}, year={2021}, month={Apr} } @article{miller_cabral_dickey_lebeau_reich_2021, title={Accounting for Location Measurement Error in Imaging Data With Application to Atomic Resolution Images of Crystalline Materials}, volume={4}, ISSN={["1537-2723"]}, url={https://app.dimensions.ai/details/publication/pub.1136536111}, DOI={10.1080/00401706.2021.1905070}, abstractNote={Abstract Scientists use imaging to identify objects of interest and infer properties of these objects. 
The locations of these objects are often measured with error, which when ignored leads to biased parameter estimates and inflated variance. Current measurement error methods require an estimate or knowledge of the measurement error variance to correct these estimates, which may not be available. Instead, we create a spatial Bayesian hierarchical model that treats the locations as parameters, using the image itself to incorporate positional uncertainty. We lower the computational burden by approximating the likelihood using a noncontiguous block design around the object locations. We use this model to quantify the relationship between the intensity and displacement of hundreds of atom columns in crystal structures directly imaged via scanning transmission electron microscopy (STEM). Atomic displacements are related to important phenomena such as piezoelectricity, a property useful for engineering applications like ultrasound. Quantifying the sign and magnitude of this relationship will help materials scientists more precisely design materials with improved piezoelectricity. A simulation study confirms our method corrects bias in the estimate of the parameter of interest and drastically improves coverage in high noise scenarios compared to non-measurement error models.}, number={1}, journal={TECHNOMETRICS}, author={Miller, Matthew J. and Cabral, Matthew J. and Dickey, Elizabeth C. and LeBeau, James M. and Reich, Brian J.}, year={2021}, month={Apr} } @article{xu_reich_2021, title={Bayesian nonparametric quantile process regression and estimation of marginal quantile effects}, volume={11}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13576}, abstractNote={Abstract}, journal={BIOMETRICS}, author={Xu, Steven G. 
and Reich, Brian J.}, year={2021}, month={Nov} } @article{cui_singh_staicu_reich_2021, title={Bayesian variable selection for high-dimensional rank data}, volume={5}, ISSN={["1099-095X"]}, DOI={10.1002/env.2682}, abstractNote={Abstract}, journal={ENVIRONMETRICS}, author={Cui, Can and Singh, Susheela P. and Staicu, Ana-Maria and Reich, Brian J.}, year={2021}, month={May} } @article{li_reich_bondell_2021, title={Deep distribution regression}, volume={159}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2021.107203}, abstractNote={Due to their flexibility and predictive performance, machine-learning based regression methods have become an important tool for predictive modeling and forecasting. However, most methods focus on estimating the conditional mean or specific quantiles of the target quantity and do not provide the full conditional distribution, which contains uncertainty information that might be crucial for decision making. A general solution consists of transforming a conditional distribution estimation problem into a constrained multi-class classification problem, in which tools such as deep neural networks can be applied. A novel joint binary cross-entropy loss function is proposed to accomplish this goal. Its performance is compared to current state-of-the-art methods via simulation. The approach also shows improved accuracy in a probabilistic solar energy forecasting problem.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Li, Rui and Reich, Brian J. 
and Bondell, Howard D.}, year={2021}, month={Jul} } @article{alhanti_joode_martinez_mora_gamboa_reich_lindh_lepiz_hoppin_2021, title={Environmental exposures contribute to respiratory and allergic symptoms among women living in the banana growing regions of Costa Rica}, volume={12}, ISSN={["1470-7926"]}, url={https://doi.org/10.1136/oemed-2021-107611}, DOI={10.1136/oemed-2021-107611}, abstractNote={Objectives: This research evaluates whether environmental exposures (pesticides and smoke) influence respiratory and allergic outcomes in women living in a tropical, agricultural environment.}, journal={OCCUPATIONAL AND ENVIRONMENTAL MEDICINE}, author={Alhanti, Brooke and Joode, Berna van Wendel and Martinez, Manuel Soto and Mora, Ana M. and Gamboa, Leonel Cordoba and Reich, Brian and Lindh, Christian H. and Lepiz, Marcela Quiros and Hoppin, Jane A.}, year={2021}, month={Dec} } @article{sass_li_reich_2021, title={Flexible and Fast Spatial Return Level Estimation Via a Spatially Fused Penalty}, volume={7}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2021.1938584}, abstractNote={Abstract Spatial extremes are common for climate data as the observations are usually referenced by geographic locations and dependent when they are nearby. An important goal of extremes modeling is to estimate the T-year return level. Among the methods suitable for modeling spatial extremes, perhaps the simplest and fastest approach is the spatial generalized extreme value (GEV) distribution and the spatial generalized Pareto distribution (GPD) that assume marginal independence and only account for dependence through the parameters. Despite the simplicity, simulations have shown that return level estimation using the spatial GEV and spatial GPD still provides satisfactory results compared to max-stable processes, which are asymptotically justified models capable of representing spatial dependence among extremes. 
However, the linear functions used to model the spatially varying coefficients are restrictive and may be violated. We propose a flexible and fast approach based on the spatial GEV and spatial GPD by introducing fused lasso and fused ridge penalty for parameter regularization. This enables improved return level estimation for large spatial extremes compared to the existing methods. Supplemental files for this article are available online.}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Sass, Danielle and Li, Bo and Reich, Brian J.}, year={2021}, month={Jul} } @article{dorman_hopperstad_reich_majumder_kennedy_reisig_greene_reay‐jones_collins_bacheler_et al._2021, title={Landscape‐level variation in Bt crops predict Helicoverpa zea (Lepidoptera: Noctuidae) resistance in cotton agroecosystems}, volume={77}, ISSN={1526-498X 1526-4998}, url={http://dx.doi.org/10.1002/ps.6585}, DOI={10.1002/ps.6585}, abstractNote={Abstract}, number={12}, journal={Pest Management Science}, publisher={Wiley}, author={Dorman, Seth J and Hopperstad, Kristen A and Reich, Brian J and Majumder, Suman and Kennedy, George and Reisig, Dominic D and Greene, Jeremy K and Reay‐Jones, Francis PF and Collins, Guy and Bacheler, Jack S and others}, year={2021}, month={Aug}, pages={5454–5462} } @article{gao_gray_reich_2021, title={Long-term, medium spatial resolution annual land surface phenology with a Bayesian hierarchical model}, volume={261}, ISSN={["1879-0704"]}, DOI={10.1016/j.rse.2021.112484}, abstractNote={Land surface phenology (LSP) is a consistent and sensitive indicator of climate change effects on Earth's vegetation. Existing methods of estimating LSP require time series densities that, until recently, have only been available from coarse spatial resolution imagery such as MODIS (500 m) and AVHRR (1 km). LSP products from these datasets have improved our understanding of phenological change at the global scale, especially over the MODIS era (2001-present). 
Nevertheless, these products may obscure important finer scale spatial patterns and longer-term changes. Therefore, we have developed a Bayesian hierarchical model to retrieve complete annual sequences of LSP from Landsat imagery (1984-present), which has medium spatial resolution (30 m) but relatively sparse temporal frequency. Our approach uses Markov Chain Monte Carlo (MCMC) sampling to quantify individual phenometric uncertainty, which is especially important when considering long time series with variable observation quality and density, but has rarely been demonstrated. The estimated spring LSP had strong agreement with ground phenology records at Harvard Forest (R2 = 0.87) and Hubbard Brook Experimental Forest (R2 = 0.67). The estimated LSP were consistent with the recently released 30 m LSP product, MSLSP30NA, in its time period of 2016 to 2018 (R2 of 0.86 and 0.73 for spring and autumn phenology, respectively). Our Bayesian hierarchical model is an important step forward in extending medium resolution LSP records back in time as it accomplishes both critical goals of retrieving annual LSP from sparse time series and accurately estimating uncertainty.}, journal={REMOTE SENSING OF ENVIRONMENT}, author={Gao, Xiaojie and Gray, Josh M. and Reich, Brian J.}, year={2021}, month={Aug} } @article{wendelberger_reich_wilson_2021, title={Multi-model penalized regression}, volume={14}, ISSN={["1932-1872"]}, DOI={10.1002/sam.11496}, abstractNote={Abstract}, number={6}, journal={STATISTICAL ANALYSIS AND DATA MINING}, author={Wendelberger, Laura J. and Reich, Brian J. and Wilson, Alyson G.}, year={2021}, month={Dec}, pages={698–722} } @article{johnson_reich_gray_2021, title={Multisensor fusion of remotely sensed vegetation indices using space-time dynamic linear models}, volume={5}, ISSN={["1467-9876"]}, DOI={10.1111/rssc.12495}, abstractNote={Abstract}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C-APPLIED STATISTICS}, author={Johnson, Margaret C. 
and Reich, Brian J. and Gray, Josh M.}, year={2021}, month={May} } @article{gong_reich_chang_2021, title={Multivariate spatial prediction of air pollutant concentrations with INLA}, volume={3}, ISSN={["2515-7620"]}, DOI={10.1088/2515-7620/ac2f92}, abstractNote={Estimates of daily air pollution concentrations with complete spatial and temporal coverage are important for supporting epidemiologic studies and health impact assessments. While numerous approaches have been developed for modeling air pollution, they typically only consider each pollutant separately. We describe a spatial multipollutant data fusion model that combines monitoring measurements and chemical transport model simulations that leverages dependence between pollutants to improve spatial prediction. For the contiguous United States, we created a data product of daily concentration for 12 pollutants (CO, NOx, NO2, SO2, O3, PM10, and PM2.5 species EC, OC, NO3, NH4, SO4) during the period 2005 to 2014. Out-of-sample prediction showed good performance, particularly for daily PM2.5 species EC (R2 = 0.64), OC (R2 = 0.75), NH4 (R2 = 0.84), NO3 (R2 = 0.73), and SO4 (R2 = 0.80). By employing the integrated nested Laplace approximation (INLA) for Bayesian inference, our approach also provides model-based prediction error estimates. The daily data product at 12 km spatial resolution will be publicly available immediately upon publication. To our knowledge this is the first publicly available data product for major PM2.5 species and several gases at this spatial and temporal resolution.}, number={10}, journal={ENVIRONMENTAL RESEARCH COMMUNICATIONS}, author={Gong, Wenlong and Reich, Brian J. 
and Chang, Howard H.}, year={2021}, month={Oct} } @article{huberman_reich_bondell_2021, title={Nonparametric conditional density estimation in a deep learning framework for short-term forecasting}, volume={5}, ISSN={["1573-3009"]}, DOI={10.1007/s10651-021-00499-z}, abstractNote={Short-term forecasting is an important tool in understanding environmental processes. In this paper, we incorporate machine learning algorithms into a conditional distribution estimator for the purposes of forecasting tropical cyclone intensity. Many machine learning techniques give a single-point prediction of the conditional distribution of the target variable, which does not give a full accounting of the prediction variability. Conditional distribution estimation can provide extra insight on predicted response behavior, which could influence decision-making and policy. We propose a technique that simultaneously estimates the entire conditional distribution and flexibly allows for machine learning techniques to be incorporated. A smooth model is fit over both the target variable and covariates, and a logistic transformation is applied on the model output layer to produce an expression of the conditional density function. We provide two examples of machine learning models that can be used, polynomial regression and deep learning models. To achieve computational efficiency, we propose a case–control sampling approximation to the conditional distribution. A simulation study for four different data distributions highlights the effectiveness of our method compared to other machine learning-based conditional distribution estimation techniques. We then demonstrate the utility of our approach for forecasting purposes using tropical cyclone data from the Atlantic Seaboard. 
This paper gives a proof of concept for the promise of our method, further computational developments can fully unlock its insights in more complex forecasting and other applications.}, journal={ENVIRONMENTAL AND ECOLOGICAL STATISTICS}, author={Huberman, David B. and Reich, Brian J. and Bondell, Howard D.}, year={2021}, month={May} } @article{winkel_stallrich_storlie_reich_2021, title={Sequential Optimization in Locally Important Dimensions}, volume={63}, ISSN={["1537-2723"]}, DOI={10.1080/00401706.2020.1714738}, abstractNote={Abstract Optimizing an expensive, black-box function is challenging when its input space is high-dimensional. Sequential design frameworks first model the function with a surrogate function and then optimize an acquisition function to determine input settings to evaluate next. Optimization of both the function and the acquisition function benefit from effective dimension reduction. Global variable selection detects and removes input variables that do not affect the function across the input space. Further dimension reduction may be possible if we consider local variable selection around the current optimum estimate. We develop a sequential design algorithm called sequential optimization in locally important dimensions (SOLID) that incorporates global and local variable selection to optimize a continuous, differentiable function. SOLID performs local variable selection by comparing the surrogate’s predictions in a localized region around the estimated optimum with the p alternative predictions made by removing each input variable. The search space of the acquisition function is further restricted to focus only on the variables that are deemed locally active, leading to greater emphasis on refining the surrogate model in locally active dimensions. A simulation study across multiple test functions and an application to the Sarcos robot dataset show that SOLID outperforms conventional approaches. 
Supplementary materials for this article are available online.}, number={2}, journal={TECHNOMETRICS}, author={Winkel, Munir A. and Stallrich, Jonathan W. and Storlie, Curtis B. and Reich, Brian J.}, year={2021}, month={Apr}, pages={236–248} } @article{dorman_hopperstad_reich_kennedy_huseth_2021, title={Soybeans as a non-Bt refuge for Helicoverpa zea in maize-cotton agroecosystems}, volume={322}, ISSN={0167-8809}, url={http://dx.doi.org/10.1016/j.agee.2021.107642}, DOI={10.1016/j.agee.2021.107642}, abstractNote={Geospatial models are crucial for identifying likely ‘hot-spots’ of Bt resistance evolution in Helicoverpa zea (Lepidoptera: Noctuidae), thereby improving regional insecticide resistance management (IRM) strategies and planted refuge compliance. To characterize H. zea distributions in relation to land use , we used historical trapping data collected from 2008 to 2019 in North Carolina to model the spatial and temporal abundance of H. zea populations across Bt -dominated landscapes. Helicoverpa zea abundance was standardized across site-year observations, and candidate landscape composition and configuration predictors of H. zea abundance were obtained. Spatiotemporal Bayesian hierarchical models were developed to make posterior predictions of H. zea abundance from environmental covariates, and results were used to generate interpolation prediction maps to visualize H. zea abundance across the sampled region. Our results suggest inverse distance weighted (IDW) soybeans is the most important predictor of H. zea abundance through time in row crop agroecosystems in North Carolina. Soybeans in North Carolina and southeastern U.S. likely serves as a critical non- Bt refuge for delaying H. zea resistance to Bt toxins in landscapes dominated by Bt maize and cotton. Moreover, soybean abundance can be used to predict the spatial abundance of H. zea in this region. Results can be applied to understand population dynamics of H. 
zea in landscapes dominated by genetically engineered (GE) crops expressing Bt toxins and will enable the development of sound insect resistance management strategies of H. zea populations to GE toxins targeting noctuid pests of maize and cotton. This work will also drive future geospatial studies investigating environmental predictors of resistance evolution in arthropod pests to GE technologies in crop production systems. Landscape-level variation in soybeans predicts spatial and temporal Helicoverpa zea abundance and likely serves as important non- Bt refugia in maize and cotton agroecosystems. • Helicoverpa zea population dynamics in row crops relate to landscape drivers • Landscape-level soybean and cotton variation in the southeastern U.S. associate with increased H. zea abundance through time • Soybeans likely serve as critical non- Bt refugia for delaying H. zea resistance in maize and cotton agroecosystems}, journal={Agriculture, Ecosystems & Environment}, publisher={Elsevier BV}, author={Dorman, Seth J. and Hopperstad, Kristen A. and Reich, Brian J. and Kennedy, George and Huseth, Anders S.}, year={2021}, month={Dec}, pages={107642} } @article{roy_reich_guinness_shinohara_staicu_2021, title={Spatial Shrinkage Via the Product Independent Gaussian Process Prior}, volume={6}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2021.1923512}, abstractNote={Abstract We study the problem of sparse signal detection on a spatial domain. We propose a novel approach to model continuous signals that are sparse and piecewise-smooth as the product of independent Gaussian (PING) processes with a smooth covariance kernel. The smoothness of the PING process is ensured by the smoothness of the covariance kernels of the Gaussian components in the product, and sparsity is controlled by the number of components. The bivariate kurtosis of the PING process implies that more components in the product results in the thicker tail and sharper peak at zero. 
We develop an efficient computation algorithm based on spectral methods. The simulation results demonstrate superior estimation using the PING prior over Gaussian process prior for different image regressions. We apply our method to a longitudinal magnetic resonance imaging dataset to detect the regions that are affected by multiple sclerosis computation in this domain. Supplementary materials for this article are available online.}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Roy, Arkaprava and Reich, Brian J. and Guinness, Joseph and Shinohara, Russell T. and Staicu, Ana-Maria}, year={2021}, month={Jun} } @article{majumder_guan_reich_o'neill_rappold_2021, title={Statistical Downscaling with Spatial Misalignment: Application to Wildland Fire PM2.5 Concentration Forecasting}, volume={26}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-020-00420-4}, abstractNote={Fine particulate matter, PM2.5, has been documented to have adverse health effects and wildland fires are a major contributor to PM2.5 air pollution in the US. Forecasters use numerical models to predict PM2.5 concentrations to warn the public of impending health risk. Statistical methods are needed to calibrate the numerical model forecast using monitor data to reduce bias and quantify uncertainty. Typical model calibration techniques do not allow for errors due to misalignment of geographic locations. We propose a spatiotemporal downscaling methodology that uses image registration techniques to identify the spatial misalignment and accounts for and corrects the bias produced by such warping. Our model is fitted in a Bayesian framework to provide uncertainty quantification of the misalignment and other sources of error. We apply this method to different simulated data sets and show enhanced performance of the method in presence of spatial misalignment. 
Finally, we apply the method to a large fire in Washington state and show that the proposed method provides more realistic uncertainty quantification than standard methods.}, number={1}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Majumder, Suman and Guan, Yawen and Reich, Brian J. and O'Neill, Susan and Rappold, Ana G.}, year={2021}, month={Mar}, pages={23–44} } @article{larsen_hanigan_reich_qin_cope_morgan_rappold_2020, title={A deep learning approach to identify smoke plumes in satellite imagery in near-real time for health risk communication}, ISSN={1559-064X}, DOI={10.1038/s41370-020-0246-y}, abstractNote={Wildland fire (wildfire; bushfire) pollution contributes to poor air quality, a risk factor for premature death. The frequency and intensity of wildfires are expected to increase; improved tools for estimating exposure to fire smoke are vital. New-generation satellite-based sensors produce high-resolution spectral images, providing real-time information of surface features during wildfire episodes. Because of the vast size of such data, new automated methods for processing information are required. We present a deep fully convolutional neural network (FCN) for predicting fire smoke in satellite imagery in near-real time (NRT). The FCN identifies fire smoke using output from operational smoke identification methods as training data, leveraging validated smoke products in a framework that can be operationalized in NRT. We demonstrate this for a fire episode in Australia; the algorithm is applicable to any geographic region. The algorithm has high classification accuracy (99.5% of pixels correctly classified on average) and precision (average intersection over union = 57.6%). 
The FCN algorithm has high potential as an exposure-assessment tool, capable of providing critical information to fire managers, health and environmental agencies, and the general public to prevent the health risks associated with exposure to hazardous smoke from wildland fires in NRT.}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Larsen, Alexandra and Hanigan, Ivan and Reich, Brian J. and Qin, Yi and Cope, Martin and Morgan, Geoffrey and Rappold, Ana G.}, year={2020} } @article{hazra_reich_staicu_2020, title={A multivariate spatial skew-t process for joint modeling of extreme precipitation indexes}, volume={31}, ISSN={["1099-095X"]}, DOI={10.1002/env.2602}, abstractNote={Abstract}, number={3}, journal={ENVIRONMETRICS}, author={Hazra, Arnab and Reich, Brian J. and Staicu, Ana-Maria}, year={2020}, month={May} } @article{guan_reich_laber_bandyopadhyay_2020, title={Bayesian Nonparametric Policy Search With Application to Periodontal Recall Intervals}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1660169}, abstractNote={Abstract Tooth loss from periodontal disease is a major public health burden in the United States. Standard clinical practice is to recommend a dental visit every six months; however, this practice is not evidence-based, and poor dental outcomes and increasing dental insurance premiums indicate room for improvement. We consider a tailored approach that recommends recall time based on patient characteristics and medical history to minimize disease progression without increasing resource expenditures. We formalize this method as a dynamic treatment regime which comprises a sequence of decisions, one per stage of intervention, that follow a decision rule which maps current patient information to a recommendation for their next visit time. 
The dynamics of periodontal health, visit frequency, and patient compliance are complex, yet the estimated optimal regime must be interpretable to domain experts if it is to be integrated into clinical practice. We combine nonparametric Bayesian dynamics modeling with policy-search algorithms to estimate the optimal dynamic treatment regime within an interpretable class of regimes. Both simulation experiments and application to a rich database of electronic dental records from the HealthPartners HMO shows that our proposed method leads to better dental health without increasing the average recommended recall time relative to competing methods. Supplementary materials for this article, including a standardized description of the materials available for reproducing the work, are available as an online supplement.}, number={531}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Guan, Qian and Reich, Brian J. and Laber, Eric B. and Bandyopadhyay, Dipankar}, year={2020}, month={Jul}, pages={1066–1078} } @article{huberman_reich_pacifici_collazo_2020, title={Estimating the drivers of species distributions with opportunistic data using mediation analysis}, volume={11}, ISSN={["2150-8925"]}, url={https://doi.org/10.1002/ecs2.3165}, DOI={10.1002/ecs2.3165}, abstractNote={Abstract}, number={6}, journal={ECOSPHERE}, publisher={Wiley}, author={Huberman, David B. and Reich, Brian J. and Pacifici, Krishna and Collazo, Jaime A.}, year={2020}, month={Jun} } @article{guan_johnson_katzfuss_mannshardt_messier_reich_song_2020, title={Fine-Scale Spatiotemporal Air Pollution Analysis Using Mobile Monitors on Google Street View Vehicles}, volume={115}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2019.1665526}, abstractNote={Abstract People are increasingly concerned with understanding their personal environment, including possible exposure to harmful air pollutants. 
To make informed decisions on their day-to-day activities, they are interested in real-time information on a localized scale. Publicly available, fine-scale, high-quality air pollution measurements acquired using mobile monitors represent a paradigm shift in measurement technologies. A methodological framework utilizing these increasingly fine-scale measurements to provide real-time air pollution maps and short-term air quality forecasts on a fine-resolution spatial scale could prove to be instrumental in increasing public awareness and understanding. The Google Street View study provides a unique source of data with spatial and temporal complexities, with the potential to provide information about commuter exposure and hot spots within city streets with high traffic. We develop a computationally efficient spatiotemporal model for these data and use the model to make short-term forecasts and high-resolution maps of current air pollution levels. We also show via an experiment that mobile networks can provide more nuanced information than an equally sized fixed-location network. This modeling framework has important real-world implications in understanding citizens’ personal environments, as data production and real-time availability continue to be driven by the ongoing development and improvement of mobile measurement technologies. Supplementary materials for this article, including a standardized description of the materials available for reproducing the work, are available as an online supplement.},
  number = {531},
  journal = {JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION},
  author = {Guan, Yawen and Johnson, Margaret C. and Katzfuss, Matthias and Mannshardt, Elizabeth and Messier, Kyle P. and Reich, Brian J.
and Song, Joon J.},
  year = {2020},
  month = jul,
  pages = {1111--1124}
}

@article{grantham_reich_laber_pacifici_dunn_fierer_gebert_allwood_faith_2020,
  title = {Global forensic geolocation with deep neural networks},
  volume = {69},
  ISSN = {1467-9876},
  DOI = {10.1111/rssc.12427},
  number = {4},
  journal = {JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C-APPLIED STATISTICS},
  author = {Grantham, Neal S. and Reich, Brian J. and Laber, Eric B. and Pacifici, Krishna and Dunn, Robert R. and Fierer, Noah and Gebert, Matthew and Allwood, Julia S. and Faith, Seth A.},
  year = {2020},
  month = aug,
  pages = {909--929}
}

@article{reich_guan_fourches_warren_sarnat_chang_2020,
  title = {Integrative Statistical Methods for Exposure Mixtures and Health},
  volume = {14},
  ISSN = {1941-7330},
  DOI = {10.1214/20-AOAS1364},
  abstractNote = {Humans are concurrently exposed to chemically, structurally and toxicologically diverse chemicals. A critical challenge for environmental epidemiology is to quantify the risk of adverse health outcomes resulting from exposures to such chemical mixtures and to identify which mixture constituents may be driving etiologic associations. A variety of statistical methods have been proposed to address these critical research questions. However, they generally rely solely on measured exposure and health data available within a specific study. Advancements in understanding of the role of mixtures on human health impacts may be better achieved through the utilization of external data and knowledge from multiple disciplines with innovative statistical tools. In this paper we develop new methods for health analyses that incorporate auxiliary information about the chemicals in a mixture, such as physicochemical, structural and/or toxicological data. We expect that the constituents identified using auxiliary information will be more biologically meaningful than those identified by methods that solely utilize observed correlations between measured exposure.
We develop flexible Bayesian models by specifying prior distributions for the exposures and their effects that include auxiliary information and examine this idea over a spectrum of analyses from regression to factor analysis. The methods are applied to study the effects of volatile organic compounds on emergency room visits in Atlanta. We find that including cheminformatic information about the exposure variables improves prediction and provides a more interpretable model for emergency room visits for respiratory diseases.},
  number = {4},
  journal = {ANNALS OF APPLIED STATISTICS},
  author = {Reich, Brian J. and Guan, Yawen and Fourches, Denis and Warren, Joshua L. and Sarnat, Stefanie E. and Chang, Howard H.},
  year = {2020},
  month = dec,
  pages = {1945--1963}
}

@article{grantham_guan_reich_borer_gross_2020,
  title = {{MIMIX}: A {Bayesian} Mixed-Effects Model for Microbiome Data From Designed Experiments},
  volume = {115},
  ISSN = {1537-274X},
  DOI = {10.1080/01621459.2019.1626242},
  abstractNote = {Recent advances in bioinformatics have made high-throughput microbiome data widely available, and new statistical tools are required to maximize the information gained from these data. For example, analysis of high-dimensional microbiome data from designed experiments remains an open area in microbiome research. Contemporary analyses work on metrics that summarize collective properties of the microbiome, but such reductions preclude inference on the fine-scale effects of environmental stimuli on individual microbial taxa. Other approaches model the proportions or counts of individual taxa as response variables in mixed models, but these methods fail to account for complex correlation patterns among microbial communities. In this article, we propose a novel Bayesian mixed-effects model that exploits cross-taxa correlations within the microbiome, a model we call microbiome mixed model (MIMIX).
MIMIX offers global tests for treatment effects, local tests and estimation of treatment effects on individual taxa, quantification of the relative contribution from heterogeneous sources to microbiome variability, and identification of latent ecological subcommunities in the microbiome. MIMIX is tailored to large microbiome experiments using a combination of Bayesian factor analysis to efficiently represent dependence between taxa and Bayesian variable selection methods to achieve sparsity. We demonstrate the model using a simulation experiment and on a 2 × 2 factorial experiment of the effects of nutrient supplement and herbivore exclusion on the foliar fungal microbiome of Andropogon gerardii, a perennial bunchgrass, as part of the global Nutrient Network research initiative. Supplementary materials for this article, including a standardized description of the materials available for reproducing the work, are available as an online supplement.},
  number = {530},
  journal = {JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION},
  author = {Grantham, Neal S. and Guan, Yawen and Reich, Brian J. and Borer, Elizabeth T. and Gross, Kevin},
  year = {2020},
  month = apr,
  pages = {599--609}
}

@article{wei_reich_hoppin_ghosal_2020,
  title = {Sparse {Bayesian} Additive Nonparametric Regression with Application to Health Effects of Pesticides Mixtures},
  volume = {30},
  ISSN = {1996-8507},
  DOI = {10.5705/ss.202017.0315},
  abstractNote = {In many practical problems that simultaneously investigate the joint effect of covariates, we first need to identify the subset of significant covariates, and then estimate their joint effect. An example is an epidemiological study that analyzes the effects of exposure variables on a health response. In order to make inferences on the covariate effects, we propose a Bayesian additive nonparametric regression model with a multivariate continuous shrinkage prior to address the model uncertainty and to identify important covariates.
Our general approach is to decompose the response function into the sum of the nonlinear main effects and the two-way interaction terms. Then we apply the computationally advantageous Bayesian variable selection method to identify the important effects. The proposed Bayesian method is a multivariate Dirichlet–Laplace prior that aggressively shrinks many terms toward zero, thus mitigating the noise of including unimportant exposures and isolating the effects of the important covariates. Our theoretical studies demonstrate asymptotic prediction and variable selection consistency properties. In addition, we use numerical simulations to evaluate the model performance in terms of prediction and variable selection under practical scenarios. The method is applied to a neurobehavioral data set from the Agricultural Health Study that investigates the association between pesticide usage and neurobehavioral outcomes in farmers. The proposed method shows improved accuracy in predicting the joint effects on the neurobehavioral responses, while restricting the number of covariates included in the model through variable selection.},
  number = {1},
  journal = {STATISTICA SINICA},
  author = {Wei, Ran and Reich, Brian J. and Hoppin, Jane A. and Ghosal, Subhashis},
  year = {2020},
  month = jan,
  pages = {55--79}
}

@article{jhuang_fuentes_bandyopadhyay_reich_2020,
  title = {Spatiotemporal signal detection using continuous shrinkage priors},
  volume = {39},
  ISSN = {1097-0258},
  DOI = {10.1002/sim.8514},
  abstractNote = {Periodontal disease (PD) is a chronic inflammatory disease that affects the gum tissue and bone supporting the teeth. Although tooth‐site level PD progression is believed to be spatio‐temporally referenced, the whole‐mouth average periodontal pocket depth (PPD) has been commonly used as an indicator of the current/active status of PD. This leads to imminent loss of information, and imprecise parameter estimates.
Despite availability of statistical methods that accommodates spatiotemporal information for responses collected at the tooth‐site level, the enormity of longitudinal databases derived from oral health practice‐based settings render them unscalable for application. To mitigate this, we introduce a Bayesian spatiotemporal model to detect problematic/diseased tooth‐sites dynamically inside the mouth for any subject obtained from large databases. This is achieved via a spatial continuous sparsity‐inducing shrinkage prior on spatially varying linear‐trend regression coefficients. A low‐rank representation captures the nonstationary covariance structure of the PPD outcomes, and facilitates the relevant Markov chain Monte Carlo computing steps applicable to thousands of study subjects. Application of our method to both simulated data and to a rich database of electronic dental records from the HealthPartners Institute reveal improved prediction performances, compared with alternative models with usual Gaussian priors for regression parameters and conditionally autoregressive specification of the covariance structure.},
  number = {13},
  journal = {STATISTICS IN MEDICINE},
  author = {Jhuang, An-Ting and Fuentes, Montserrat and Bandyopadhyay, Dipankar and Reich, Brian J.},
  year = {2020},
  month = jun,
  pages = {1817--1832}
}

@article{saia_nelson_huseth_grieger_reich_2020,
  title = {Transitioning Machine Learning from Theory to Practice in Natural Resources Management},
  volume = {435},
  ISSN = {0304-3800},
  url = {http://dx.doi.org/10.1016/j.ecolmodel.2020.109257},
  DOI = {10.1016/j.ecolmodel.2020.109257},
  journal = {Ecological Modelling},
  publisher = {Elsevier BV},
  author = {Saia, S. M. and Nelson, N. and Huseth, A. S.
and Grieger, K. and Reich, B. J.},
  year = {2020},
  month = nov,
  pages = {109257}
}

@article{allwood_fierer_dunn_breen_reich_laber_clifton_grantham_faith_2020,
  title = {Use of standardized bioinformatics for the analysis of fungal {DNA} signatures applied to sample provenance},
  volume = {310},
  ISSN = {1872-6283},
  DOI = {10.1016/j.forsciint.2020.110250},
  abstractNote = {The use of environmental trace material to aid criminal investigations is an ongoing field of research within forensic science. The application of environmental material thus far has focused upon a variety of different objectives relevant to forensic biology, including sample provenance (also referred to as sample attribution). The capability to predict the provenance or origin of an environmental DNA sample would be an advantageous addition to the suite of investigative tools currently available. A metabarcoding approach is often used to predict sample provenance, through the extraction and comparison of the DNA signatures found within different environmental materials, such as the bacteria within soil or fungi within dust. Such approaches are combined with bioinformatics workflows and statistical modelling, often as part of large-scale study, with less emphasis on the investigation of the adaptation of these methods to a smaller scale method for forensic use. The present work was investigating a small-scale approach as an adaptation of a larger metabarcoding study to develop a model for global sample provenance using fungal DNA signatures collected from dust swabs. This adaptation was to facilitate a standardized method for consistent, reproducible sample treatment, including bioinformatics processing and final application of resulting data to the available prediction model. To investigate this small-scale method, 76 DNA samples were treated as anonymous test samples and analyzed using the standardized process to demonstrate and evaluate processing and customized sequence data analysis.
This testing included samples originating from countries previously used to train the model, samples artificially mixed to represent multiple or mixed countries, as well as outgroup samples. Positive controls were also developed to monitor laboratory processing and bioinformatics analysis. Through this evaluation we were able to demonstrate that the samples could be processed and analyzed in a consistent manner, facilitated by a relatively user-friendly bioinformatic pipeline for sequence data analysis. Such investigation into standardized analyses and application of metabarcoding data is of key importance for the future use of applied microbiology in forensic science.},
  journal = {FORENSIC SCIENCE INTERNATIONAL},
  author = {Allwood, Julia S. and Fierer, Noah and Dunn, Robert R. and Breen, Matthew and Reich, Brian J. and Laber, Eric B. and Clifton, Jesse and Grantham, Neal S. and Faith, Seth A.},
  year = {2020},
  month = may
}

@article{rekabdarkolaee_krut_fuentes_reich_2019,
  title = {A {Bayesian} multivariate functional model with spatially varying coefficient approach for modeling hurricane track data},
  volume = {29},
  ISSN = {2211-6753},
  DOI = {10.1016/j.spasta.2018.12.006},
  abstractNote = {Hurricanes are massive storm systems with enormous destructive capabilities. Understanding the trends across space and time of a hurricane track and intensity leads to improved forecasts and minimizes their damage. Viewing the hurricane’s latitude, longitude, and wind speed as functions of time, we propose a novel spatiotemporal multivariate functional model to simultaneously allow for multivariate, longitudinal, and spatially observed data with noisy functional covariates. The proposed procedure is fully Bayesian and inference is performed using MCMC. This new approach is illustrated through simulation studies and analyzing the hurricane track data from 2004 to 2013 in the Atlantic basin.
Simulation results indicate that our proposed model offers a significant reduction in the mean square error and averaged interval and increases the coverage probability. In addition, our method offers a 10% reduction in location and wind speed prediction error.},
  journal = {SPATIAL STATISTICS},
  author = {Rekabdarkolaee, Hossein Moradi and Krut, Christopher and Fuentes, Montserrat and Reich, Brian J.},
  year = {2019},
  month = mar,
  pages = {351--365}
}

@article{cloud_reich_rozoff_alessandrini_lewis_delle_monache_2019,
  title = {A Feed Forward Neural Network Based on Model Output Statistics for Short-Term Hurricane Intensity Prediction},
  volume = {34},
  ISSN = {1520-0434},
  DOI = {10.1175/WAF-D-18-0173.1},
  number = {4},
  journal = {WEATHER AND FORECASTING},
  author = {Cloud, Kirkwood A. and Reich, Brian J. and Rozoff, Christopher M. and Alessandrini, Stefano and Lewis, William E. and Delle Monache, Luca},
  year = {2019},
  month = aug,
  pages = {985--997}
}

@article{reich_shaby_2019,
  title = {A Spatial {Markov} Model for Climate Extremes},
  volume = {28},
  ISSN = {1537-2715},
  DOI = {10.1080/10618600.2018.1482764},
  abstractNote = {Spatial climate data are often presented as summaries of areal regions such as grid cells, either because they are the output of numerical climate models or to facilitate comparison with numerical climate model output. Extreme value analysis can benefit greatly from spatial methods that borrow information across regions. For Gaussian outcomes, a host of methods that respect the areal nature of the data are available, including conditional and simultaneous autoregressive models. However, to our knowledge, there is no such method in the spatial extreme value analysis literature. In this article, we propose a new method for areal extremes that accounts for spatial dependence using latent clustering of neighboring regions. We show that the proposed model has desirable asymptotic dependence properties and leads to relatively simple computation.
Applying the proposed method to North American climate data reveals several local and continental-scale changes in the distribution of precipitation and temperature extremes over time. Supplementary material for this article is available online.},
  number = {1},
  journal = {JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS},
  author = {Reich, Brian J. and Shaby, Benjamin A.},
  year = {2019},
  month = jan,
  pages = {117--126}
}

@article{sahoo_guinness_reich_2019,
  title = {A Test for Isotropy on a Sphere Using Spherical Harmonic Functions},
  volume = {29},
  ISSN = {1996-8507},
  DOI = {10.5705/ss.202017.0475},
  abstractNote = {Analysis of geostatistical data is often based on the assumption that the spatial random field is isotropic. This assumption, if erroneous, can adversely affect model predictions and statistical inference. Nowadays many applications consider data over the entire globe and hence it is necessary to check the assumption of isotropy on a sphere. In this paper, a test for spatial isotropy on a sphere is proposed. The data are first projected onto the set of spherical harmonic functions. Under isotropy, the spherical harmonic coefficients are uncorrelated whereas they are correlated if the underlying fields are not isotropic. This motivates a test based on the sample correlation matrix of the spherical harmonic coefficients. In particular, we use the largest eigenvalue of the sample correlation matrix as the test statistic. Extensive simulations are conducted to assess the Type I errors of the test under different scenarios. We show how temporal correlation affects the test and provide a method for handling temporal correlation. We also gauge the power of the test as we move away from isotropy. The method is applied to the near-surface air temperature data which is part of the HadCM3 model output.
Although we do not expect global temperature fields to be isotropic, we propose several anisotropic models with increasing complexity, each of which has an isotropic process as model component and we apply the test to the isotropic component in a sequence of such models as a method of determining how well the models capture the anisotropy in the fields.},
  number = {3},
  journal = {STATISTICA SINICA},
  author = {Sahoo, Indranil and Guinness, Joseph and Reich, Brian J.},
  year = {2019},
  month = jul,
  pages = {1253--1276}
}

@article{binion-rock_reich_buckel_2019,
  title = {A spatial kernel density method to estimate the diet composition of fish},
  volume = {76},
  ISSN = {1205-7533},
  DOI = {10.1139/cjfas-2017-0306},
  abstractNote = {We present a novel spatially explicit kernel density approach to estimate the proportional contribution of a prey to a predator’s diet by mass. First, we compared the spatial estimator to a traditional cluster-based approach using a Monte Carlo simulation study. Next, we compared the diet composition of three predators from Pamlico Sound, North Carolina, to evaluate how ignoring spatial correlation affects diet estimates. The spatial estimator had lower mean squared error values compared with the traditional cluster-based estimator for all Monte Carlo simulations. Incorporating spatial correlation when estimating the predator’s diet resulted in a consistent increase in precision across multiple levels of spatial correlation. Bias was often similar between the two estimators; however, when it differed it mostly favored the spatial estimator. The two estimators produced different estimates of proportional contribution of prey to the diets of the three field-collected predator species, especially when spatial correlation was strong and prey were consumed in patchy areas. Our simulation and empirical data provide strong evidence that data on food habits should be modeled using spatial approaches and not treated as spatially independent.
},
  number = {2},
  journal = {CANADIAN JOURNAL OF FISHERIES AND AQUATIC SCIENCES},
  author = {Binion-Rock, Samantha M. and Reich, Brian J. and Buckel, Jeffrey A.},
  year = {2019},
  month = feb,
  pages = {249--267}
}

@article{hazra_reich_reich_shinohara_staicu_2019,
  title = {A spatio-temporal model for longitudinal image-on-image regression},
  volume = {11},
  ISSN = {1867-1772},
  DOI = {10.1007/s12561-017-9206-z},
  abstractNote = {Neurologists and radiologists often use magnetic resonance imaging (MRI) in the management of subjects with multiple sclerosis (MS) because it is sensitive to inflammatory and demyelinative changes in the white matter of the brain and spinal cord. Two conventional modalities used for identifying lesions are T1-weighted (T1) and T2-weighted fluid-attenuated inversion recovery (FLAIR) imaging, which are used clinically and in research studies. Magnetization transfer ratio (MTR), which is available only in research settings, is an advanced MRI modality that has been used extensively for measuring disease-related demyelination both in white matter lesions as well as across normal-appearing white matter. Acquiring MTR is not standard in clinical practice, due to the increased scan time and cost. Hence, prediction of MTR based on the modalities T1 and FLAIR could have great impact on the availability of these promising measures for improved patient management. We propose a spatio-temporal regression model for image response and image predictors that are acquired longitudinally, with images being co-registered within the subject but not across subjects. The model is additive, with the response at a voxel being dependent on the available covariates not only through the current voxel but also on the imaging information from the voxels within a neighboring spatial region as well as their temporal gradients. We propose a dynamic Bayesian estimation procedure that updates the parameters of the subject-specific regression model as data accumulates.
To bypass the computational challenges associated with a Bayesian approach for high-dimensional imaging data, we propose an approximate Bayesian inference technique. We assess the model fitting and the prediction performance using longitudinally acquired MRI images from 46 MS patients.},
  number = {1},
  journal = {Statistics in Biosciences},
  author = {Hazra, A. and Reich, B. J. and Reich, D. S. and Shinohara, R. T. and Staicu, A. M.},
  year = {2019},
  pages = {22--46}
}

@article{huang_reich_fuentes_sankarasubramanian_2019,
  title = {Complete spatial model calibration},
  volume = {13},
  ISSN = {1932-6157},
  url = {http://dx.doi.org/10.1214/18-aoas1219},
  DOI = {10.1214/18-AOAS1219},
  abstractNote = {Computer simulation models are central to environmental science. These mathematical models are used to understand complex weather and climate patterns and to predict the climate’s response to different forcings. Climate models are of course not perfect reflections of reality, and so comparison with observed data is needed to quantify and to correct for biases and other deficiencies. We propose a new method to calibrate model output using observed data. Our approach not only matches the marginal distributions of the model output and gridded observed data, but it simultaneously postprocesses the model output to have the same spatial correlation as the observed data. This comprehensive calibration method permits realistic spatial simulations for regional impact studies. We apply the proposed method to global climate model output in North America and show that it successfully calibrates the model output for temperature and precipitation.},
  number = {2},
  journal = {The Annals of Applied Statistics},
  publisher = {Institute of Mathematical Statistics},
  author = {Huang, Yen-Ning and Reich, Brian J.
and Fuentes, Montserrat and Sankarasubramanian, A.},
  year = {2019},
  month = jun,
  pages = {746--766}
}

@article{ferguson_mueller_rajasekaran_reich_2019,
  title = {Conference report: 2018 materials and data science hackathon ({MATDAT18})},
  volume = {4},
  ISSN = {2058-9689},
  DOI = {10.1039/c9me90018g},
  abstractNote = {MATDAT18 organizers and participants.},
  number = {3},
  journal = {MOLECULAR SYSTEMS DESIGN \& ENGINEERING},
  author = {Ferguson, Andrew L. and Mueller, Tim and Rajasekaran, Sanguthevar and Reich, Brian J.},
  year = {2019},
  month = jun,
  pages = {462--468}
}

@article{morris_reich_thibaud_2019,
  title = {Exploration and Inference in Spatial Extremes Using Empirical Basis Functions},
  volume = {24},
  ISSN = {1537-2693},
  DOI = {10.1007/s13253-019-00359-1},
  abstractNote = {Statistical methods for inference on spatial extremes of large datasets are yet to be developed. Motivated by standard dimension reduction techniques used in spatial statistics, we propose an approach based on empirical basis functions to explore and model spatial extremal dependence. Based on a low-rank max-stable model, we propose a data-driven approach to estimate meaningful basis functions using empirical pairwise extremal coefficients. These spatial empirical basis functions can be used to visualize the main trends in extremal dependence. In addition to exploratory analysis, we describe how these functions can be used in a Bayesian hierarchical model to model spatial extremes of large datasets. We illustrate our methods on extreme precipitations in eastern USA. Supplementary materials accompanying this paper appear online.},
  number = {4},
  journal = {JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS},
  author = {Morris, Samuel A. and Reich, Brian J.
and Thibaud, Emeric},
  year = {2019},
  month = dec,
  pages = {555--572}
}

@article{hammerling_reich_2019,
  title = {Guest Editors' Introduction to the Special Issue on ``{Climate and the Earth System}''},
  volume = {24},
  ISSN = {1537-2693},
  DOI = {10.1007/s13253-019-00373-3},
  abstractNote = {The Journal of Agricultural, Biological and Environmental Statistics (JABES) special issue on the Climate and Earth System highlights recent statistical developments that aim to refine our understanding of this complex system. New methods are required to process the massive environmental data that often fuels climate analysis and to properly account for uncertainty in the results. This special issue proudly features eight papers that span a wide range of computational and methodological problems related to the climate and earth system. In this brief introduction, we identify common themes among the papers and point to areas of future research.},
  number = {3},
  journal = {JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS},
  author = {Hammerling, Dorit and Reich, Brian J.},
  year = {2019},
  month = sep,
  pages = {395--397}
}

@article{pacifici_reich_miller_pease_2019,
  title = {Resolving misaligned spatial data with integrated species distribution models},
  volume = {100},
  ISSN = {1939-9170},
  DOI = {10.1002/ecy.2709},
  number = {6},
  journal = {ECOLOGY},
  author = {Pacifici, Krishna and Reich, Brian J. and Miller, David A. W. and Pease, Brent S.},
  year = {2019},
  month = jun
}

@article{jhuang_fuentes_jones_esteves_fancher_furman_reich_2019,
  title = {Spatial Signal Detection Using Continuous Shrinkage Priors},
  volume = {61},
  ISSN = {1537-2723},
  DOI = {10.1080/00401706.2018.1546622},
  abstractNote = {Motivated by the problem of detecting changes in two-dimensional X-ray diffraction data, we propose a Bayesian spatial model for sparse signal detection in image data.
Our model places considerable mass near zero and has heavy tails to reflect the prior belief that the image signal is zero for most pixels and large for an important subset. We show that the spatial prior places mass on nearby locations simultaneously being zero, and also allows for nearby locations to simultaneously be large signals. The form of the prior also facilitates efficient computing for large images. We conduct a simulation study to evaluate the properties of the proposed prior and show that it outperforms other spatial models. We apply our method in the analysis of X-ray diffraction data from a two-dimensional area detector to detect changes in the pattern when the material is exposed to an electric field.},
  number = {4},
  journal = {TECHNOMETRICS},
  author = {Jhuang, An-Ting and Fuentes, Montserrat and Jones, Jacob L. and Esteves, Giovanni and Fancher, Chris M. and Furman, Marschall and Reich, Brian J.},
  year = {2019},
  month = oct,
  pages = {494--506}
}

@article{miller_pacifici_sanderlin_reich_2019,
  title = {The recent past and promising future for data integration methods to estimate species' distributions},
  volume = {10},
  ISSN = {2041-2096},
  DOI = {10.1111/2041-210X.13110},
  number = {1},
  journal = {METHODS IN ECOLOGY AND EVOLUTION},
  author = {Miller, David A. W. and Pacifici, Krishna and Sanderlin, Jamie S. and Reich, Brian J.},
  year = {2019},
  month = jan,
  pages = {22--37}
}

@article{jones_broughton_iamsasri_fancher_wilson_reich_smith_2019,
  title = {The use of {Bayesian} inference in the characterization of materials and thin films},
  volume = {75},
  ISSN = {2053-2733},
  DOI = {10.1107/S0108767319097940},
  journal = {ACTA CRYSTALLOGRAPHICA A-FOUNDATION AND ADVANCES},
  author = {Jones, Jacob L. and Broughton, Rachel and Iamsasri, Thanakorn and Fancher, Chris M. and Wilson, Alyson G.
and Reich, Brian J. and Smith, Ralph C.},
  year = {2019},
  pages = {A211}
}

@article{king_staicu_davis_reich_eder_2018,
  title = {A functional data analysis of spatiotemporal trends and variation in fine particulate matter},
  volume = {184},
  ISSN = {1873-2844},
  DOI = {10.1016/j.atmosenv.2018.04.001},
  abstractNote = {In this paper we illustrate the application of modern functional data analysis methods to study the spatiotemporal variability of particulate matter components across the United States. The approach models the pollutant annual profiles in a way that describes the dynamic behavior over time and space. This new technique allows us to predict yearly profiles for locations and years at which data are not available and also offers dimension reduction for easier visualization of the data. Additionally it allows us to study changes of pollutant levels annually or for a particular season. We apply our method to daily concentrations of two particular components of PM2.5 measured by two networks of monitoring sites across the United States from 2003 to 2015. Our analysis confirms existing findings and additionally reveals new trends in the change of the pollutants across seasons and years that may not be as easily determined from other common approaches such as Kriging.},
  journal = {ATMOSPHERIC ENVIRONMENT},
  author = {King, Meredith C. and Staicu, Ana-Maria and Davis, Jerry M. and Reich, Brian J. and Eder, Brian},
  year = {2018},
  month = jul,
  pages = {233--243}
}

@article{libera_sankarasubramanian_sharma_reich_2018,
  title = {A non-parametric bootstrapping framework embedded in a toolkit for assessing water quality model performance},
  volume = {107},
  ISSN = {1364-8152},
  url = {http://dx.doi.org/10.1016/j.envsoft.2018.05.013},
  DOI = {10.1016/j.envsoft.2018.05.013},
  abstractNote = {Assessing the ability to predict nutrient concentration in streams is important for determining compliance with the Numeric Nutrient Water Quality Criteria for Nitrogen in the U.S.A.
Evaluation of the USGS's Load Estimator (LOADEST) and the Weighted Regression on Time, Discharge, and Season (WRTDS) models in predicting total nitrogen loads over 18 stations from the Water Quality Network show good performance (Nash-Sutcliffe Efficiency (NSE) > 0.8) in capturing the observed variability even for stations with limited data. However, both models captured only 40% of observed variance in total nitrogen (TN) concentration (NSE < 0.4). Thus, the same dataset performed differently in predicting two attributes – TN load and concentration – questioning the predictive skill of the models. This study proposes a non-parametric re-sampling approach for assessing the performance of water quality models particularly in predicting TN concentration. Null distributions for three common performance metrics belonging to populations of metrics with no skill in capturing the observed variability are constructed through a bootstrap resampling technique. Sample metrics from the LOADEST and WRTDS model in predicting TN concentration are used to calculate p-values for determining if the sample metrics belongs to the null distributions.},
  journal = {Environmental Modelling \& Software},
  publisher = {Elsevier BV},
  author = {Libera, Dominic A. and Sankarasubramanian, A. and Sharma, Ashish and Reich, Brian J.},
  year = {2018},
  month = sep,
  pages = {25--33}
}

@article{irizarry_collazo_pacifici_reich_battle_2018,
  title = {Avian response to shade-layer restoration in coffee plantations in {Puerto Rico}},
  volume = {26},
  ISSN = {1526-100X},
  url = {https://doi.org/10.1111/rec.12697},
  DOI = {10.1111/rec.12697},
  abstractNote = {Documenting the evolving processes associated with habitat restoration and how long it takes to detect avian demographic responses is crucial to evaluate the success of restoration initiatives and to identify ways to improve their effectiveness. The importance of this endeavor prompted the U.S.
Fish and Wildlife Service and the USDA Natural Resources Conservation Service to evaluate their sun‐to‐shade coffee restoration program in Puerto Rico initiated in 2003. We quantified the responses of 12 resident avian species using estimates of local occupancy and extinction probabilities based on surveys conducted in 2015–2017 at 65 restored farms grouped according to time‐since‐initial‐restoration (TSIR): new (2011–2014), intermediate (2007–2010), and old (2003–2006). We also surveyed 40 forest sites, which served as reference sites. Vegetation complexity increased with TSIR, ranging between 35 and 40% forest cover in farms 6–9 years TSIR. Forest specialists (e.g. Loxigilla portoricencis) exhibited highest average occupancy in farms initially classified as intermediate (6–9 years) and old (>10 years), paralleling occupancy in secondary forests. Occupancy of open‐habitat specialists (e.g. Tiaris olivaceus) was more variable, but higher in recently restored farms. Restoring the shade layer has the potential to heighten ecological services derived from forest specialists (e.g. frugivores) without losing the services of many open‐habitat specialists (e.g. insectivores). Annual local extinction probability for forest specialists decreased with increasing habitat complexity, strengthening the potential value of shade restoration as a tool to enhance habitat for avifauna that evolved in forested landscapes.}, number={6}, journal={RESTORATION ECOLOGY}, publisher={Wiley}, author={Irizarry, Amarilys D. and Collazo, Jaime A. and Pacifici, Krishna and Reich, Brian J. and Battle, Kathryn E.}, year={2018}, month={Nov}, pages={1212–1220} } @article{reich_guinness_vandekar_shinohara_staicu_2018, title={Fully Bayesian spectral methods for imaging data}, volume={74}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12782}, abstractNote={Summary}, number={2}, journal={BIOMETRICS}, author={Reich, Brian J. and Guinness, Joseph and Vandekar, Simon N. and Shinohara, Russell T. 
and Staicu, Ana-Maria}, year={2018}, month={Jun}, pages={645–652} } @article{larsen_reich_ruminski_rappold_2018, title={Impacts of fire smoke plumes on regional air quality, 2006-2013}, volume={28}, ISSN={["1559-064X"]}, DOI={10.1038/s41370-017-0013-x}, abstractNote={Increases in the severity and frequency of large fires necessitate improved understanding of the influence of smoke on air quality and public health. The objective of this study is to estimate the effect of smoke from fires across the continental U.S. on regional air quality over an extended period of time. We use 2006–2013 data on ozone (O3), fine particulate matter (PM2.5), and PM2.5 constituents from environmental monitoring sites to characterize regional air quality and satellite imagery data to identify plumes. Unhealthy levels of O3 and PM2.5 were, respectively, 3.3 and 2.5 times more likely to occur on plume days than on clear days. With a two-stage approach, we estimated the effect of plumes on pollutants, controlling for season, temperature, and within-site and between-site variability. Plumes were associated with an average increase of 2.6 p.p.b. (2.5, 2.7) in O3 and 2.9 µg/m3 (2.8, 3.0) in PM2.5 nationwide, but the magnitude of effects varied by location. The largest impacts were observed across the southeast. High impacts on O3 were also observed in densely populated urban areas at large distance from the fires throughout the southeast. Fire smoke substantially affects regional air quality and accounts for a disproportionate number of unhealthy days.}, number={4}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Larsen, Alexandra E. and Reich, Brian J. 
and Ruminski, Mark and Rappold, Ana G.}, year={2018}, month={Jun}, pages={319–327} } @article{reich_pacifici_stallings_2018, title={Integrating auxiliary data in optimal spatial design for species distribution modelling}, volume={9}, ISSN={["2041-2096"]}, DOI={10.1111/2041-210x.13002}, abstractNote={Abstract}, number={6}, journal={METHODS IN ECOLOGY AND EVOLUTION}, author={Reich, Brian J. and Pacifici, Krishna and Stallings, Jonathan W.}, year={2018}, month={Jun}, pages={1626–1637} } @article{reich_haran_2018, title={Precision maps for public health}, volume={555}, ISSN={0028-0836 1476-4687}, url={http://dx.doi.org/10.1038/D41586-018-02096-W}, DOI={10.1038/D41586-018-02096-W}, abstractNote={Researchers have produced high-resolution maps of childhood growth failure and educational attainment across Africa between 2000 and 2015, to assess progress and guide policy decisions in public health.}, number={7694}, journal={Nature}, publisher={Springer Science and Business Media LLC}, author={Reich, Brian J. and Haran, Murali}, year={2018}, month={Mar}, pages={32–33} } @article{tsai_leung_mchale_floyd_reich_2018, title={Relationships between urban green land cover and human health at different spatial resolutions}, volume={22}, ISSN={1083-8155 1573-1642}, url={http://dx.doi.org/10.1007/s11252-018-0813-3}, DOI={10.1007/s11252-018-0813-3}, number={2}, journal={Urban Ecosystems}, publisher={Springer Nature}, author={Tsai, Wei-Lun and Leung, Yu-Fai and McHale, Melissa R. and Floyd, Myron F.
and Reich, Brian J.}, year={2018}, month={Nov}, pages={315–324} } @article{kang_reich_staicu_2018, title={Scalar-on-image regression via the soft-thresholded Gaussian process}, volume={105}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asx075}, abstractNote={Summary This work concerns spatial variable selection for scalar‐on‐image regression. We propose a new class of Bayesian nonparametric models and develop an efficient posterior computational algorithm. The proposed soft‐thresholded Gaussian process provides large prior support over the class of piecewise‐smooth, sparse, and continuous spatially varying regression coefficient functions. In addition, under some mild regularity conditions the soft‐thresholded Gaussian process prior leads to the posterior consistency for parameter estimation and variable selection for scalar‐on‐image regression, even when the number of predictors is larger than the sample size. The proposed method is compared to alternatives via simulation and applied to an electroencephalography study of alcoholism.}, number={1}, journal={BIOMETRIKA}, author={Kang, Jian and Reich, Brian J. and Staicu, Ana-Maria}, year={2018}, month={Mar}, pages={165–184} } @article{grantham_reich_liu_chang_2018, title={Spatial regression with an informatively missing covariate: Application to mapping fine particulate matter}, volume={29}, ISSN={1180-4009}, url={http://dx.doi.org/10.1002/ENV.2499}, DOI={10.1002/ENV.2499}, abstractNote={The United States Environmental Protection Agency has established a large network of stations to monitor fine particulate matter of <2.5 µm (PM2.5) that is known to be harmful to human health. Unfortunately, the network has limited spatial coverage, and stations often only measure PM2.5 every few days. Satellite‐measured aerosol optical depth (AOD) is a low‐cost surrogate with greater spatiotemporal coverage, and spatial regression models have established that including AOD as a covariate improves the spatial interpolation of PM2.5. 
However, AOD is often missing, and our analysis reveals that the conditions that lead to missing AOD are also conducive to high AOD. Therefore, naïve interpolation that ignores informative missingness may lead to bias. We propose a joint hierarchical model for PM2.5 and AOD that accounts for informatively missing AOD. We conduct a simulation study of the effects of ignoring informative missingness in the covariate and evaluate the performance of the proposed model. We apply the method to map daily PM2.5 in the Southeastern United States. Our analysis reveals statistically significant informative missingness and relationships between PM2.5 and AOD in many seasons after accounting for meteorological and land‐use variables.}, number={4}, journal={Environmetrics}, publisher={Wiley}, author={Grantham, Neal S. and Reich, Brian J. and Liu, Yang and Chang, Howard H.}, year={2018}, month={Apr}, pages={e2499} } @article{janko_irish_reich_peterson_doctor_mwandagalirwa_likwela_tshefu_meshnick_emch_2018, title={The links between agriculture, Anopheles mosquitoes, and malaria risk in children younger than 5 years in the Democratic Republic of the Congo: a population-based, cross-sectional, spatial study}, volume={2}, ISSN={2542-5196}, url={http://dx.doi.org/10.1016/S2542-5196(18)30009-3}, DOI={10.1016/S2542-5196(18)30009-3}, abstractNote={The relationship between agriculture, Anopheles mosquitoes, and malaria in Africa is not fully understood, but it is important for malaria control as countries consider expanding agricultural projects to address population growth and food demand. Therefore, we aimed to assess the effect of agriculture on Anopheles biting behaviour and malaria risk in children in rural areas of the Democratic Republic of the Congo (DR Congo).We did a population-based, cross-sectional, spatial study of rural children (<5 years) in the DR Congo. 
We used information about the presence of malaria parasites in each child, as determined by PCR analysis of dried-blood spots from the 2013-14 DR Congo Demographic and Health Survey (DHS). We also used data from the DHS, a longitudinal entomological study, and available land cover and climate data to evaluate the relationships between agriculture, Anopheles biting behaviour, and malaria prevalence. Satellite imagery was used to measure the percentage of agricultural land cover around DHS villages and Anopheles sites. Anopheles biting behaviour was assessed by Human Landing Catch. We used probit regression to assess the relationship between agriculture and the probability of malaria infection, as well as the relationship between agriculture and the probability that a mosquito was caught biting indoors. Between Aug 13, 2013, and Feb 13, 2014, a total of 9790 dried-blood spots were obtained from the DHS, of which 4612 participants were included in this study. Falciparum malaria infection prevalence in rural children was 38·7% (95% uncertainty interval [UI] 37·3-40·0). Increasing exposure to agriculture was associated with increasing malaria risk with a high posterior probability (estimate 0·07, 95% UI -0·04 to 0·17; posterior probability [estimate >0]=0·89), with the probability of malaria infection increased between 0·2% (95% UI -0·1 to 3·4) and 2·6% (-1·5 to 6·6) given a 15% increase in agricultural cover, depending on other risk factors. The models predicted that large increases in agricultural cover (from 0% to 75%) increase the probability of infection by as much as 13·1% (95% UI -7·3 to 28·9). Increased risk might be due to Anopheles gambiae sensu lato, whose probability of biting indoors increased between 11·3% (95% UI -15·3 to 25·6) and 19·7% (-12·1 to 35·9) with a 15% increase in agriculture. Malaria control programmes must consider the possibility of increased risk due to expanding agriculture.
Governments considering initiating large-scale agricultural projects should therefore also consider accompanying additional malaria control measures.National Institutes of Health, National Science Foundation, Bill & Melinda Gates Foundation, President's Malaria Initiative, and Royster Society of Fellows at the University of North Carolina at Chapel Hill.}, number={2}, journal={The Lancet Planetary Health}, publisher={Elsevier BV}, author={Janko, Mark M and Irish, Seth R and Reich, Brian J and Peterson, Marc and Doctor, Stephanie M and Mwandagalirwa, Melchior Kashamuka and Likwela, Joris L and Tshefu, Antoinette K and Meshnick, Steven R and Emch, Michael E}, year={2018}, month={Feb}, pages={e74–e82} } @article{li_guindani_reich_bondell_vannucci_2017, title={A Bayesian mixture model for clustering and selection of feature occurrence rates under mean constraints}, volume={10}, ISSN={["1932-1872"]}, DOI={10.1002/sam.11350}, abstractNote={In this paper, we consider the problem of modeling a matrix of count data, where multiple features are observed as counts over a number of samples. Due to the nature of the data generating mechanism, such data are often characterized by a high number of zeros and overdispersion. In order to take into account the skewness and heterogeneity of the data, some type of normalization and regularization is necessary for conducting inference on the occurrences of features across samples. We propose a zero‐inflated Poisson mixture modeling framework that incorporates a model‐based normalization through prior distributions with mean constraints, as well as a feature selection mechanism, which allows us to identify a parsimonious set of discriminatory features, and simultaneously cluster the samples into homogenous groups. 
We show how our approach improves on the accuracy of the clustering with respect to more standard approaches for the analysis of count data, by means of a simulation study and an application to a bag‐of‐words benchmark data set, where the features are represented by the frequencies of occurrence of each word.}, number={6}, journal={STATISTICAL ANALYSIS AND DATA MINING}, author={Li, Qiwei and Guindani, Michele and Reich, Brian J. and Bondell, Howard D. and Vannucci, Marina}, year={2017}, month={Dec}, pages={393–409} } @article{morris_reich_thibaud_cooley_2017, title={A Space-Time Skew-t Model for Threshold Exceedances}, volume={73}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12644}, abstractNote={Summary}, number={3}, journal={BIOMETRICS}, author={Morris, Samuel A. and Reich, Brian J. and Thibaud, Emeric and Cooley, Daniel}, year={2017}, month={Sep}, pages={749–758} } @article{kaufeld_fuentes_reich_herring_shaw_terres_2017, title={A multivariate dynamic spatial factor model for speciated pollutants and adverse birth outcomes}, volume={14}, number={9}, journal={International Journal of Environmental Research and Public Health}, author={Kaufeld, K. A. and Fuentes, M. and Reich, B. J. and Herring, A. H. and Shaw, G. M. and Terres, M. A.}, year={2017} } @article{morris_reich_pacifici_lei_2017, title={A spatial model for rare binary events}, volume={24}, ISSN={["1573-3009"]}, DOI={10.1007/s10651-017-0385-z}, number={4}, journal={ENVIRONMENTAL AND ECOLOGICAL STATISTICS}, author={Morris, Samuel A. and Reich, Brian J. and Pacifici, Krishna and Lei, Yuancai}, year={2017}, month={Dec}, pages={485–504} } @article{wootten_terando_reich_boyles_semazzi_2017, title={Characterizing Sources of Uncertainty from Global Climate Models and Downscaling Techniques}, volume={56}, ISSN={["1558-8432"]}, DOI={10.1175/jamc-d-17-0087.1}, abstractNote={Abstract}, number={12}, journal={JOURNAL OF APPLIED METEOROLOGY AND CLIMATOLOGY}, author={Wootten, A. and Terando, A. and Reich, B. J. 
and Boyles, R. P. and Semazzi, F.}, year={2017}, month={Dec}, pages={3245–3262} } @article{wilson_reich_nolte_spero_hubbell_rappold_2017, title={Climate change impacts on projections of excess mortality at 2030 using spatially varying ozone-temperature risk surfaces}, volume={27}, ISSN={["1559-064X"]}, DOI={10.1038/jes.2016.14}, abstractNote={We project the change in ozone-related mortality burden attributable to changes in climate between a historical (1995-2005) and near-future (2025-2035) time period while incorporating a non-linear and synergistic effect of ozone and temperature on mortality. We simulate air quality from climate projections varying only biogenic emissions and holding anthropogenic emissions constant, thus attributing changes in ozone only to changes in climate and independent of changes in air pollutant emissions. We estimate non-linear, spatially varying, ozone-temperature risk surfaces for 94 US urban areas using observed data. Using the risk surfaces and climate projections we estimate daily mortality attributable to ozone exceeding 40 p.p.b. (moderate level) and 75 p.p.b. (US ozone NAAQS) for each time period. The average increases in city-specific median April-October ozone and temperature between time periods are 1.02 p.p.b. and 1.94 °F; however, the results varied by region. Increases in ozone because of climate change result in an increase in ozone mortality burden. Mortality attributed to ozone exceeding 40 p.p.b. increases by 7.7% (1.6-14.2%). Mortality attributed to ozone exceeding 75 p.p.b. increases by 14.2% (1.6-28.9%). The absolute increase in excess ozone mortality is larger for changes in moderate ozone levels, reflecting the larger number of days with moderate ozone levels.}, number={1}, journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY}, author={Wilson, Ander and Reich, Brian J. and Nolte, Christopher G. and Spero, Tanya L.
and Hubbell, Bryan and Rappold, Ana G.}, year={2017}, pages={118–124} } @article{cabral_zhang_chi_reich_dickey_lebeau_2017, title={Correlating Local Chemistry and Local Cation Displacements in the Relaxor Ferroelectric PMN}, volume={23}, ISSN={1431-9276 1435-8115}, url={http://dx.doi.org/10.1017/S1431927617008741}, DOI={10.1017/S1431927617008741}, abstractNote={Relaxor ferroelectrics are a unique class of materials that can be identified by their high dielectric constants, low hysteresis, large electrostrictive strains}, number={S1}, journal={Microscopy and Microanalysis}, publisher={Cambridge University Press (CUP)}, author={Cabral, Matthew J. and Zhang, Shujun and Chi, Jocelyn and Reich, Brian J. and Dickey, Elizabeth C. and LeBeau, James M.}, year={2017}, month={Jul}, pages={1616–1617} } @article{li_bucholz_peterson_reich_russ_brenner_2017, title={How predictable is plastic damage at the atomic scale?}, volume={110}, ISSN={0003-6951 1077-3118}, url={http://dx.doi.org/10.1063/1.4977420}, DOI={10.1063/1.4977420}, abstractNote={The title of this letter implies two questions: To what degree is plastic damage inherently predictable at the atomic scale, and can this predictability be quantified? We answer these questions by combining image analysis with molecular dynamics (MD) simulation to quantify similarities between atomic structures of plastic damage in a database of strained copper bi-crystals. We show that a manifold of different outcomes can originate ostensibly from the same initial structure, but that with this approach complex plastic damage within this manifold can be statistically connected to the initial structure. Not only does this work introduce a powerful approach for analyzing MD simulations of a complex plastic damage but also provides a much needed and critical framework for analyzing and organizing atomic-scale microstructural databases.}, number={9}, journal={Applied Physics Letters}, publisher={AIP Publishing}, author={Li, D. and Bucholz, E. W. 
and Peterson, G. and Reich, B. J. and Russ, J. C. and Brenner, D. W.}, year={2017}, month={Feb}, pages={091902} } @article{pacifici_reich_miller_gardner_stauffer_singh_mckerrow_collazo_2017, title={Integrating multiple data sources in species distribution modeling: a framework for data fusion}, volume={98}, ISSN={["1939-9170"]}, DOI={10.1002/ecy.1710}, abstractNote={Abstract}, number={3}, journal={ECOLOGY}, author={Pacifici, Krishna and Reich, Brian J. and Miller, David A. W. and Gardner, Beth and Stauffer, Glenn and Singh, Susheela and McKerrow, Alexa and Collazo, Jaime A.}, year={2017}, month={Mar}, pages={840–850} } @article{farjat_reich_guinness_whetten_mckeand_isik_2017, title={Optimal seed deployment under climate change using spatial models: Application to loblolly pine in the Southeastern US}, volume={112}, DOI={10.1080/01621459.2017.1292179}, abstractNote={ABSTRACT Provenance tests are a common tool in forestry designed to identify superior genotypes for planting at specific locations. The trials are replicated experiments established with seed from parent trees collected from different regions and grown at several locations. In this work, a Bayesian spatial approach is developed for modeling the expected relative performance of seed sources using climate variables as predictors associated with the origin of seed source and the planting site. The proposed modeling technique accounts for the spatial dependence in the data and introduces a separable Matérn covariance structure that provides a flexible means to estimate effects associated with the origin and planting site locations. The statistical model was used to develop a quantitative tool for seed deployment aimed to identify the location of superior performing seed sources that could be suitable for a specific planting site under a given climate scenario. 
Cross-validation results indicate that the proposed spatial models provide superior predictive ability compared to multiple linear regression methods in unobserved locations. The general trend of performance predictions based on future climate scenarios suggests an optimal assisted migration of loblolly pine seed sources from southern and warmer regions to northern and colder areas in the southern USA. Supplementary materials for this article are available online.}, number={519}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Farjat, A. and Reich, Brian and Guinness, J. and Whetten, Ross and McKeand, Steven and Isik, Fikret}, year={2017}, pages={909–920} } @article{peterson_li_reich_brenner_2017, title={Spatial prediction of crystalline defects observed in molecular dynamic simulations of plastic damage}, volume={44}, ISSN={["1360-0532"]}, DOI={10.1080/02664763.2016.1221915}, abstractNote={ABSTRACT Molecular dynamic computer simulation is an essential tool in materials science to study atomic properties of materials in extreme environments and guide development of new materials. We propose a statistical analysis to emulate simulation output with the ultimate goal of efficiently approximating the computationally intensive simulation. We compare several spatial regression approaches including conditional autoregression (CAR), discrete wavelets transform (DWT), and principle components analysis (PCA). The methods are applied to simulation of copper atoms with twin wall and dislocation loop defects, under varying tilt tension angles. We find that CAR and DWT yield accurate results but fail to capture extreme defects, yet PCA better captures defect structure.}, number={10}, journal={JOURNAL OF APPLIED STATISTICS}, author={Peterson, Geoffrey Colin L. and Li, Dong and Reich, Brian J. 
and Brenner, Donald}, year={2017}, pages={1761–1784} } @article{storlie_reich_rust_ticknor_bonnie_montoya_michalak_2017, title={Spatiotemporal Modeling of Node Temperatures in Supercomputers}, volume={112}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2016.1195271}, abstractNote={ABSTRACT Los Alamos National Laboratory is home to many large supercomputing clusters. These clusters require an enormous amount of power (∼500–2000 kW each), and most of this energy is converted into heat. Thus, cooling the components of the supercomputer becomes a critical and expensive endeavor. Recently, a project was initiated to investigate the effect that changes to the cooling system in a machine room had on three large machines that were housed there. Coupled with this goal was the aim to develop a general good-practice for characterizing the effect of cooling changes and monitoring machine node temperatures in this and other machine rooms. This article focuses on the statistical approach used to quantify the effect that several cooling changes to the room had on the temperatures of the individual nodes of the computers. The largest cluster in the room has 1600 nodes that run a variety of jobs during general use. Since extremes temperatures are important, a Normal distribution plus generalized Pareto distribution for the upper tail is used to model the marginal distribution, along with a Gaussian process copula to account for spatio-temporal dependence. A Gaussian Markov random field (GMRF) model is used to model the spatial effects on the node temperatures as the cooling changes take place. This model is then used to assess the condition of the node temperatures after each change to the room. The analysis approach was used to uncover the cause of a problematic episode of overheating nodes on one of the supercomputing clusters. This same approach can easily be applied to monitor and investigate cooling systems at other data centers, as well. 
Supplementary materials for this article are available online.}, number={517}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Storlie, Curtis B. and Reich, Brian J. and Rust, William N. and Ticknor, Lawrence O. and Bonnie, Amanda M. and Montoya, Andrew J. and Michalak, Sarah E.}, year={2017}, pages={92–108} } @article{li_reich_brenner_2017, title={Statistical and image analysis for characterizing simulated atomic-scale damage in crystals}, volume={135}, ISSN={["1879-0801"]}, DOI={10.1016/j.commatsci.2017.03.054}, abstractNote={While molecular dynamics simulations have been used for decades to study structure and formation mechanisms of plastic damage in crystals, the analytical tools needed to characterize collections of plastic defects have been limited. Here we demonstrate the use of two methods, spatial cross-correlations (CC) and Linear Discriminate Analysis (LDA), to analyze and compare plastic damage profiles among molecular dynamics simulations in which damage was created by straining bi-crystals containing symmetric tilt grain boundaries with different tilt angles. Two potentials were used, one representing Cu and one representing Ag, and two coarse-grained descriptors for different types of crystal damage were used, averaged central symmetry parameters (CSP) and atomic hydrostatic stress (HS). We find that in general the CSP is a more accurate descriptor than HS for both analysis methods, and for data base sizes of about 30 or more simulations per tilt angle, the LDA does considerably better in predicting angle and material than the CC method. For example, at the largest data base size of 50 simulations per tilt angle and using the average CSP values, the LDA predicts the exact initial tilt angle and material type for 92% of the simulations, while the CC approach drops to 58%. If the average HS is used instead of the average CSP, the LDA and CC predictions drop to 63% and 32%, respectively. 
These results point to a number of possible applications of this method, for example in quantifying how the range of damage for a set of strained systems may depend on strain rate or temperature, or quantifying similarities between complex damage from processes such as indentation and energetic ion bombardment.}, journal={COMPUTATIONAL MATERIALS SCIENCE}, author={Li, D. and Reich, B. J. and Brenner, D. W.}, year={2017}, month={Jul}, pages={119–126} } @inbook{terando_reich_pacifici_costanza_mckerrow_collazo_2017, title={Uncertainty Quantification and Propagation for Projections of Extremes in Monthly Area Burned Under Climate Change: A Case Study in the Coastal Plain of Georgia, USA}, volume={223}, ISBN={9781119028116}, ISSN={2328-8779}, url={http://dx.doi.org/10.1002/9781119028116.ch16}, DOI={10.1002/9781119028116.ch16}, abstractNote={Human-caused climate change is predicted to affect the frequency of hazard-linked extremes. Unusually large wildfires are a type of extreme event that is constrained by climate and can be a hazard to society but also an important ecological disturbance. This chapter focuses on changes in the frequency of extreme monthly area burned by wildfires for the end of the 21st century for a wildfire-prone region in the southeast United States. Predicting changes in area burned is complicated by the large and varied uncertainties in how the climate will change and in the models used to predict those changes. The chapter characterizes and quantifies multiple sources of uncertainty and propagate the expanded prediction intervals of future area burned. It illustrates that while accounting for multiple sources of uncertainty in global change science problems is a difficult task, it will be necessary in order to properly assess the risk of increased exposure to these society-relevant events.}, booktitle={NATURAL HAZARD UNCERTAINTY ASSESSMENT: MODELING AND DECISION SUPPORT}, publisher={John Wiley \& Sons, Inc.}, author={Terando, Adam J.
and Reich, Brian and Pacifici, Krishna and Costanza, Jennifer and McKerrow, Alexa and Collazo, Jaime A.}, year={2017}, pages={245–256} } @article{li_reich_brenner_2017b, title={Using spatial cross-correlation image analysis to characterize the influence of strain rate on plastic damage in molecular dynamics simulations}, volume={25}, number={7}, journal={Modelling and Simulation in Materials Science and Engineering}, author={Li, D. and Reich, B. J. and Brenner, D. W.}, year={2017} } @article{parker_reich_eidsvik_2016, title={A Fused Lasso Approach to Nonstationary Spatial Covariance Estimation}, volume={21}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-016-0251-8}, number={3}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Parker, Ryan J. and Reich, Brian J. and Eidsvik, Jo}, year={2016}, month={Sep}, pages={569–587} } @article{shaby_reich_cooley_kaufman_2016, title={A MARKOV-SWITCHING MODEL FOR HEAT WAVES}, volume={10}, ISSN={["1932-6157"]}, DOI={10.1214/15-aoas873}, abstractNote={Heat waves merit careful study because they inflict severe economic and societal damage. We use an intuitive, informal working definition of a heat wave—a persistent event in the tail of the temperature distribution—to motivate an interpretable latent state extreme value model. A latent variable with dependence in time indicates membership in the heat wave state. The strength of the temporal dependence of the latent variable controls the frequency and persistence of heat waves. Within each heat wave, temperatures are modeled using extreme value distributions, with extremal dependence across time accomplished through an extreme value Markov model. One important virtue of interpretability is that model parameters directly translate into quantities of interest for risk management, so that questions like whether heat waves are becoming longer, more severe, or more frequent, are easily answered by querying an appropriate fitted model.
We demonstrate the latent state model on two recent, calamitous, examples: the European heat wave of 2003 and the Russian heat wave of 2010.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, author={Shaby, Benjamin A. and Reich, Brian J. and Cooley, Daniel and Kaufman, Cari G.}, year={2016}, month={Mar}, pages={74–93} } @article{balderama_gardner_reich_2016, title={A spatial-temporal double-hurdle model for extremely over-dispersed avian count data}, volume={18}, ISSN={["2211-6753"]}, DOI={10.1016/j.spasta.2016.05.001}, abstractNote={Several wind energy facilities are currently being planned for offshore Atlantic waters of the United States. However, relatively little is known about the distribution, abundance and spatio-temporal variability of marine birds in their offshore habitats and it is becoming increasingly necessary to accurately characterize these demographic parameters before assessing the influence of factors such as offshore energy development on populations. Thus, we incorporate a multi-scale approach to develop models for the space-time distribution and abundance of marine birds to identify potential high-use areas in need of further study. With data taken from past and ongoing survey efforts, we provide relative abundance and density estimates for marine birds over a wide geographical area during multiple years. Due to the excessive amount of zeros as well as extremely large counts exhibited in the data, a double-hurdle model is formulated that includes a negative binomial and a generalized Pareto distribution mixture. Spatial heterogeneity is modeled using a conditional auto-regressive (CAR) prior, and a Fourier basis was used for seasonal variation. 
We demonstrate our model by creating probability maps that show areas of high-abundance and aggregation for twenty-four species of marine bird.}, journal={SPATIAL STATISTICS}, author={Balderama, Earvin and Gardner, Beth and Reich, Brian J.}, year={2016}, month={Nov}, pages={263–275} }
@comment{guan_laber_reich_2016: this key was previously defined twice for the same work (same journal, volume, number and pages). The two records are merged here. The descriptive title of the first record is kept; the second record titled the piece "Comment". Full author names, ISSN, url, DOI, publisher and month come from the second record. Its abstractNote ("Material change: a universe of ideas for the new school year Gary Williams") did not describe this paper and was dropped.}
@article{guan_laber_reich_2016, title={Bayesian nonparametric estimation for dynamic treatment regimes with sequential transition times comment}, volume={111}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2016.1200911}, DOI={10.1080/01621459.2016.1200911}, number={515}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Guan, Qian and Laber, Eric B. and Reich, Brian J.}, year={2016}, month={Jul}, pages={936–942} }
@article{russell_cooley_porter_reich_heald_2016, title={DATA MINING TO INVESTIGATE THE METEOROLOGICAL DRIVERS FOR EXTREME GROUND LEVEL OZONE EVENTS}, volume={10}, ISSN={["1932-6157"]}, DOI={10.1214/16-aoas954}, abstractNote={This project aims to explore which combinations of meteorological conditions are associated with extreme ground level ozone conditions. Our approach focuses only on the tail by optimizing the tail dependence between the ozone response and functions of meteorological covariates. Since there is a long list of possible meteorological covariates, the space of possible models cannot be explored completely. Consequently, we perform data mining within the model selection context, employing an automated model search procedure. 
Our study is unique among extremes applications as optimizing tail dependence has not previously been attempted, and it presents new challenges, such as requiring a smooth threshold. We present a simulation study which shows that the method can detect complicated conditions leading to extreme responses and resists overfitting. We apply the method to ozone data for Atlanta and Charlotte and find similar meteorological drivers for these two Southeastern US cities. We identify several covariates which help to differentiate the meteorological conditions which lead to extreme ozone levels from those which lead to merely high levels.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Russell, Brook T. and Cooley, Daniel S. and Porter, William C. and Reich, Brian J. and Heald, Colette L.}, year={2016}, month={Sep}, pages={1673–1698} } @article{pacifici_reich_dorazio_conroy_2016, title={Occupancy estimation for rare species using a spatially-adaptive sampling design}, volume={7}, ISSN={["2041-2096"]}, DOI={10.1111/2041-210x.12499}, abstractNote={Summary}, number={3}, journal={METHODS IN ECOLOGY AND EVOLUTION}, author={Pacifici, Krishna and Reich, Brian J. and Dorazio, Robert M. and Conroy, Michael J.}, year={2016}, month={Mar}, pages={285–293} } @article{reich_2016, title={Quantile regression for epidemiological applications}, journal={Handbook of spatial epidemiology}, author={Reich, B. J.}, year={2016}, pages={239–249} } @article{tsai_floyd_leung_mchale_reich_2016, title={Urban Vegetative Cover Fragmentation in the US Associations With Physical Activity and BMI}, volume={50}, ISSN={["1873-2607"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84961144121&partnerID=MN8TOARS}, DOI={10.1016/j.amepre.2015.09.022}, abstractNote={Introduction Urban vegetative cover provides a range of ecosystem services including contributions to human health and well-being. 
Urbanization exerts tremendous pressure on this natural resource, causing fragmentation and loss of urban greenspace. This study aimed to examine associations between vegetative cover fragmentation and physical activity and BMI at the county scale in the U.S. metropolitan statistical areas greater than 1 million in population. Methods National Land Cover Database 2006 and Behavioral Risk Factor Surveillance System 2008 provided land cover and human health data, respectively. Analyses were performed in 2013 at the county scale where the health data were reported. Spearman rank correlation and stepwise and hierarchical regression models were applied to estimate relationships between land cover and health variables. Results After controlling for median household income and race, greater forest edge density (β=0.272, p<0.05) and larger size of herbaceous patches (β=0.261, p<0.01) were associated with a higher percentage of participation in physical activity within counties. More connections between forest and developed area (β=0.37, p<0.01) and greater edge density of shrubland (β=0.646, p<0.001) were positively associated with a higher percentage of normal BMI (<25) within counties. Conclusions Forest land cover and some degree of fragmentation are associated with population physical activity. Future studies should examine how built environments and varying land cover configurations influence physical activity and weight status.}, number={4}, journal={AMERICAN JOURNAL OF PREVENTIVE MEDICINE}, author={Tsai, Wei-Lun and Floyd, Myron F. and Leung, Yu-Fai and McHale, Melissa R. 
and Reich, Brian J.}, year={2016}, month={Apr}, pages={509–517} } @article{fancher_han_levin_page_reich_smith_wilson_jones_2016, title={Use of Bayesian Inference in Crystallographic Structure Refinement via Full Diffraction Profile Analysis}, volume={6}, ISSN={2045-2322}, url={http://dx.doi.org/10.1038/SREP31625}, DOI={10.1038/SREP31625}, abstractNote={Abstract}, number={1}, journal={Scientific Reports}, publisher={Springer Science and Business Media LLC}, author={Fancher, Chris M. and Han, Zhen and Levin, Igor and Page, Katharine and Reich, Brian J. and Smith, Ralph C. and Wilson, Alyson G. and Jones, Jacob L.}, year={2016}, month={Aug}, pages={31625} } @article{parker_reich_sain_2015, title={A Multiresolution Approach to Estimating the Value Added by Regional Climate Models}, volume={28}, ISSN={["1520-0442"]}, DOI={10.1175/jcli-d-14-00557.1}, abstractNote={Abstract}, number={22}, journal={JOURNAL OF CLIMATE}, author={Parker, Ryan J. and Reich, Brian J. and Sain, Stephan R.}, year={2015}, month={Nov}, pages={8873–8887} } @article{schnell_bandyopadhyay_reich_nunn_2015, title={A marginal cure rate proportional hazards model for spatial survival data}, volume={64}, ISSN={["1467-9876"]}, DOI={10.1111/rssc.12098}, abstractNote={Summary}, number={4}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES C-APPLIED STATISTICS}, author={Schnell, Patrick and Bandyopadhyay, Dipankar and Reich, Brian J. and Nunn, Martha}, year={2015}, month={Aug}, pages={673–691} } @article{chang_warren_darrow_reich_waller_2015, title={Assessment of critical exposure and outcome windows in time-to-event analysis with application to air pollution and preterm birth study}, volume={16}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxu060}, abstractNote={In reproductive epidemiology, there is a growing interest to examine associations between air pollution exposure during pregnancy and the risk of preterm birth (PTB). 
One important research objective is to identify critical periods of exposure and estimate the associated effects at different stages of pregnancy. However, population studies have reported inconsistent findings. This may be due to limitations from the standard analytic approach of treating PTB as a binary outcome without considering time-varying exposures together over the course of pregnancy. To address this research gap, we present a Bayesian hierarchical model for conducting a comprehensive examination of gestational air pollution exposure by estimating the joint effects of weekly exposures during different vulnerable periods. Our model also treats PTB as a time-to-event outcome to address the challenge of different exposure lengths among ongoing pregnancies. The proposed model is applied to a dataset of geocoded birth records in the Atlanta metropolitan area between 1999-2005 to examine the risk of PTB associated with gestational exposure to ambient fine particulate matter ≤2.5 μm in aerodynamic diameter (PM2.5). We find positive associations between PM2.5 exposure during early and mid-pregnancy, and evidence that associations are stronger for PTBs occurring around week 30.}, number={3}, journal={BIOSTATISTICS}, author={Chang, Howard H. and Warren, Joshua L. and Darrow, Lyndsey A. and Reich, Brian J. and Waller, Lance A.}, year={2015}, month={Jul}, pages={509–521} }
@comment{chang_warren_darrow_reich_waller_2015: author given name corrected from "Lnydsey". The abstract previously contained "[Formula: see text]" placeholders; they are reconstructed as "≤2.5 μ" and "PM2.5". TODO: confirm the inequality sign against the publisher's abstract.}
@article{stephenson_shaby_reich_sullivan_2015, title={Estimating Spatially Varying Severity Thresholds of a Forest Fire Danger Rating System Using Max-Stable Extreme-Event Modeling}, volume={54}, ISSN={["1558-8432"]}, DOI={10.1175/jamc-d-14-0041.1}, abstractNote={Abstract}, number={2}, journal={JOURNAL OF APPLIED METEOROLOGY AND CLIMATOLOGY}, author={Stephenson, Alec G. and Shaby, Benjamin A. and Reich, Brian J. 
and Sullivan, Andrew L.}, year={2015}, month={Feb}, pages={395–407} } @article{sun_reich_cai_guindani_schwartzman_2015, title={False discovery control in large-scale spatial multiple testing}, volume={77}, ISSN={["1467-9868"]}, DOI={10.1111/rssb.12064}, abstractNote={Summary}, number={1}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Sun, Wenguang and Reich, Brian J. and Cai, T. Tony and Guindani, Michele and Schwartzman, Armin}, year={2015}, month={Jan}, pages={59–83} } @article{grantham_reich_pacifici_laber_menninger_henley_barberán_leff_fierer_dunn_2015, title={Fungi Identify the Geographic Origin of Dust Samples}, volume={10}, ISSN={1932-6203}, url={http://dx.doi.org/10.1371/journal.pone.0122605}, DOI={10.1371/journal.pone.0122605}, abstractNote={There is a long history of archaeologists and forensic scientists using pollen found in a dust sample to identify its geographic origin or history. Such palynological approaches have important limitations as they require time-consuming identification of pollen grains, a priori knowledge of plant species distributions, and a sufficient diversity of pollen types to permit spatial or temporal identification. We demonstrate an alternative approach based on DNA sequencing analyses of the fungal diversity found in dust samples. Using nearly 1,000 dust samples collected from across the continental U.S., our analyses identify up to 40,000 fungal taxa from these samples, many of which exhibit a high degree of geographic endemism. We develop a statistical learning algorithm via discriminant analysis that exploits this geographic endemicity in the fungal diversity to correctly identify samples to within a few hundred kilometers of their geographic origin with high probability. 
In addition, our statistical approach provides a measure of certainty for each prediction, in contrast with current palynology methods that are almost always based on expert opinion and devoid of statistical inference. Fungal taxa found in dust samples can therefore be used to identify the origin of that dust and, more importantly, we can quantify our degree of certainty that a sample originated in a particular place. This work opens up a new approach to forensic biology that could be used by scientists to identify the origin of dust or soil samples found on objects, clothing, or archaeological artifacts.}, number={4}, journal={PLOS ONE}, publisher={Public Library of Science (PLoS)}, author={Grantham, Neal S. and Reich, Brian J. and Pacifici, Krishna and Laber, Eric B. and Menninger, Holly L. and Henley, Jessica B. and Barberán, Albert and Leff, Jonathan W. and Fierer, Noah and Dunn, Robert R.}, editor={Rokas, Antonis}, year={2015}, month={Apr}, pages={e0122605} }
@comment{The PLOS ONE entry above: the editor field previously read "Rokas, AntonisEditor", with the role label "Editor" fused onto the given name. The label was removed so the name parses as Last=Rokas, First=Antonis.}
@article{kao_reich_storlie_anderson_2015, title={Malware Detection Using Nonparametric Bayesian Clustering and Classification Techniques}, volume={57}, ISSN={["1537-2723"]}, DOI={10.1080/00401706.2014.958916}, abstractNote={Computer security requires statistical methods to quickly and accurately flag malicious programs. This article proposes a nonparametric Bayesian approach for classifying programs as benign or malicious and simultaneously clustering malicious programs. The analysis is based on the dynamic trace (DT) of instructions under the first-order Markov assumption. Each row of the trace’s transition matrix is modeled using the Dirichlet process mixture (DPM) model. The DPM model clusters programs within each class (malicious or benign), and produces the posterior probability of being a malware which is used for classification. The novelty of the model is using this clustering algorithm to improve the classification accuracy. 
The simulation study shows that the DPM model outperforms the elastic net logistic (ENL) regression and the support vector machine (SVM) in classification performance under most of the scenarios, and also outperforms the spectral clustering method for grouping similar malware. In an analysis of real malicious and benign programs, the DPM model gives significantly better classification performance than the ENL model, and competitive results to the SVM. More importantly, the DPM model identifies clusters of programs during the classification procedure which is useful for reverse engineering.}, number={4}, journal={TECHNOMETRICS}, author={Kao, Yimin and Reich, Brian and Storlie, Curtis and Anderson, Blake}, year={2015}, month={Oct}, pages={535–546} } @article{farjat_isik_reich_whetten_mckeand_2015, title={Modeling Climate Change Effects on the Height Growth of Loblolly Pine}, volume={61}, ISSN={0015-749X}, url={http://dx.doi.org/10.5849/forsci.14-075}, DOI={10.5849/forsci.14-075}, abstractNote={We present a statistical model to predict the effects of climate change on the height growth of loblolly pine (Pinus taeda L.) 
families in the southeastern United States. Provenance-progeny trials were used for assessing the response of loblolly pine seed sources to environmental change. Ordinary least squares, ridge regression, and LASSO regression were used to develop height growth prediction models. The approach integrates both genetic and environmental effects and is meant to overcome the critical limitations of population response function and transfer function methods by making full use of data from provenance trials. Prediction models were tested using a hypothetical future climate scenario with 5% decrease in precipitation and 0.5°C increase in maximum and minimum temperatures, relative to historical average values. Under this scenario, local families from the coastal plains of Georgia, Florida, and South Carolina showed the highest performance relative to the current climate in their native environments. As these seed sources were moved to colder northern and inland regions from their origin, we observed declines in their height growth. Similarly, the climatic change scenario suggested that performance of northern seed sources declined significantly when they were moved to more southern warmer regions. The statistical model can be used as a quantitative tool to model the effect of climatic variables on the performance of loblolly pine seed sources and may help to develop sound breeding deployment strategies.}, number={4}, journal={Forest Science}, publisher={Oxford University Press (OUP)}, author={Farjat, Alfredo E. and Isik, Fikret and Reich, Brian J. and Whetten, Ross W. and McKeand, Steven E.}, year={2015}, month={Aug}, pages={703–715} } @article{smith_reich_herring_langlois_fuentes_2015, title={Multilevel quantile function modeling with application to birth outcomes}, volume={71}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12294}, abstractNote={Summary}, number={2}, journal={BIOMETRICS}, author={Smith, Luke B. and Reich, Brian J. and Herring, Amy H. and Langlois, Peter H. 
and Fuentes, Montserrat}, year={2015}, month={Jun}, pages={508–519} } @article{coleman_martin_reich_2015, title={Multiple window discrete scan statistic for higher-order Markovian sequences}, volume={42}, ISSN={["1360-0532"]}, DOI={10.1080/02664763.2015.1005061}, abstractNote={Accurate and efficient methods to detect unusual clusters of abnormal activity are needed in many fields such as medicine and business. Often the size of clusters is unknown; hence, multiple (variable) window scan statistics are used to identify clusters using a set of different potential cluster sizes. We give an efficient method to compute the exact distribution of multiple window discrete scan statistics for higher-order, multi-state Markovian sequences. We define a Markov chain to efficiently keep track of probabilities needed to compute p-values for the statistic. The state space of the Markov chain is set up by a criterion developed to identify strings that are associated with observing the specified values of the statistic. Using our algorithm, we identify cases where the available approximations do not perform well. We demonstrate our methods by detecting unusual clusters of made free throw shots by National Basketball Association players during the 2009–2010 regular season.}, number={8}, journal={JOURNAL OF APPLIED STATISTICS}, author={Coleman, Deidra A. and Martin, Donald E. K. and Reich, Brian J.}, year={2015}, month={Aug}, pages={1690–1705} } @article{reich_porter_2015, title={Partially supervised spatiotemporal clustering for burglary crime series identification}, volume={178}, ISSN={["1467-985X"]}, DOI={10.1111/rssa.12076}, abstractNote={Summary}, number={2}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A-STATISTICS IN SOCIETY}, author={Reich, Brian J. 
and Porter, Michael D.}, year={2015}, month={Feb}, pages={465–480} } @article{smith_fuentes_gordon-larsen_reich_2015, title={QUANTILE REGRESSION FOR MIXED MODELS WITH AN APPLICATION TO EXAMINE BLOOD PRESSURE TRENDS IN CHINA}, volume={9}, ISSN={["1941-7330"]}, DOI={10.1214/15-aoas841}, abstractNote={Cardiometabolic diseases have substantially increased in China in the past 20 years and blood pressure is a primary modifiable risk factor. Using data from the China Health and Nutrition Survey we examine blood pressure trends in China from 1991 to 2009, with a concentration on age cohorts and urbanicity. Very large values of blood pressure are of interest, so we model the conditional quantile functions of systolic and diastolic blood pressure. This allows the covariate effects in the middle of the distribution to vary from those in the upper tail, the focal point of our analysis. We join the distributions of systolic and diastolic blood pressure using a copula, which permits the relationships between the covariates and the two responses to share information and enables probabilistic statements about systolic and diastolic blood pressure jointly. Our copula maintains the marginal distributions of the group quantile effects while accounting for within-subject dependence, enabling inference at the population and subject levels. Our population level regression effects change across quantile level, year, and blood pressure type, providing a rich environment for inference. To our knowledge, this is the first quantile function model to explicitly model within-subject autocorrelation and is the first quantile function approach that simultaneously models multivariate conditional response. We find that the association between high blood pressure and living in an urban area has evolved from positive to negative, with the strongest changes occurring in the upper tail. 
The increase in urbanization over the last twenty years coupled with the transition from the positive association between urbanization and blood pressure in earlier years to a more uniform association with urbanization suggests increasing blood pressure over time throughout China, even in less urbanized areas. Our methods are available in the R package BSquare.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Smith, Luke B. and Fuentes, Montserrat and Gordon-Larsen, Penny and Reich, Brian J.}, year={2015}, month={Sep}, pages={1226–1246} } @article{reich_fuentes_2015, title={Spatial Bayesian Nonparametric Methods}, ISBN={["978-3-319-19517-9"]}, DOI={10.1007/978-3-319-19518-6_17}, journal={NONPARAMETRIC BAYESIAN INFERENCE IN BIOSTATISTICS}, author={Reich, Brian James and Fuentes, Montserrat}, year={2015}, pages={347–357} } @article{vock_reich_fuentes_dominici_2015, title={Spatial Variable Selection Methods for Investigating Acute Health Effects of Fine Particulate Matter Components}, volume={71}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12254}, abstractNote={Summary}, number={1}, journal={BIOMETRICS}, author={Vock, Laura F. Boehm and Reich, Brian J. and Fuentes, Montserrat and Dominici, Francesca}, year={2015}, month={Mar}, pages={167–177} } @article{reich_shaby_cooley_2014, title={A Hierarchical Model for Serially-Dependent Extremes: A Study of Heat Waves in the Western US}, volume={19}, ISSN={["1537-2693"]}, DOI={10.1007/s13253-013-0161-y}, number={1}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Reich, Brian J. and Shaby, Benjamin A. and Cooley, Daniel}, year={2014}, month={Mar}, pages={119–135} } @article{reich_chang_foley_2014, title={A Spectral Method for Spatial Downscaling}, volume={70}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12196}, abstractNote={Summary}, number={4}, journal={BIOMETRICS}, author={Reich, Brian J. and Chang, Howard H. 
and Foley, Kristen M.}, year={2014}, month={Dec}, pages={932–942} } @article{reich_gardner_2014, title={A spatial capture-recapture model for territorial species}, volume={25}, DOI={10.1002/env.2317}, abstractNote={Advances in field techniques have lead to an increase in spatially referenced capture–recapture data to estimate a species' population size as well as other demographic parameters and patterns of space usage. Statistical models for these data have assumed that the number of individuals in the population and their spatial locations follow a homogeneous Poisson point process model, which implies that the individuals are uniformly and independently distributed over the spatial domain of interest. In many applications, there is reason to question independence, for example, when species display territorial behavior. In this paper, we propose a new statistical model, which allows for dependence between locations to account for avoidance or territorial behavior. We show via a simulation study that accounting for this can improve population size estimates. The method is illustrated using a case study of small mammal trapping data to estimate avoidance and population density of adult female field voles (Microtus agrestis) in Northern England. 
Copyright © 2014 John Wiley & Sons, Ltd.}, number={8}, journal={Environmetrics}, author={Reich, Brian and Gardner, B.}, year={2014}, pages={630–637} } @article{wilson_reich_2014, title={Confounder Selection via Penalized Credible Regions}, volume={70}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12203}, abstractNote={Summary}, number={4}, journal={BIOMETRICS}, author={Wilson, Ander and Reich, Brian J.}, year={2014}, month={Dec}, pages={852–861} } @article{eidsvik_shaby_reich_wheeler_niemi_2014, title={Estimation and Prediction in Spatial Models With Block Composite Likelihoods}, volume={23}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2012.760460}, abstractNote={This article develops a block composite likelihood for estimation and prediction in large spatial datasets. The composite likelihood (CL) is constructed from the joint densities of pairs of adjacent spatial blocks. This allows large datasets to be split into many smaller datasets, each of which can be evaluated separately, and combined through a simple summation. Estimates for unknown parameters are obtained by maximizing the block CL function. In addition, a new method for optimal spatial prediction under the block CL is presented. Asymptotic variances for both parameter estimates and predictions are computed using Godambe sandwich matrices. The approach considerably improves computational efficiency, and the composite structure obviates the need to load entire datasets into memory at once, completely avoiding memory limitations imposed by massive datasets. Moreover, computing time can be reduced even further by distributing the operations using parallel computing. A simulation study shows that CL estimates and predictions, as well as their corresponding asymptotic confidence intervals, are competitive with those based on the full likelihood. The procedure is demonstrated on one dataset from the mining industry and one dataset of satellite retrievals. 
The real-data examples show that the block composite results tend to outperform two competitors; the predictive process model and fixed-rank kriging. Supplementary materials for this article is available online on the journal web site. Key Words: Gaussian process; GPU; Large datasets; Parallel computing; Spatial statistics. SUPPLEMENTARY MATERIALS: Appendix: Score function and Hessian. Datasets, CPU, and GPU examples of code. ACKNOWLEDGMENTS: We thank the Statistical and Applied Mathematical Sciences Institute (SAMSI) for support during the program on space–time analysis (2009–2010). We also thank NVIDIA for supporting us with graphics cards. Rana Gruber provided the joints data, while Noel Cressie and Gardar Johannesson made the TCO data acquired by NASA available to us. Brian Reich was supported by National Science Foundation grant number 1107046.}, number={2}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Eidsvik, Jo and Shaby, Benjamin A. and Reich, Brian J. and Wheeler, Matthew and Niemi, Jarad}, year={2014}, month={Jun}, pages={295–315} } @article{wilson_reif_reich_2014, title={Hierarchical Dose-Response Modeling for High-Throughput Toxicity Screening of Environmental Chemicals}, volume={70}, ISSN={["1541-0420"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84895891991&partnerID=MN8TOARS}, DOI={10.1111/biom.12114}, abstractNote={Summary}, number={1}, journal={BIOMETRICS}, publisher={Wiley-Blackwell}, author={Wilson, Ander and Reif, David M. and Reich, Brian J.}, year={2014}, month={Mar}, pages={237–246} } @article{wilson_rappold_neas_reich_2014, title={MODELING THE EFFECT OF TEMPERATURE ON OZONE-RELATED MORTALITY}, volume={8}, ISSN={["1932-6157"]}, DOI={10.1214/14-aoas754}, abstractNote={Climate change is expected to alter the distribution of ambient ozone levels and temperatures which, in turn, may impact public health. 
Much research has focused on the effect of short-term ozone exposures on mortality and morbidity while controlling for temperature as a confounder, but less is known about the joint effects of ozone and temperature. The extent of the health effects of changing ozone levels and temperatures will depend on whether these effects are additive or synergistic. In this paper we propose a spatial, semi-parametric model to estimate the joint ozone-temperature risk surfaces in 95 US urban areas. Our methodology restricts the ozone-temperature risk surfaces to be monotone in ozone and allows for both nonadditive and nonlinear effects of ozone and temperature. We use data from the National Mortality and Morbidity Air Pollution Study (NMMAPS) and show that the proposed model fits the data better than additive linear and nonlinear models. We then examine the synergistic effect of ozone and temperature both nationally and locally and find evidence of a nonlinear ozone effect and an ozone-temperature interaction at higher temperatures and ozone concentrations.}, number={3}, journal={ANNALS OF APPLIED STATISTICS}, author={Wilson, Ander and Rappold, Ana G. and Neas, Lucas M. and Reich, Brian J.}, year={2014}, month={Sep}, pages={1728–1749} } @article{reich_chang_strickland_2014, title={Spatial health effects analysis with uncertain residential locations}, volume={23}, ISSN={["1477-0334"]}, DOI={10.1177/0962280212447151}, abstractNote={ Spatial epidemiology has benefited greatly from advances in geographic information system technology, which permits extensive study of associations between various health responses and a wide array of socio-economic and environmental factors. However, many spatial epidemiological datasets have missing values for a substantial proportion of spatial variables, such as the census tract of residence of study participants. The standard approach is to discard these observations and analyze only complete observations. 
In this article, we propose a new hierarchical Bayesian spatial model to handle missing observation locations. Our model utilizes all available information to learn about the missing locations and propagates uncertainty about the missing locations throughout the model. We show via a simulation study that this method can lead to more efficient epidemiological analysis. The method is applied to a study of the relationship between fine particulate matter and birth outcomes is southeast Georgia, where we find smaller posterior variance for most parameters using our missing data model compared to the standard complete case model. }, number={2}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Reich, Brian J. and Chang, Howard H. and Strickland, Matthew J.}, year={2014}, month={Apr}, pages={156–168} } @article{wang_reich_lim_2013, title={A Bayesian approach to probabilistic streamflow forecasts}, volume={15}, ISSN={["1465-1734"]}, DOI={10.2166/hydro.2012.080}, abstractNote={One-month-ahead streamflow forecasting is important for water utilities to manage water resources such as irrigation water usage and hydropower generation. While deterministic streamflow forecasts have been utilized extensively in research and practice, ensemble streamflow forecasts and probabilistic information are gaining more attention. This study aims to examine a multivariate linear Bayesian regression approach to provide probabilistic streamflow forecasts by incorporating gridded precipitation forecasts from climate models and lagged monthly streamflow data. Principal component analysis is applied to reduce the size of the regression model. A Markov Chain Monte Carlo (MCMC) algorithm is used to sample from the posterior distribution of model parameters. The proposed approach is tested on gauge data acquired during 1961–2000 in North Carolina. 
Results reveal that the proposed method is a promising alternative forecasting technique and that it performs well for probabilistic streamflow forecasts.}, number={2}, journal={JOURNAL OF HYDROINFORMATICS}, author={Wang, Hui and Reich, Brian and Lim, Yeo Howe}, year={2013}, pages={381–391} } @article{reich_bandyopadhyay_bondell_2013, title={A Nonparametric Spatial Model for Periodontal Data With Nonrandom Missingness}, volume={108}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2013.795487}, abstractNote={Periodontal disease (PD) progression is often quantified by clinical attachment level (CAL) defined as the distance down a tooth's root that is detached from the surrounding bone. Measured at six locations per tooth throughout the mouth (excluding the molars), it gives rise to a dependent data setup. These data are often reduced to a one-number summary, such as the whole-mouth average or the number of observations greater than a threshold, to be used as the response in a regression to identify important covariates related to the current state of a subject's periodontal health. Rather than a simple one-number summary, we set forward to analyze all available CAL data for each subject, exploiting the presence of spatial dependence, nonstationarity, and nonnormality. Also, many subjects have a considerable proportion of missing teeth, which cannot be considered missing at random because PD is the leading cause of adult tooth loss. Under a Bayesian paradigm, we propose a nonparametric flexible spatial (joint) model of observed CAL and the location of missing tooth via kernel convolution methods, incorporating the aforementioned features of CAL data under a unified framework. 
Application of this methodology to a dataset recording the periodontal health of an African-American population, as well as simulation studies reveal the gain in model fit and inference, and provides a new perspective into unraveling covariate–response relationships in the presence of complexities posed by these data.},
  number={503},
  journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION},
  author={Reich, Brian J. and Bandyopadhyay, Dipankar and Bondell, Howard D.},
  year={2013},
  month=sep,
  pages={820--831}
}
@article{chang_reich_miranda_2013,
  title={A spatial time-to-event approach for estimating associations between air pollution and preterm birth},
  volume={62},
  journal={Journal of the Royal Statistical Society. Series C, Applied Statistics},
  author={Chang, H. H. and Reich, B. J. and Miranda, M. L.},
  year={2013},
  pages={167--179}
}
@article{storlie_reich_helton_swiler_sallaberry_2013,
  title={Analysis of computationally demanding models with continuous and categorical inputs},
  volume={113},
  ISSN={1879-0836},
  DOI={10.1016/j.ress.2012.11.018},
  abstractNote={The analysis of many physical and engineering problems involves running complex computational models (e.g., simulation models and computer codes). With problems of this type, it is important to understand the relationships between the input (whose values are often imprecisely known) and the output variables, and to characterize the uncertainty in the output. Often, some of the input variables are categorical in nature (e.g., pointer variables to alternative models or different types of material, etc.). A computational model that sufficiently represents reality is often very costly in terms of run time. When the models are computationally demanding, meta-model approaches to their analysis have been shown to be very useful. However, the most popular meta-models for computational computer models do not explicitly allow for categorical input variables.
In this case, categorical inputs are simply ordered in some way and treated as continuous variables in the estimation of a meta-model. In many cases, this can lead to undesirable and misleading results. In this paper, two meta-models based on functional ANOVA decomposition are presented that explicitly allow for an appropriate treatment of categorical inputs. The effectiveness of the presented meta-models in the analysis of models with continuous and categorical inputs is illustrated with several test cases and also with results from a real analysis.},
  journal={RELIABILITY ENGINEERING \& SYSTEM SAFETY},
  author={Storlie, Curtis B. and Reich, Brian J. and Helton, Jon C. and Swiler, Laura P. and Sallaberry, Cedric J.},
  year={2013},
  month=may,
  pages={30--41}
}
@article{reich_smith_2013,
  title={{Bayesian} Quantile Regression for Censored Data},
  volume={69},
  ISSN={1541-0420},
  DOI={10.1111/biom.12053},
  number={3},
  journal={BIOMETRICS},
  author={Reich, Brian J. and Smith, Luke B.},
  year={2013},
  month=sep,
  pages={651--660}
}
@article{boehm_reich_bandyopadhyay_2013,
  title={Bridging Conditional and Marginal Inference for Spatially Referenced Binary Data},
  volume={69},
  ISSN={0006-341X},
  DOI={10.1111/biom.12027},
  number={2},
  journal={BIOMETRICS},
  author={Boehm, Laura and Reich, Brian J. and Bandyopadhyay, Dipankar},
  year={2013},
  month=jun,
  pages={545--554}
}
@article{mannshardt_sucic_jiao_dominici_frey_reich_fuentes_2013,
  title={Comparing exposure metrics for the effects of fine particulate matter on emergency hospital admissions},
  volume={23},
  ISSN={1559-064X},
  url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84886725530&partnerID=MN8TOARS},
  DOI={10.1038/jes.2013.39},
  abstractNote={A crucial step in an epidemiological study of the effects of air pollution is to accurately quantify exposure of the population.
In this paper, we investigate the sensitivity of the health effects estimates associated with short-term exposure to fine particulate matter with respect to three potential metrics for daily exposure: ambient monitor data, estimated values from a deterministic atmospheric chemistry model, and stochastic daily average human exposure simulation output. Each of these metrics has strengths and weaknesses when estimating the association between daily changes in ambient exposure to fine particulate matter and daily emergency hospital admissions. Monitor data is readily available, but is incomplete over space and time. The atmospheric chemistry model output is spatially and temporally complete but may be less accurate than monitor data. The stochastic human exposure estimates account for human activity patterns and variability in pollutant concentration across microenvironments, but requires extensive input information and computation time. To compare these metrics, we consider a case study of the association between fine particulate matter and emergency hospital admissions for respiratory cases for the Medicare population across three counties in New York. Of particular interest is to quantify the impact and/or benefit to using the stochastic human exposure output to measure ambient exposure to fine particulate matter. Results indicate that the stochastic human exposure simulation output indicates approximately the same increase in the relative risk associated with emergency admissions as using a chemistry model or monitoring data as exposure metrics. However, the stochastic human exposure simulation output and the atmospheric chemistry model both bring additional information, which helps to reduce the uncertainty in our estimated risk.},
  number={6},
  journal={JOURNAL OF EXPOSURE SCIENCE AND ENVIRONMENTAL EPIDEMIOLOGY},
  author={Mannshardt, Elizabeth and Sucic, Katarina and Jiao, Wan and Dominici, Francesca and Frey, H.
Christopher and Reich, Brian and Fuentes, Montserrat},
  year={2013},
  pages={627--636}
}
@article{reich_porter_2013,
  title={Discussion of ``Estimating the Historical and Future Probabilities of Large Terrorist Events'' by {Aaron Clauset} and {Ryan Woodard}},
  volume={7},
  ISSN={1932-6157},
  DOI={10.1214/13-aoas614b},
  abstractNote={Discussion of "Estimating the historical and future probabilities of large terrorist events" by Aaron Clauset and Ryan Woodard [arXiv:1209.0089].},
  number={4},
  journal={ANNALS OF APPLIED STATISTICS},
  author={Reich, Brian J. and Porter, Michael D.},
  year={2013},
  month=dec,
  pages={1871--1875}
}
@article{reich_cooley_foley_napelenok_shaby_2013,
  title={Extreme Value Analysis for Evaluating Ozone Control Strategies},
  volume={7},
  ISSN={1932-6157},
  DOI={10.1214/13-aoas628},
  abstractNote={Tropospheric ozone is one of six criteria pollutants regulated by the US EPA, and has been linked to respiratory and cardiovascular endpoints and adverse effects on vegetation and ecosystems. Regional photochemical models have been developed to study the impacts of emission reductions on ozone levels. The standard approach is to run the deterministic model under new emission levels and attribute the change in ozone concentration to the emission control strategy. However, running the deterministic model requires substantial computing time, and this approach does not provide a measure of uncertainty for the change in ozone levels. Recently, a reduced form model (RFM) has been proposed to approximate the complex model as a simple function of a few relevant inputs. In this paper, we develop a new statistical approach to make full use of the RFM to study the effects of various control strategies on the probability and magnitude of extreme ozone events.
We fuse the model output with monitoring data to calibrate the RFM by modeling the conditional distribution of monitoring data given the RFM using a combination of flexible semiparametric quantile regression for the center of the distribution where data are abundant and a parametric extreme value distribution for the tail where data are sparse. Selected parameters in the conditional distribution are allowed to vary by the RFM value and the spatial location. Also, due to the simplicity of the RFM, we are able to embed the RFM in our Bayesian hierarchical framework to obtain a full posterior for the model input parameters, and propagate this uncertainty to the estimation of the effects of the control strategies. We use the new framework to evaluate three potential control strategies, and find that reducing mobile-source emissions has a larger impact than reducing point-source emissions or a combination of several emission sources.},
  number={2},
  journal={ANNALS OF APPLIED STATISTICS},
  author={Reich, Brian and Cooley, Daniel and Foley, Kristen and Napelenok, Sergey and Shaby, Benjamin},
  year={2013},
  month=jun,
  pages={739--762}
}
@article{fuentes_reich_2013,
  title={Multivariate spatial nonparametric modelling via kernel processes mixing},
  volume={23},
  number={1},
  journal={Statistica Sinica},
  author={Fuentes, M. and Reich, B.},
  year={2013},
  pages={75--97}
}
@article{fuentes_henry_reich_2013,
  title={Nonparametric spatial models for extremes: application to extreme temperature data},
  volume={16},
  ISSN={1572-915X},
  DOI={10.1007/s10687-012-0154-1},
  abstractNote={Estimating the probability of extreme temperature events is difficult because of limited records across time and the need to extrapolate the distributions of these events, as opposed to just the mean, to locations where observations are not available. Another related issue is the need to characterize the uncertainty in the estimated probability of extreme events at different locations.
Although the tools for statistical modeling of univariate extremes are well-developed, extending these tools to model spatial extreme data is an active area of research. In this paper, in order to make inference about spatial extreme events, we introduce a new nonparametric model for extremes. We present a Dirichlet-based copula model that is a flexible alternative to parametric copula models such as the normal and t-copula. The proposed modelling approach is fitted using a Bayesian framework that allows us to take into account different sources of uncertainty in the data and models. We apply our methods to annual maximum temperature values in the east-south-central United States.},
  number={1},
  journal={EXTREMES},
  author={Fuentes, Montserrat and Henry, John and Reich, Brian},
  year={2013},
  month=mar,
  pages={75--101}
}
@article{reich_shaby_2012,
  title={A Hierarchical Max-Stable Spatial Model for Extreme Precipitation},
  volume={6},
  ISSN={1932-6157},
  DOI={10.1214/12-aoas591},
  abstractNote={Extreme environmental phenomena such as major precipitation events manifestly exhibit spatial dependence. Max-stable processes are a class of asymptotically-justified models that are capable of representing spatial dependence among extreme values. While these models satisfy modeling requirements, they are limited in their utility because their corresponding joint likelihoods are unknown for more than a trivial number of spatial locations, preventing, in particular, Bayesian analyses. In this paper, we propose a new random effects model to account for spatial dependence. We show that our specification of the random effect distribution leads to a max-stable process that has the popular Gaussian extreme value process (GEVP) as a limiting case. The proposed model is used to analyze the yearly maximum precipitation from a regional climate model.},
  number={4},
  journal={ANNALS OF APPLIED STATISTICS},
  author={Reich, Brian J.
and Shaby, Benjamin A.},
  year={2012},
  month=dec,
  pages={1430--1451}
}
@article{foley_reich_napelenok_2012,
  title={{Bayesian} Analysis of a Reduced-Form Air Quality Model},
  volume={46},
  ISSN={0013-936X},
  DOI={10.1021/es300666e},
  abstractNote={Numerical air quality models are being used for assessing emission control strategies for improving ambient pollution levels across the globe. This paper applies probabilistic modeling to evaluate the effectiveness of emission reduction scenarios aimed at lowering ground-level ozone concentrations. A Bayesian hierarchical model is used to combine air quality model output and monitoring data in order to characterize the impact of emissions reductions while accounting for different degrees of uncertainty in the modeled emissions inputs. The probabilistic model predictions are weighted based on population density in order to better quantify the societal benefits/disbenefits of four hypothetical emission reduction scenarios in which domain-wide NO(x) emissions from various sectors are reduced individually and then simultaneously. Cross validation analysis shows the statistical model performs well compared to observed ozone levels. Accounting for the variability and uncertainty in the emissions and atmospheric systems being modeled is shown to impact how emission reduction scenarios would be ranked, compared to standard methodology.},
  number={14},
  journal={ENVIRONMENTAL SCIENCE \& TECHNOLOGY},
  author={Foley, Kristen M. and Reich, Brian J. and Napelenok, Sergey L.},
  year={2012},
  month=jul,
  pages={7604--7611}
}
@article{shaby_reich_2012,
  title={{Bayesian} spatial extreme value analysis to assess the changing risk of concurrent high temperatures across large portions of {European} cropland},
  volume={23},
  ISSN={1099-095X},
  DOI={10.1002/env.2178},
  abstractNote={There is strong evidence that extremely high temperatures are detrimental to the yield and quality of many economically and socially critical crops.
Fortunately, the most deleterious conditions for agriculture occur rarely. We wish to assess the risk of the catastrophic scenario in which large areas of croplands experience extreme heat stress during the same growing season. Applying a hierarchical Bayesian spatial extreme value model that allows the distribution of extreme temperatures to change in time both marginally and in spatial coherence, we examine whether the risk of widespread extremely high temperatures across agricultural land in Europe has increased over the last century. Copyright © 2012 John Wiley & Sons, Ltd.},
  number={8},
  journal={ENVIRONMETRICS},
  author={Shaby, Benjamin A. and Reich, Brian J.},
  year={2012},
  month=dec,
  pages={638--648}
}
@article{chang_reich_miranda_2012,
  title={{Chang} et al. Respond to ``Environmental Exposures and Preterm Birth''},
  volume={175},
  ISSN={1476-6256},
  DOI={10.1093/aje/kwr406},
  number={2},
  journal={AMERICAN JOURNAL OF EPIDEMIOLOGY},
  author={Chang, Howard H. and Reich, Brian J. and Miranda, Marie Lynn},
  year={2012},
  month=jan,
  pages={111--112}
}
@article{modlin_fuentes_reich_2012,
  title={Circular conditional autoregressive modeling of vector fields},
  volume={23},
  ISSN={1180-4009},
  DOI={10.1002/env.1133},
  abstractNote={As hurricanes approach landfall, there are several hazards for which coastal populations must be prepared. Damaging winds, torrential rains, and tornadoes play havoc with both the coast and inland areas; but, the biggest seaside menace to life and property is the storm surge. Wind fields are used as the primary forcing for the numerical forecasts of the coastal ocean response to hurricane force winds, such as the height of the storm surge and the degree of coastal flooding. Unfortunately, developments in deterministic modeling of these forcings have been hindered by computational expenses. In this paper, we present a multivariate spatial model for vector fields, that we apply to hurricane winds.
We parameterize the wind vector at each site in polar coordinates and specify a circular conditional autoregressive model for the vector direction, and a spatial CAR model for speed. We apply our framework for vector fields to hurricane surface wind fields for Hurricane Floyd of 1999 and compare our circular conditional autoregressive model to prior methods that decompose wind speed and direction into its N‐S and W‐E cardinal components. Copyright © 2011 John Wiley & Sons, Ltd.},
  number={1},
  journal={ENVIRONMETRICS},
  author={Modlin, Danny and Fuentes, Montserrat and Reich, Brian},
  year={2012},
  month=feb,
  pages={46--53}
}
@article{bondell_reich_2012,
  title={Consistent High-Dimensional {Bayesian} Variable Selection via Penalized Credible Regions},
  volume={107},
  ISSN={1537-274X},
  DOI={10.1080/01621459.2012.716344},
  abstractNote={For high-dimensional data, particularly when the number of predictors greatly exceeds the sample size, selection of relevant predictors for regression is a challenging problem. Methods such as sure screening, forward selection, or penalized regressions are commonly used. Bayesian variable selection methods place prior distributions on the parameters along with a prior over model space, or equivalently, a mixture prior on the parameters having mass at zero. Since exhaustive enumeration is not feasible, posterior model probabilities are often obtained via long Markov chain Monte Carlo (MCMC) runs. The chosen model can depend heavily on various choices for priors and also posterior thresholds. Alternatively, we propose a conjugate prior only on the full model parameters and use sparse solutions within posterior credible regions to perform selection. These posterior credible regions often have closed-form representations, and it is shown that these sparse solutions can be computed via existing algorithms. The approach is shown to outperform common methods in the high-dimensional setting, particularly under correlation.
By searching for a sparse solution within a joint credible region, consistent model selection is established. Furthermore, it is shown that, under certain conditions, the use of marginal credible intervals can give consistent selection up to the case where the dimension grows exponentially in the sample size. The proposed approach successfully accomplishes variable selection in the high-dimensional setting, while avoiding pitfalls that plague typical Bayesian variable selection methods.},
  number={500},
  journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION},
  author={Bondell, Howard D. and Reich, Brian J.},
  year={2012},
  month=dec,
  pages={1610--1624}
}
@article{cooley_sain_gabda_towe_wadsworth_tawn_segers_shaby_reich_davison_et_al_2012,
  title={Discussion of ``Statistical modeling of spatial extremes'' by {A. C. Davison}, {S. A. Padoan} and {M. Ribatet}},
  volume={27},
  number={2},
  journal={Statistical Science},
  author={Cooley, D. and Sain, S. R. and Gabda, D. and Towe, R. and Wadsworth, J. and Tawn, J. and Segers, J. and Shaby, B. and Reich, B. J. and Davison, A. C. and others},
  year={2012},
  pages={187--201}
}
@article{porter_reich_2012,
  title={Evaluating temporally weighted kernel density methods for predicting the next event location in a series},
  volume={18},
  ISSN={1947-5683 1947-5691},
  url={http://dx.doi.org/10.1080/19475683.2012.691904},
  DOI={10.1080/19475683.2012.691904},
  abstractNote={One aspect of tactical crime or terrorism analysis is predicting the location of the next event in a series. The objective of this article is to present a methodology to identify the optimal parameters and to test the performance of temporally weighted kernel density estimation models for predicting the next event in a criminal or terrorist event series. By placing event series in a space–time point pattern framework, the next event prediction models are shown to be based on estimating a conditional spatial density function.
We use temporal weights that indicate how much influence past events have toward predicting future event locations, which can also incorporate uncertainty in the event timing. Results of applying this methodology to crime series in Baltimore County, MD, indicate that performance can vary greatly by crime type and little by series length and is fairly robust to choice of bandwidth.},
  number={3},
  journal={Annals of GIS},
  publisher={Informa UK Limited},
  author={Porter, Michael D. and Reich, Brian J.},
  year={2012},
  month=sep,
  pages={225--240}
}
@article{hayashi_hayashi_reich_lee_sachdeva_mizoguchi_2012,
  title={Functional data analysis of mandibular movement using third-degree b-spline basis functions and self-modeling regression},
  volume={71},
  ISSN={1344-0241 1878-1837},
  url={http://dx.doi.org/10.1016/j.odw.2011.11.001},
  DOI={10.1016/j.odw.2011.11.001},
  abstractNote={The purposes of this study were (1) to establish a new method for analyzing the movement of an incisor point on the mandible as mandibular movement and for analyzing noisy mandibular finite helical axis (FHA) parameters, and (2) to apply this new method in a clinical situation. The subjects were patients with anterior crossbite who were scheduled to receive orthognathic surgery. Chewing movement was measured by an opto-electronic motion-analysis system that can detect mandibular movement in space. The population average curves (trajectories) of the incisor point and the position vector of the FHA during chewing were calculated using third-degree b-spline basis functions and self-modeling regression (SEMOR). Although this study focused on the use of a new statistical model for assessing mandibular movement, the results demonstrated the effectiveness of the combination of the FHA and an additional, supplementary scientific expression of movement, the trajectory of an incisor point. Furthermore, the effectiveness of this new method was demonstrated in a clinical situation.
The results of this study demonstrated that the calculation of population average curves is effective for clarifying the characteristics of functional data, such as in mandibular movement.},
  number={1},
  journal={Orthodontic Waves},
  publisher={Informa UK Limited},
  author={Hayashi, Kazuo and Hayashi, Meiri and Reich, Brian and Lee, Seung-Pyo and Sachdeva, Arjun U. C. and Mizoguchi, Itaru},
  year={2012},
  month=mar,
  pages={17--25}
}
@article{reich_fuentes_2012,
  title={Nonparametric {Bayesian} models for a spatial covariance},
  volume={9},
  ISSN={1572-3127},
  DOI={10.1016/j.stamet.2011.01.007},
  abstractNote={A crucial step in the analysis of spatial data is to estimate the spatial correlation function that determines the relationship between a spatial process at two locations. The standard approach to selecting the appropriate correlation function is to use prior knowledge or exploratory analysis, such as a variogram analysis, to select the correct parametric correlation function. Rather than selecting a particular parametric correlation function, we treat the covariance function as an unknown function to be estimated from the data. We propose a flexible prior for the correlation function to provide robustness to the choice of correlation function. We specify the prior for the correlation function using spectral methods and the Dirichlet process prior, which is a common prior for an unknown distribution function. Our model does not require Gaussian data or spatial locations on a regular grid. The approach is demonstrated using a simulation study as well as an analysis of California air pollution data.},
  number={1--2},
  journal={STATISTICAL METHODOLOGY},
  author={Reich, Brian J. and Fuentes, Montserrat},
  year={2012},
  pages={265--274}
}
@article{reich_2012,
  title={Spatiotemporal quantile regression for detecting distributional changes in environmental processes},
  volume={61},
  journal={Journal of the Royal Statistical Society. Series C, Applied Statistics},
  author={Reich, B.
J.},
  year={2012},
  pages={535--553}
}
@article{reich_kalendra_storlie_bondell_fuentes_2012,
  title={Variable selection for high dimensional {Bayesian} density estimation: application to human exposure simulation},
  volume={61},
  journal={Journal of the Royal Statistical Society. Series C, Applied Statistics},
  author={Reich, B. J. and Kalendra, E. and Storlie, C. B. and Bondell, H. D. and Fuentes, M.},
  year={2012},
  pages={47--66}
}
@article{reich_bondell_2011,
  title={A Spatial {Dirichlet} Process Mixture Model for Clustering Population Genetics Data},
  volume={67},
  ISSN={0006-341X},
  DOI={10.1111/j.1541-0420.2010.01484.x},
  abstractNote={Summary Identifying homogeneous groups of individuals is an important problem in population genetics. Recently, several methods have been proposed that exploit spatial information to improve clustering algorithms. In this article, we develop a Bayesian clustering algorithm based on the Dirichlet process prior that uses both genetic and spatial information to classify individuals into homogeneous clusters for further study. We study the performance of our method using a simulation study and use our model to cluster wolverines in Western Montana using microsatellite data.},
  number={2},
  journal={BIOMETRICS},
  author={Reich, Brian J. and Bondell, Howard D.},
  year={2011},
  month=jun,
  pages={381--390}
}
@article{bandyopadhyay_reich_slate_2011,
  title={A spatial beta-binomial model for clustered count data on dental caries},
  volume={20},
  ISSN={1477-0334},
  DOI={10.1177/0962280210372453},
  abstractNote={One of the most important indicators of dental caries prevalence is the total count of decayed, missing or filled surfaces in a tooth. These count data are often clustered in nature (several count responses clustered within a subject), over-dispersed as well as spatially referenced (a diseased tooth might be positively influencing the decay process of a set of neighbouring teeth).
In this article, we develop a multivariate spatial beta-binomial (BB) model for these data that accommodates both over-dispersion as well as latent spatial associations. Using a Bayesian paradigm, the re-parameterised marginal mean (as well as variance) under the BB framework are modelled using a regression on subject/tooth-specific co-variables and a conditionally autoregressive prior that models the latent spatial process. The necessity of exploiting spatial associations to model count data arising in dental caries research is demonstrated using a small simulation study. Real data confirms that our spatial BB model provides a superior estimation and model fit as compared to other sub-models that do not consider modelling spatial associations.},
  number={2},
  journal={STATISTICAL METHODS IN MEDICAL RESEARCH},
  author={Bandyopadhyay, Dipankar and Reich, Brian J. and Slate, Elizabeth H.},
  year={2011},
  month=apr,
  pages={85--102}
}
@article{reich_fuentes_dunson_2011,
  title={{Bayesian} Spatial Quantile Regression},
  volume={106},
  ISSN={1537-274X},
  DOI={10.1198/jasa.2010.ap09237},
  abstractNote={Tropospheric ozone is one of the six criteria pollutants regulated by the United States Environmental Protection Agency under the Clean Air Act and has been linked with several adverse health effects, including mortality. Due to the strong dependence on weather conditions, ozone may be sensitive to climate change and there is great interest in studying the potential effect of climate change on ozone, and how this change may affect public health. In this paper we develop a Bayesian spatial model to predict ozone under different meteorological conditions, and use this model to study spatial and temporal trends and to forecast ozone concentrations under different climate scenarios. We develop a spatial quantile regression model that does not assume normality and allows the covariates to affect the entire conditional distribution, rather than just the mean.
The conditional distribution is allowed to vary from site-to-site and is smoothed with a spatial prior. For extremely large datasets our model is computationally infeasible, and we develop an approximate method. We apply the approximate version of our model to summer ozone from 1997-2005 in the Eastern U.S., and use deterministic climate models to project ozone under future climate conditions. Our analysis suggests that holding all other factors fixed, an increase in daily average temperature will lead to the largest increase in ozone in the Industrial Midwest and Northeast.},
  number={493},
  journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION},
  author={Reich, Brian J. and Fuentes, Montserrat and Dunson, David B.},
  year={2011},
  month=mar,
  pages={6--20}
}
@article{pati_reich_dunson_2011,
  title={{Bayesian} geostatistical modelling with informative sampling locations},
  volume={98},
  ISSN={1464-3510},
  DOI={10.1093/biomet/asq067},
  abstractNote={We consider geostatistical models that allow the locations at which data are collected to be informative about the outcomes. A Bayesian approach is proposed, which models the locations using a log Gaussian Cox process, while modelling the outcomes conditionally on the locations as Gaussian with a Gaussian process spatial random effect and adjustment for the location intensity process. We prove posterior propriety under an improper prior on the parameter controlling the degree of informative sampling, demonstrating that the data are informative. In addition, we show that the density of the locations and mean function of the outcome process can be estimated consistently under mild assumptions. The methods show significant evidence of informative sampling when applied to ozone data over Eastern U.S.A.},
  number={1},
  journal={BIOMETRIKA},
  author={Pati, D. and Reich, B. J. and Dunson, D.
B.},
  year={2011},
  month=mar,
  pages={35--48}
}
@article{reich_haran_2011,
  title={Guest Editors' Introduction to the Special Issue on ``Computer Models and Spatial Statistics for Environmental Science''},
  volume={16},
  ISSN={1085-7117},
  DOI={10.1007/s13253-011-0071-9},
  abstractNote={The December 2011 issue of the Journal of Agricultural, Biological, and Environmental Statistics is on the topic “Computer models and spatial statistics for environmental science.” This is a topic of great interest as the study of complex environmental phenomena increasingly relies on deterministic computer models. These models, for example regional climate models or rainfall-runoff simulators, are mathematical models that describe the evolution in time of a physical process. Usually, they consist of complex differential or partial differential equations that are not solvable in closed form. Hence, these are typically solved using numerical techniques, yielding deterministic predictions of a process. In this special issue, researchers tackle several important statistical problems that arise in the analysis of computer model output, for example calibrating model output with observed data, comparing and combining output from several computer models and physical observations, and building statistical emulators for computer models to predict the outcome of the models for new sets of input conditions. An important contribution of statisticians in the analysis of deterministic models is to quantify uncertainty in inferences and predictions in rigorous fashion. Uncertainty quantification is of great interest, especially as information from complex computer models and messy observational data is used for decision making. There are several types of uncertainty, including (1) parametric uncertainty in the model’s inputs or tuning parameters and (2) structural uncertainty in the mathematical equations that define the model.
In “First-Order Emulator Inference for Parameters in Nonlinear Mechanistic Models”, Mevin B. Hooten, William B. Leeds, Jerome Fiechter, and Christopher K. Wikle provide a computationally-efficient method for quantifying parametric uncertainty. They approximate the complicated computer model with a more tractable statistical model, and use},
  number={4},
  journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS},
  author={Reich, Brian J. and Haran, Murali},
  year={2011},
  month=dec,
  pages={451--452}
}
@article{havard_reich_bean_chaix_2011,
  title={Social inequalities in residential exposure to road traffic noise: An environmental justice analysis based on the {RECORD Cohort Study}},
  volume={68},
  ISSN={1470-7926},
  DOI={10.1136/oem.2010.060640},
  abstractNote={Objectives To explore social inequalities in residential exposure to road traffic noise in an urban area. Methods Environmental injustice in road traffic noise exposure was investigated in Paris, France, using the RECORD Cohort Study (n=2130) and modelled noise data. Associations were assessed by estimating noise exposure within the local area around participants' residence, considering various socioeconomic variables defined at both individual and neighbourhood level, and comparing different regression models attempting or not to control for spatial autocorrelation in noise levels. Results After individual-level adjustment, participants' noise exposure increased with neighbourhood educational level and dwelling value but also with proportion of non-French citizens, suggesting seemingly contradictory findings. However, when country of citizenship was defined according to its human development level, noise exposure in fact increased and decreased with the proportions of citizens from advantaged and disadvantaged countries, respectively.
These findings were consistent with those reported for the other socioeconomic characteristics, suggesting higher road traffic noise exposure in advantaged neighbourhoods. Substantial collinearity between neighbourhood explanatory variables and spatial random effects caused identifiability problems that prevented successful control for spatial autocorrelation. Conclusions Contrary to previous literature, this study shows that people living in advantaged neighbourhoods were more exposed to road traffic noise in their residential environment than their deprived counterparts. This case study demonstrates the need to systematically perform sensitivity analyses with multiple socioeconomic characteristics to avoid incorrect inferences about an environmental injustice situation and the complexity of effectively controlling for spatial autocorrelation when fixed and random components of the model are correlated.}, number={5}, journal={OCCUPATIONAL AND ENVIRONMENTAL MEDICINE}, author={Havard, Sabrina and Reich, Brian J. and Bean, Kathy and Chaix, Basile}, year={2011}, month={May}, pages={366–374} } @article{reich_bondell_li_2011, title={Sufficient Dimension Reduction via Bayesian Mixture Modeling}, volume={67}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2010.01501.x}, abstractNote={Summary Dimension reduction is central to an analysis of data with many predictors. Sufficient dimension reduction aims to identify the smallest possible number of linear combinations of the predictors, called the sufficient predictors, that retain all of the information in the predictors about the response distribution. In this article, we propose a Bayesian solution for sufficient dimension reduction. We directly model the response density in terms of the sufficient predictors using a finite mixture model. This approach is computationally efficient and offers a unified framework to handle categorical predictors, missing predictors, and Bayesian variable selection. 
We illustrate the method using both a simulation study and an analysis of an HIV data set.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Bondell, Howard D. and Li, Lexin}, year={2011}, month={Sep}, pages={886–895} } @article{storlie_bondell_reich_zhang_2011, title={Surface estimation, variable selection, and the nonparametric oracle property}, volume={21}, number={2}, journal={Statistica Sinica}, author={Storlie, C. B. and Bondell, H. D. and Reich, B. J. and Zhang, H. H.}, year={2011}, pages={679–705} } @article{reich_bandyopadhyay_2010, title={A LATENT FACTOR MODEL FOR SPATIAL DATA WITH INFORMATIVE MISSINGNESS}, volume={4}, ISSN={["1932-6157"]}, DOI={10.1214/09-aoas278}, abstractNote={A large amount of data is typically collected during a periodontal exam. Analyzing these data poses several challenges. Several types of measurements are taken at many locations throughout the mouth. These spatially-referenced data are a mix of binary and continuous responses, making joint modeling difficult. Also, most patients have missing teeth. Periodontal disease is a leading cause of tooth loss, so it is likely that the number and location of missing teeth informs about the patient's periodontal health. In this paper we develop a multivariate spatial framework for these data which jointly models the binary and continuous responses as a function of a single latent spatial process representing general periodontal health. We also use the latent spatial process to model the location of missing teeth. We show using simulated and real data that exploiting spatial associations and jointly modeling the responses and locations of missing teeth mitigates the problems presented by these data.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. 
and Bandyopadhyay, Dipankar}, year={2010}, month={Mar}, pages={439–459} } @article{storlie_bondell_reich_2010, title={A Locally Adaptive Penalty for Estimation of Functions With Varying Roughness}, volume={19}, ISSN={["1537-2715"]}, DOI={10.1198/jcgs.2010.09020}, abstractNote={We propose a new regularization method called Loco-Spline for nonparametric function estimation. Loco-Spline uses a penalty which is data driven and locally adaptive. This allows for more flexible estimation of the function in regions of the domain where it has more curvature, without over fitting in regions that have little curvature. This methodology is also transferred into higher dimensions via the Smoothing Spline ANOVA framework. General conditions for optimal MSE rate of convergence are given and the Loco-Spline is shown to achieve this rate. In our simulation study, the Loco-Spline substantially outperforms the traditional smoothing spline and the locally adaptive kernel smoother. Code to fit Loco-Spline models is included with the Supplemental Materials for this article which are available online.}, number={3}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Storlie, Curtis B. and Bondell, Howard D. and Reich, Brian J.}, year={2010}, month={Sep}, pages={569–589} } @article{hodges_reich_2010, title={Adding Spatially-Correlated Errors Can Mess Up the Fixed Effect You Love}, volume={64}, ISSN={["0003-1305"]}, DOI={10.1198/tast.2010.10052}, abstractNote={Many statisticians have had the experience of fitting a linear model with uncorrelated errors, then adding a spatially-correlated error term (random effect) and finding that the estimates of the fixed-effect coefficients have changed substantially. 
We show that adding a spatially-correlated error term to a linear model is equivalent to adding a saturated collection of canonical regressors, the coefficients of which are shrunk toward zero, where the spatial map determines both the canonical regressors and the relative extent of the coefficients’ shrinkage. Adding a spatially-correlated error term can also be seen as inflating the error variances associated with specific contrasts of the data, where the spatial map determines the contrasts and the extent of error-variance inflation. We show how to avoid this spatial confounding by restricting the spatial random effect to the orthogonal complement (residual space) of the fixed effects, which we call restricted spatial regression. We consider five proposed interpretations of spatial confounding and draw implications about what, if anything, one should do about it. In doing so, we debunk the common belief that adding a spatially-correlated random effect adjusts fixed-effect estimates for spatially-structured missing covariates. This article has supplementary material online.}, number={4}, journal={AMERICAN STATISTICIAN}, author={Hodges, James S. and Reich, Brian J.}, year={2010}, month={Nov}, pages={325–334} } @article{reich_fuentes_herring_evenson_2010, title={Bayesian Variable Selection for Multivariate Spatially Varying Coefficient Regression}, volume={66}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2009.01333.x}, abstractNote={Summary Physical activity has many well‐documented health benefits for cardiovascular fitness and weight control. For pregnant women, the American College of Obstetricians and Gynecologists currently recommends 30 minutes of moderate exercise on most, if not all, days; however, very few pregnant women achieve this level of activity. Traditionally, studies have focused on examining individual or interpersonal factors to identify predictors of physical activity. 
There is a renewed interest in whether characteristics of the physical environment in which we live and work may also influence physical activity levels. We consider one of the first studies of pregnant women that examines the impact of characteristics of the built environment on physical activity levels. Using a socioecologic framework, we study the associations between physical activity and several factors including personal characteristics, meteorological/air quality variables, and neighborhood characteristics for pregnant women in four counties of North Carolina. We simultaneously analyze six types of physical activity and investigate cross‐dependencies between these activity types. Exploratory analysis suggests that the associations are different in different regions. Therefore, we use a multivariate regression model with spatially varying regression coefficients. This model includes a regression parameter for each covariate at each spatial location. For our data with many predictors, some form of dimension reduction is clearly needed. We introduce a Bayesian variable selection procedure to identify subsets of important variables. Our stochastic search algorithm determines the probabilities that each covariate's effect is null, non‐null but constant across space, and spatially varying. We found that individual‐level covariates had a greater influence on women's activity levels than neighborhood environmental characteristics, and some individual‐level covariates had spatially varying associations with the activity levels of pregnant women.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Fuentes, Montserrat and Herring, Amy H. 
and Evenson, Kelly R.}, year={2010}, month={Sep}, pages={772–782} } @article{hayashi_mizoguchi_lee_reich_2010, title={Development of a novel statistical model for mandibular kinematics}, volume={32}, ISSN={["1350-4533"]}, DOI={10.1016/j.medengphy.2010.04.005}, abstractNote={The purpose of this study was to establish a new statistical method for the analysis of masticatory function. The subjects were patients with anterior crossbite who had received orthognathic surgery. Chewing movement was measured by means of an opto-electronic motion-analysis system. This movement was compared with similar movement in control patients. We sought to develop a statistical model to predict the population average curves of the chewing cycles. In this study, the mandibular incisor point was used as a target point of jaw movement. The combination of a spline function with random coefficients and self-modeling regression (SEMOR) extended to three dimensions was used to predict population average curves for each group. Unquestionably, significant differences were present in some areas. The present modeling method that uses the combination of a spline function and SEMOR is one of the best ways to eliminate subjective estimation with regard to predicting representative chewing cycles.}, number={5}, journal={MEDICAL ENGINEERING \& PHYSICS}, author={Hayashi, Kazuo and Mizoguchi, Itaru and Lee, Seung-Pyo and Reich, Brian}, year={2010}, month={Jun}, pages={423–428} } @article{reich_bondell_wang_2010, title={Flexible Bayesian quantile regression for independent and clustered data}, volume={11}, ISSN={["1465-4644"]}, DOI={10.1093/biostatistics/kxp049}, abstractNote={Quantile regression has emerged as a useful supplement to ordinary mean regression. Traditional frequentist quantile regression makes very minimal assumptions on the form of the error distribution and thus is able to accommodate nonnormal errors, which are common in many applications. 
However, inference for these models is challenging, particularly for clustered or censored data. A Bayesian approach enables exact inference and is well suited to incorporate clustered, missing, or censored data. In this paper, we propose a flexible Bayesian quantile regression model. We assume that the error distribution is an infinite mixture of Gaussian densities subject to a stochastic constraint that enables inference on the quantile of interest. This method outperforms the traditional frequentist method under a wide array of simulated data models. We extend the proposed approach to analyze clustered data. Here, we differentiate between and develop conditional and marginal models for clustered data. We apply our methods to analyze a multipatient apnea duration data set.}, number={2}, journal={BIOSTATISTICS}, author={Reich, Brian J. and Bondell, Howard D. and Wang, Huixia J.}, year={2010}, month={Apr}, pages={337–352} } @article{bondell_reich_wang_2010, title={Noncrossing quantile regression curve estimation}, volume={97}, ISSN={["0006-3444"]}, DOI={10.1093/biomet/asq048}, abstractNote={Since quantile regression curves are estimated individually, the quantile curves can cross, leading to an invalid distribution for the response. A simple constrained version of quantile regression is proposed to avoid the crossing problem for both linear and nonparametric quantile curves. A simulation study and a reanalysis of tropical cyclone intensity data shows the usefulness of the procedure. Asymptotic properties of the estimator are equivalent to the typical approach under standard conditions, and the proposed estimator reduces to the classical one if there is no crossing. The performance of the constrained estimator has shown significant improvement by adding smoothing and stability across the quantile levels.}, number={4}, journal={BIOMETRIKA}, author={Bondell, Howard D. and Reich, Brian J. 
and Wang, Huixia}, year={2010}, month={Dec}, pages={825–838} } @article{hayashi_reich_delong_lee_mizoguchi_2009, title={A novel statistical model for mandibular helical axis analysis}, volume={36}, ISSN={["1365-2842"]}, DOI={10.1111/j.1365-2842.2008.01890.x}, abstractNote={Summary The purpose of this study was to establish a new statistical method for the analysis of noisy mandibular helical axis parameters, especially the position vector of the finite helical axis (FHA). The subjects were children with anterior cross‐bite who had received orthodontic treatment. Maximum mouth‐opening was measured by means of an opto‐electronic motion analysis system. These movements were compared with similar movement in the same group after treatment of their anterior cross‐bite. Each curve of FHA position vectors was modelled as a spline function with random coefficients. To determine the optimal number of knots, two criteria were used: deviance information criteria (DIC) and mean squared prediction error (MSE). We were interested in estimating a typical curve for a population. Self‐modelling regression (SEMOR) was extended to three dimensions to model groups of three‐dimensional curves. Each curve was modelled as a spline function using nine knots. Population average curves were created using SEMOR. This study provided detailed information about jaw movement for comparing cross‐bite to normal occlusion by calculating the population mean curves of the position vector of the FHA. Our results suggested that the two population mean curves for the position vector of the FHA were significantly different in the closing phase. The combination of a spline function with random coefficients and SEMOR extended to three dimensions can be used not only for FHA analysis but also for the analysis of other jaw movements.}, number={2}, journal={JOURNAL OF ORAL REHABILITATION}, author={Hayashi, K. and Reich, B. and Delong, R. and Lee, S.-P. 
and Mizoguchi, I.}, year={2009}, month={Feb}, pages={102–109} } @article{reich_fuentes_burke_2009, title={Analysis of the effects of ultrafine particulate matter while accounting for human exposure}, volume={20}, ISSN={["1099-095X"]}, DOI={10.1002/env.915}, abstractNote={Abstract}, number={2}, journal={ENVIRONMETRICS}, author={Reich, Brian J. and Fuentes, Montserrat and Burke, Janet}, year={2009}, month={Mar}, pages={131–146} } @article{bandyopadhyay_reich_slate_2009, title={Bayesian modeling of multivariate spatial binary data with applications to dental caries}, volume={28}, ISSN={["1097-0258"]}, DOI={10.1002/sim.3647}, abstractNote={Abstract}, number={28}, journal={STATISTICS IN MEDICINE}, author={Bandyopadhyay, Dipankar and Reich, Brian J. and Slate, Elizabeth H.}, year={2009}, month={Dec}, pages={3492–3508} } @article{costalonga_batas_reich_2009, title={Effects of Toll-like receptor 4 on Porphyromonas gingivalis-induced bone loss in mice}, volume={44}, ISSN={["1600-0765"]}, DOI={10.1111/j.1600-0765.2008.01152.x}, abstractNote={Background and Objective: Toll‐like receptor 4 (TLR‐4)/myeloid differentiation protein‐2 complex ligation by lipopolysaccharide induces production of pro‐inflammatory cytokines and co‐stimulatory molecules on antigen presenting cells. The aim of this study was to determine the role of the TLR‐4 in bone loss‐resistant C57BL mice and in bone loss‐susceptible BALB/c mice after infection with Porphyromonas gingivalis.}, number={4}, journal={JOURNAL OF PERIODONTAL RESEARCH}, author={Costalonga, M. and Batas, L. and Reich, B. 
J.}, year={2009}, month={Aug}, pages={537–542} } @article{choi_reich_fuentes_davis_2009, title={Multivariate Spatial-Temporal Modeling and Prediction of Speciated Fine Particles}, volume={3}, ISSN={1559-8608 1559-8616}, url={http://dx.doi.org/10.1080/15598608.2009.10411933}, DOI={10.1080/15598608.2009.10411933}, abstractNote={Fine particulate matter (PM2.5) is an atmospheric pollutant that has been linked to serious health problems, including mortality. PM2.5 has five main components: sulfate, nitrate, total carbonaceous mass, ammonium, and crustal material. These components have complex spatial-temporal dependency and cross dependency structures. It is important to gain better understanding about the spatial-temporal distribution of each component of the total PM2.5 mass, and also to estimate how the composition of PM2.5 changes with space and time to conduct spatial-temporal epidemiological studies of the association of these pollutants and adverse health effects. We introduce a multivariate spatial-temporal model for speciated PM2.5. Our hierarchical framework combines different sources of data and accounts for bias and measurement error in each data source. In addition, a spatiotemporal extension of the linear model of coregionalization is developed to account for spatial and temporal dependency structures for each component as well as the associations among the components. We apply our framework to speciated PM2.5 data in the United States for the year 2004.}, number={2}, journal={Journal of Statistical Theory and Practice}, publisher={Springer Science and Business Media LLC}, author={Choi, Jungsoon and Reich, Brian J. 
and Fuentes, Montserrat and Davis, Jerry M.}, year={2009}, month={Jun}, pages={407–418} } @article{bondell_reich_2009, title={Simultaneous Factor Selection and Collapsing Levels in ANOVA}, volume={65}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2008.01061.x}, abstractNote={Summary When performing an analysis of variance, the investigator often has two main goals: to determine which of the factors have a significant effect on the response, and to detect differences among the levels of the significant factors. Level comparisons are done via a post‐hoc analysis based on pairwise differences. This article proposes a novel constrained regression approach to simultaneously accomplish both goals via shrinkage within a single automated procedure. The form of this shrinkage has the ability to collapse levels within a factor by setting their effects to be equal, while also achieving factor selection by zeroing out entire factors. Using this approach also leads to the identification of a structure within each factor, as levels can be automatically collapsed to form groups. In contrast to the traditional pairwise comparison methods, these groups are necessarily nonoverlapping so that the results are interpretable in terms of distinct subsets of levels. The proposed procedure is shown to have the oracle property in that asymptotically it performs as well as if the exact structure were known beforehand. A simulation and real data examples show the strong performance of the method.}, number={1}, journal={BIOMETRICS}, author={Bondell, Howard D. and Reich, Brian J.}, year={2009}, month={Mar}, pages={169–177} } @article{choi_fuentes_reich_2009, title={Spatial-temporal association between fine particulate matter and daily mortality}, volume={53}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2008.05.018}, abstractNote={Fine particulate matter (PM(2.5)) is a mixture of pollutants that has been linked to serious health problems, including premature mortality. 
Since the chemical composition of PM(2.5) varies across space and time, the association between PM(2.5) and mortality could also change with space and season. In this work we develop and implement a statistical multi-stage Bayesian framework that provides a very broad, flexible approach to studying the spatiotemporal associations between mortality and population exposure to daily PM(2.5) mass, while accounting for different sources of uncertainty. In stage 1, we map ambient PM(2.5) air concentrations using all available monitoring data (IMPROVE and FRM) and an air quality model (CMAQ) at different spatial and temporal scales. In stage 2, we examine the spatial temporal relationships between the health end-points and the exposures to PM(2.5) by introducing a spatial-temporal generalized Poisson regression model. We adjust for time-varying confounders, such as seasonal trends. A common seasonal trends model is to use a fixed number of basis functions to account for these confounders, but the results can be sensitive to the number of basis functions. In this study, the number of the basis functions is treated as an unknown parameter in our Bayesian model and we use a space-time stochastic search variable selection approach. We apply our methods to a data set in North Carolina for the year 2001.}, number={8}, journal={COMPUTATIONAL STATISTICS \& DATA ANALYSIS}, author={Choi, Jungsoon and Fuentes, Montserrat and Reich, Brian J.}, year={2009}, month={Jun}, pages={2989–3000} } @article{reich_storlie_bondell_2009, title={Variable Selection in Bayesian Smoothing Spline ANOVA Models: Application to Deterministic Computer Codes}, volume={51}, ISSN={["1537-2723"]}, DOI={10.1198/TECH.2009.0013}, abstractNote={With many predictors, choosing an appropriate subset of the covariates is a crucial—and difficult—step in nonparametric regression. We propose a Bayesian nonparametric regression model for curve fitting and variable selection. 
We use the smoothing splines ANOVA framework to decompose the regression function into interpretable main effect and interaction functions, and use stochastic search variable selection through Markov chain Monte Carlo sampling to search for models that fit the data well. We also show that variable selection is highly sensitive to hyperparameter choice, and develop a technique for selecting hyperparameters that control the long-run false-positive rate. We use our method to build an emulator for a complex computer model for two-phase fluid flow.}, number={2}, journal={TECHNOMETRICS}, author={Reich, Brian J. and Storlie, Curtis B. and Bondell, Howard D.}, year={2009}, month={May}, pages={110–120} } @article{reich_hodges_2008, title={Identification of the variance components in the general two-variance linear model}, volume={138}, ISSN={["0378-3758"]}, DOI={10.1016/j.jspi.2007.05.046}, abstractNote={Bayesian analyses frequently employ two-stage hierarchical models involving two-variance parameters: one controlling measurement error and the other controlling the degree of smoothing implied by the model's higher level. These analyses can be hampered by poorly identified variances which may lead to difficulty in computing and in choosing reference priors for these parameters. In this paper, we introduce the class of two-variance hierarchical linear models and characterize the aspects of these models that lead to well-identified or poorly identified variances. These ideas are illustrated with a spatial analysis of a periodontal data set and examined in some generality for specific two-variance models including the conditionally autoregressive (CAR) and one-way random effect models. We also connect this theory with other constrained regression methods and suggest a diagnostic that can be used to search for missing spatially varying fixed effects in the CAR model.}, number={6}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Reich, Brian J. 
and Hodges, James S.}, year={2008}, month={Jul}, pages={1592–1604} } @article{reich_hodges_2008b, title={Modeling longitudinal spatial periodontal data: A spatially adaptive model with tools for specifying priors and checking fit}, volume={64}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2007.00956.x}, abstractNote={Summary Attachment loss (AL), the distance down a tooth's root that is no longer attached to surrounding bone by periodontal ligament, is a common measure of periodontal disease. In this article, we develop a spatiotemporal model to monitor the progression of AL. Our model is an extension of the conditionally autoregressive (CAR) prior, which spatially smooths estimates toward their neighbors. However, because AL often exhibits a burst of large values in space and time, we develop a nonstationary spatiotemporal CAR model that allows the degree of spatial and temporal smoothing to vary in different regions of the mouth. To do this, we assign each AL measurement site its own set of variance parameters and spatially smooth the variances with spatial priors. We propose a heuristic to measure the complexity of the site‐specific variances, and use it to select priors that ensure parameters in the model are well identified. In data from a clinical trial, this model improves the fit compared to the usual dynamic CAR model for 90 of 99 patients' AL measurements.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Hodges, James S.}, year={2008}, month={Sep}, pages={790–799} } @article{fuentes_reich_lee_2008, title={SPATIAL-TEMPORAL MESOSCALE MODELING OF RAINFALL INTENSITY USING GAGE AND RADAR DATA}, volume={2}, ISSN={["1932-6157"]}, DOI={10.1214/08-AOAS166}, abstractNote={Gridded estimated rainfall intensity values at very high spatial and temporal resolution levels are needed as main inputs for weather prediction models to obtain accurate precipitation forecasts, and to verify the performance of precipitation forecast models. 
These gridded rainfall fields are also the main driver for hydrological models that forecast flash floods, and they are essential for disaster prediction associated with heavy rain. Rainfall information can be obtained from rain gages that provide relatively accurate estimates of the actual rainfall values at point-referenced locations, but they do not characterize well enough the spatial and temporal structure of the rainfall fields. Doppler radar data offer better spatial and temporal coverage, but Doppler radar measures effective radar reflectivity (Ze) rather than rainfall rate (R). Thus, rainfall estimates from radar data suffer from various uncertainties due to their measuring principle and the conversion from Ze to R. We introduce a framework to combine radar reflectivity and gage data, by writing the different sources of rainfall information in terms of an underlying unobservable spatial temporal process with the true rainfall values. We use spatial logistic regression to model the probability of rain for both sources of data in terms of the latent true rainfall process. We characterize the different sources of bias and error in the gage and radar data and we estimate the true rainfall intensity with its posterior predictive distribution, conditioning on the observed data. Our model allows for nonstationary and asymmetry in the spatio-temporal dependency structure of the rainfall process, and allows the temporal evolution of the rainfall process to depend on the motions of rain fields, and the spatial correlation to depend on geographic features. 
We apply our methods to estimate rainfall intensity every 10 minutes, in a subdomain over South Korea with a spatial resolution of 1km by 1km.}, number={4}, journal={ANNALS OF APPLIED STATISTICS}, author={Fuentes, Montserrat and Reich, Brian and Lee, Gyuwon}, year={2008}, month={Dec}, pages={1148–1169} } @article{bondell_reich_2008, title={Simultaneous regression shrinkage, variable selection, and supervised clustering of predictors with OSCAR}, volume={64}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2007.00843.x}, abstractNote={Summary Variable selection can be challenging, particularly in situations with a large number of predictors with possibly high correlations, such as gene expression data. In this article, a new method called the OSCAR (octagonal shrinkage and clustering algorithm for regression) is proposed to simultaneously select variables while grouping them into predictive clusters. In addition to improving prediction accuracy and interpretation, these resulting groups can then be investigated further to discover what contributes to the group having a similar behavior. The technique is based on penalized least squares with a geometrically intuitive penalty function that shrinks some coefficients to exactly zero. Additionally, this penalty yields exact equality of some coefficients, encouraging correlated predictors that have a similar effect on the response to form predictive clusters represented by a single coefficient. The proposed procedure is shown to compare favorably to the existing shrinkage and variable selection techniques in terms of both prediction error and model complexity, while yielding the additional grouping information.}, number={1}, journal={BIOMETRICS}, author={Bondell, Howard D. 
and Reich, Brian J.}, year={2008}, month={Mar}, pages={115–123} } @article{reich_fuentes_2007, title={A MULTIVARIATE SEMIPARAMETRIC BAYESIAN SPATIAL MODELING FRAMEWORK FOR HURRICANE SURFACE WIND FIELDS}, volume={1}, ISSN={["1932-6157"]}, DOI={10.1214/07-AOAS108}, abstractNote={Storm surge, the onshore rush of sea water caused by the high winds and low pressure associated with a hurricane, can compound the effects of inland flooding caused by rainfall, leading to loss of property and loss of life for residents of coastal areas. Numerical ocean models are essential for creating storm surge forecasts for coastal areas. These models are driven primarily by the surface wind forcings. Currently, the gridded wind fields used by ocean models are specified by deterministic formulas that are based on the central pressure and location of the storm center. While these equations incorporate important physical knowledge about the structure of hurricane surface wind fields, they cannot always capture the asymmetric and dynamic nature of a hurricane. A new Bayesian multivariate spatial statistical modeling framework is introduced combining data with physical knowledge about the wind fields to improve the estimation of the wind vectors. Many spatial models assume the data follow a Gaussian distribution. However, this may be overly-restrictive for wind fields data which often display erratic behavior, such as sudden changes in time or space. In this paper we develop a semiparametric multivariate spatial model for these data. Our model builds on the stick-breaking prior, which is frequently used in Bayesian modeling to capture uncertainty in the parametric form of an outcome. The stick-breaking prior is extended to the spatial setting by assigning each location a different, unknown distribution, and smoothing the distributions in space with a series of kernel functions. 
This semiparametric spatial model is shown to improve prediction compared to usual Bayesian Kriging methods for the wind field of Hurricane Ivan.}, number={1}, journal={ANNALS OF APPLIED STATISTICS}, author={Reich, Brian J. and Fuentes, Montserrat}, year={2007}, month={Jun}, pages={249–264} } @article{reich_hodges_carlin_reich_2006, title={A spatial analysis of basketball shot chart data}, volume={60}, ISSN={["1537-2731"]}, DOI={10.1198/000313006X90305}, abstractNote={Basketball coaches at all levels use shot charts to study shot locations and outcomes for their own teams as well as upcoming opponents. Shot charts are simple plots of the location and result of each shot taken during a game. Although shot chart data are rapidly increasing in richness and availability, most coaches still use them purely as descriptive summaries. However, a team's ability to defend a certain player could potentially be improved by using shot data to make inferences about the player's tendencies and abilities. This article develops hierarchical spatial models for shot-chart data, which allow for spatially varying effects of covariates. Our spatial models permit differential smoothing of the fitted surface in two spatial directions, which naturally correspond to polar coordinates: distance to the basket and angle from the line connecting the two baskets. We illustrate our approach using the 2003–2004 shot chart data for Minnesota Timberwolves guard Sam Cassell.}, number={1}, journal={AMERICAN STATISTICIAN}, author={Reich, B. J. and Hodges, J. S. and Carlin, B. P. and Reich, A. M.}, year={2006}, month={Feb}, pages={3–12} } @article{reich_hodges_zadnik_2006, title={Effects of residual smoothing on the posterior of the fixed effects in disease-mapping models}, volume={62}, DOI={10.1111/j.1541-0420.2006.00617.x}, number={4}, journal={Biometrics}, author={Reich, Brian and Hodges, J. S. 
and Zadnik, V.}, year={2006}, pages={1197–1206} } @article{lemmonds_mooney_reich_hatsukami_2004, title={Characteristics of cigarette smokers seeking treatment for cessation versus reduction}, volume={29}, ISSN={0306-4603}, url={http://dx.doi.org/10.1016/j.addbeh.2003.08.049}, DOI={10.1016/j.addbeh.2003.08.049}, abstractNote={Comparisons were made between cigarette smokers seeking treatment to quit smoking and cigarette smokers seeking treatment to reduce the number of cigarettes they smoke. Potential subjects were recruited from the local metropolitan area by advertisement in the local media. A total of 665 cigarette smokers telephoned our clinic to seek treatment for smoking cessation and 565 cigarette smokers telephoned to seek treatment to gradually reduce the number of cigarettes they smoke but not quit smoking. Potential subjects were instructed to call the clinic to find out additional information about the studies, and while on the telephone they were asked questions pertaining to tobacco use and health status. The results show that the two populations are similar in many respects with the following exceptions: smokers seeking treatment to reduce cigarette use tend to smoke more cigarettes per day, are less motivated to quit, make fewer quit attempts, drink more alcoholic beverages per day, and have more health problems (Ps<.05). These results indicate that cigarette smokers seeking treatment for smoking reduction but not cessation may be more dependent smokers who experience more medical disorders.}, number={2}, journal={Addictive Behaviors}, publisher={Elsevier BV}, author={Lemmonds, Charlotte A. 
and Mooney, Marc and Reich, Brian and Hatsukami, Dorothy}, year={2004}, month={Feb}, pages={357–364} } @article{allen_brintnell_hatsukami_reich_2004, title={Energy intake and physical activity during short-term smoking cessation in postmenopausal women}, volume={29}, ISSN={0306-4603}, url={http://dx.doi.org/10.1016/j.addbeh.2004.02.041}, DOI={10.1016/j.addbeh.2004.02.041}, abstractNote={This study assessed the effect of short-term (2-week) smoking abstinence on weight gain, energy intake, and physical activity in 60 postmenopausal women. Participants were stratified by their use of hormone replacement therapy (HRT; currently taking/not taking) and then randomized to abstinence or continued smoking for 2 weeks. The 30 abstainers gained a mean of 1.28 kg, compared with a 0.54 kg loss for the 30 continued smokers (P=.002). The abstainers also reported a significantly greater increase than did the smokers in total kilocalorie and in carbohydrate consumption for both weeks. There were no changes in physical activity and HRT effect. These are the first published findings on caloric intake and weight gain during smoking abstinence in postmenopausal women—an understudied population in the smoking cessation literature.}, number={5}, journal={Addictive Behaviors}, publisher={Elsevier BV}, author={Allen, Sharon S and Brintnell, Dawn M and Hatsukami, Dorothy and Reich, Brian}, year={2004}, month={Jul}, pages={947–951} }