@article{yanchenko_bondell_reich_2023, title={Spatial regression modeling via the R2D2 framework}, volume={10}, ISSN={["1099-095X"]}, DOI={10.1002/env.2829}, abstractNote={Spatially dependent data arises in many applications, and Gaussian processes are a popular modeling choice for these scenarios. While Bayesian analyses of these problems have proven to be successful, selecting prior distributions for these complex models remains a difficult task. In this work, we propose a principled approach for setting prior distributions on model variance components by placing a prior distribution on a measure of model fit. In particular, we derive the distribution of the prior coefficient of determination. Placing a beta prior distribution on this measure induces a generalized beta prime prior distribution on the global variance of the linear predictor in the model. This method can also be thought of as shrinking the fit towards the intercept‐only (null) model. We derive an efficient Gibbs sampler for the majority of the parameters and use Metropolis–Hastings updates for the others. Finally, the method is applied to a marine protection area dataset. We estimate the effect of marine policies on biodiversity and conclude that no‐take restrictions lead to a slight increase in biodiversity and that the majority of the variance in the linear predictor comes from the spatial effect.}, journal={ENVIRONMETRICS}, author={Yanchenko, Eric and Bondell, Howard D. and Reich, Brian J.}, year={2023}, month={Oct} } @article{zhang_naughton_bondell_reich_2022, title={Bayesian Regression Using a Prior on the Model Fit: The R2-D2 Shrinkage Prior}, volume={117}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2020.1825449}, abstractNote={Prior distributions for high-dimensional linear regression require specifying a joint distribution for the unobserved regression coefficients, which is inherently difficult. We instead propose a new class of shrinkage priors for linear regression via specifying a prior first on the model fit, in particular, the coefficient of determination, and then distributing through to the coefficients in a novel way. The proposed method compares favorably to previous approaches in terms of both concentration around the origin and tail behavior, which leads to improvements both in posterior contraction and in empirical performance. The limiting behavior of the proposed prior, both around the origin and in the tails, is optimal in the sense that it simultaneously lies on the boundary of being an improper prior in both regions. None of the existing shrinkage priors obtain this behavior in both regions simultaneously. We also demonstrate that our proposed prior leads to the same near-minimax posterior contraction rate as the spike-and-slab prior. Supplementary materials for this article are available online.}, number={538}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zhang, Yan Dora and Naughton, Brian P. and Bondell, Howard D.
and Reich, Brian J.}, year={2022}, month={Apr}, pages={862–874} } @article{liu_yang_bondell_martin_2021, title={BAYESIAN INFERENCE IN HIGH-DIMENSIONAL LINEAR MODELS USING AN EMPIRICAL CORRELATION-ADAPTIVE PRIOR}, volume={31}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202019.0133}, abstractNote={In the context of a high-dimensional linear regression model, we propose the use of an empirical correlation-adaptive prior that makes use of information in the observed predictor variable matrix to adaptively address high collinearity, determining if parameters associated with correlated predictors should be shrunk together or kept apart. Under suitable conditions, we prove that this empirical Bayes posterior concentrates around the true sparse parameter at the optimal rate asymptotically. A simplified version of a shotgun stochastic search algorithm is employed to implement the variable selection procedure, and we show, via simulation experiments across different settings and a real-data application, the favorable performance of the proposed method compared to existing methods.}, number={4}, journal={STATISTICA SINICA}, author={Liu, Chang and Yang, Yue and Bondell, Howard and Martin, Ryan}, year={2021}, month={Oct}, pages={2051–2072} } @article{li_reich_bondell_2021, title={Deep distribution regression}, volume={159}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2021.107203}, abstractNote={Due to their flexibility and predictive performance, machine-learning based regression methods have become an important tool for predictive modeling and forecasting. However, most methods focus on estimating the conditional mean or specific quantiles of the target quantity and do not provide the full conditional distribution, which contains uncertainty information that might be crucial for decision making. A general solution consists of transforming a conditional distribution estimation problem into a constrained multi-class classification problem, in which tools such as deep neural networks can be applied. A novel joint binary cross-entropy loss function is proposed to accomplish this goal. Its performance is compared to current state-of-the-art methods via simulation. The approach also shows improved accuracy in a probabilistic solar energy forecasting problem.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Li, Rui and Reich, Brian J. and Bondell, Howard D.}, year={2021}, month={Jul} } @article{huberman_reich_bondell_2021, title={Nonparametric conditional density estimation in a deep learning framework for short-term forecasting}, volume={5}, ISSN={["1573-3009"]}, DOI={10.1007/s10651-021-00499-z}, abstractNote={Short-term forecasting is an important tool in understanding environmental processes. In this paper, we incorporate machine learning algorithms into a conditional distribution estimator for the purposes of forecasting tropical cyclone intensity. Many machine learning techniques give a single-point prediction of the conditional distribution of the target variable, which does not give a full accounting of the prediction variability. Conditional distribution estimation can provide extra insight on predicted response behavior, which could influence decision-making and policy. We propose a technique that simultaneously estimates the entire conditional distribution and flexibly allows for machine learning techniques to be incorporated. 
A smooth model is fit over both the target variable and covariates, and a logistic transformation is applied to the model output layer to produce an expression of the conditional density function. We provide two examples of machine learning models that can be used: polynomial regression and deep learning models. To achieve computational efficiency, we propose a case–control sampling approximation to the conditional distribution. A simulation study for four different data distributions highlights the effectiveness of our method compared to other machine learning-based conditional distribution estimation techniques. We then demonstrate the utility of our approach for forecasting purposes using tropical cyclone data from the Atlantic Seaboard. This paper gives a proof of concept for the promise of our method; further computational developments can fully unlock its insights in more complex forecasting and other applications.}, journal={ENVIRONMENTAL AND ECOLOGICAL STATISTICS}, author={Huberman, David B. and Reich, Brian J. and Bondell, Howard D.}, year={2021}, month={May} } @article{zhao_bondell_2020, title={Solution paths for the generalized lasso with applications to spatially varying coefficients regression}, volume={142}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2019.106821}, abstractNote={Penalized regression can improve prediction accuracy and reduce dimension. The generalized lasso problem is used in many applications in various fields. The generalized lasso penalizes a linear transformation of the coefficients rather than the coefficients themselves. The proposed algorithm solves the generalized lasso problem and provides the full solution path. A confidence set can then be constructed on the generalized lasso parameters based on the modified residual bootstrap lasso. The approach is demonstrated using spatially varying coefficients regression, and it is shown to be both accurate and efficient compared to previous work.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Zhao, Yaqing and Bondell, Howard}, year={2020}, month={Feb} } @article{tian_bondell_wilson_2019, title={Bayesian variable selection for logistic regression}, volume={12}, ISSN={["1932-1872"]}, DOI={10.1002/sam.11428}, abstractNote={A key issue when using Bayesian variable selection for logistic regression is choosing an appropriate prior distribution. This can be particularly difficult for high‐dimensional data where complete separation will naturally occur in the high‐dimensional space. We propose the use of the Normal‐Gamma prior with recommendations on calibration of the hyper‐parameters. We couple this choice with the use of joint credible sets to avoid performing a search over the high‐dimensional model space. The approach is shown to outperform other methods in high‐dimensional settings, especially with highly correlated data. The Bayesian approach allows for a natural specification of the hyper‐parameters.}, number={5}, journal={STATISTICAL ANALYSIS AND DATA MINING}, author={Tian, Yiqing and Bondell, Howard D.
and Wilson, Alyson}, year={2019}, month={Oct}, pages={378–393} } @article{su_bondell_2019, title={Best linear estimation via minimization of relative mean squared error}, volume={29}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-017-9792-0}, number={1}, journal={STATISTICS AND COMPUTING}, author={Su, Lin and Bondell, Howard D.}, year={2019}, month={Jan}, pages={33–42} } @article{liu_bondell_2019, title={Binormal Precision-Recall Curves for Optimal Classification of Imbalanced Data}, volume={11}, ISSN={["1867-1772"]}, DOI={10.1007/s12561-019-09231-9}, number={1}, journal={STATISTICS IN BIOSCIENCES}, author={Liu, Zhongkai and Bondell, Howard D.}, year={2019}, month={Apr}, pages={141–161} } @article{stevenson_peterson_bondell_2018, title={Developing a model of climate change behavior among adolescents}, volume={151}, ISSN={["1573-1480"]}, url={https://doi.org/10.1007/s10584-018-2313-0}, DOI={10.1007/s10584-018-2313-0}, abstractNote={Research on adolescent climate change perceptions has uncovered key insights about how knowledge, concern, and hope might relate to behavior and the potential for educational interventions to influence these factors. However, few of these studies have employed treatment/control designs that might address causality and none have addressed how these factors might interact to influence behavior. We developed a model of behavior change where a climate education treatment impacted knowledge, knowledge impacted hope and concern, and hope and concern together impacted behavior. We empirically tested the utility of this model and the causal relationships within it using a pre/post, treatment/control evaluation of climate education among adolescents in North Carolina, USA (n = 1041). We found support for a causal relationship between the treatment and gains in knowledge, but not between treatment and behavior. However, we did find support for a path model in which climate change knowledge positively relates to increased climate change concern and hope, and increases in concern and hope predict changes in pro-environmental behavior. Low SES was related to smaller gains in knowledge, concern, and behavior. Our results contribute to a theoretical understanding of climate change behaviors among adolescents and suggest that climate education aiming to change behavior should focus on building hope and concern.}, number={3-4}, journal={CLIMATIC CHANGE}, publisher={Springer Nature}, author={Stevenson, Kathryn T. and Peterson, M. Nils and Bondell, Howard D.}, year={2018}, month={Dec}, pages={589–603} } @article{kong_bondell_wu_2018, title={FULLY EFFICIENT ROBUST ESTIMATION, OUTLIER DETECTION AND VARIABLE SELECTION VIA PENALIZED REGRESSION}, volume={28}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202016.0441}, abstractNote={This paper studies the outlier detection and variable selection problem in linear regression. A mean shift parameter is added to the linear model to reflect the effect of outliers, where an outlier has a nonzero shift parameter. We then apply an adaptive regularization to these shift parameters to shrink most of them to zero. Those observations with nonzero mean shift parameter estimates are regarded as outliers. An L1 penalty is added to the regression parameters to select important predictors. We propose an efficient algorithm to solve this jointly penalized optimization problem and use the extended Bayesian information criterion tuning method to select the regularization parameters, since the number of parameters exceeds the sample size.
Theoretical results are provided in terms of high breakdown point, full efficiency, and outlier detection consistency. We illustrate our method with simulations and data. Our method is extended to high-dimensional problems with dimension much larger than the sample size.}, number={2}, journal={STATISTICA SINICA}, author={Kong, Dehan and Bondell, Howard D. and Wu, Yichao}, year={2018}, month={Apr}, pages={1031–1052} } @article{frew_peterson_sills_moorman_bondell_fuller_howell_2018, title={Market and Nonmarket Valuation of North Carolina's Tundra Swans among Hunters, Wildlife Watchers, and the Public}, volume={42}, ISSN={["1938-5463"]}, DOI={10.1002/wsb.915}, abstractNote={Wildlife-related tourism represents an important and growing economic sector for many rural communities and may be inadequately considered during regional planning. Providing robust estimates of wildlife values can help address this challenge. We used both market and nonmarket valuation methods to estimate the value of tundra swans (Cygnus columbianus) in North Carolina, USA, and compared tundra swan values among hunters, wildlife watchers, and the general public. Wildlife watchers reported the greatest willingness-to-pay (US$35.20/wildlife watcher/yr), followed by hunters (US$30.53/hunter/yr) and residents (US$16.27/resident/yr). We used the Impact Analysis for Planning system software to estimate market values or economic activity associated with tundra swans. Tundra swan hunters spent an average of US$408.34/hunter/year. Depending on assumptions over the substitutability of tundra swan hunting, we estimate that it generates value added of between US$306,155/year and US$920,161/year for the state economy. Wildlife watchers spent an average of US$171.25/wildlife watcher/year. We estimate that this generates value added of between US$14 million/year and US$42.9 million/year for the state economy, again depending on assumptions about whether watching tundra swans would be substituted with other leisure activities in eastern North Carolina or out-of-state. Compared with studies of international nature tourism, we found relatively low leakage rates (i.e., loss of economic benefits outside the study region), suggesting that enhancing opportunities for hunting and wildlife-viewing may be an effective economic development strategy for rural areas in the United States. Presenting both market and nonmarket values provides a more complete picture of the value of wildlife and may facilitate more effective management decisions; therefore, we recommend that both market and nonmarket values be considered to optimize tradeoffs between development and wildlife recreation. © 2018 The Wildlife Society.}, number={3}, journal={WILDLIFE SOCIETY BULLETIN}, author={Frew, Kristin N. and Peterson, M. Nils and Sills, Erin and Moorman, Christopher E. and Bondell, Howard and Fuller, Joseph C. and Howell, Douglas L.}, year={2018}, month={Sep}, pages={478–487} } @article{zhang_bondell_2018, title={Variable Selection via Penalized Credible Regions with Dirichlet-Laplace Global-Local Shrinkage Priors}, volume={13}, ISSN={["1936-0975"]}, DOI={10.1214/17-ba1076}, abstractNote={The method of Bayesian variable selection via penalized credible regions separates model fitting and variable selection. The idea is to search for the sparsest solution within the joint posterior credible regions. Although the approach was successful, it depended on the use of conjugate normal priors.
More recently, improvements in the use of global-local shrinkage priors have been made for high-dimensional Bayesian variable selection. In this paper, we incorporate global-local priors into the credible region selection framework. The Dirichlet-Laplace (DL) prior is adapted to linear regression. Posterior consistency for the normal and DL priors is shown, along with variable selection consistency. We further introduce a new method to tune hyperparameters in prior distributions for linear regression. We propose to choose the hyperparameters to minimize a discrepancy between the induced distribution on R-square and a prespecified target distribution. Prior elicitation on R-square is more natural, particularly when there is a large number of predictor variables, in which case elicitation on the coefficient scale is not feasible. For a normal prior, these hyperparameters are available in closed form to minimize the Kullback-Leibler divergence between the distributions.}, number={3}, journal={BAYESIAN ANALYSIS}, author={Zhang, Yan and Bondell, Howard D.}, year={2018}, month={Sep}, pages={823–844} } @article{li_guindani_reich_bondell_vannucci_2017, title={A Bayesian mixture model for clustering and selection of feature occurrence rates under mean constraints}, volume={10}, ISSN={["1932-1872"]}, DOI={10.1002/sam.11350}, abstractNote={In this paper, we consider the problem of modeling a matrix of count data, where multiple features are observed as counts over a number of samples. Due to the nature of the data generating mechanism, such data are often characterized by a high number of zeros and overdispersion. In order to take into account the skewness and heterogeneity of the data, some type of normalization and regularization is necessary for conducting inference on the occurrences of features across samples. We propose a zero‐inflated Poisson mixture modeling framework that incorporates a model‐based normalization through prior distributions with mean constraints, as well as a feature selection mechanism, which allows us to identify a parsimonious set of discriminatory features, and simultaneously cluster the samples into homogeneous groups. We show how our approach improves on the accuracy of the clustering with respect to more standard approaches for the analysis of count data, by means of a simulation study and an application to a bag‐of‐words benchmark data set, where the features are represented by the frequencies of occurrence of each word.}, number={6}, journal={STATISTICAL ANALYSIS AND DATA MINING}, author={Li, Qiwei and Guindani, Michele and Reich, Brian J. and Bondell, Howard D. and Vannucci, Marina}, year={2017}, month={Dec}, pages={393–409} } @article{peterson_chesonis_stevenson_bondell_2017, title={Evaluating relationships between hunting and biodiversity knowledge among children}, volume={41}, ISSN={1938-5463}, url={http://dx.doi.org/10.1002/WSB.792}, DOI={10.1002/wsb.792}, abstractNote={We investigated how hunting participation and associated demographic variables relate to biodiversity knowledge among children. Past participation in hunting among elementary-age children in North Carolina, USA, surveyed during 2014 was high (29%) and a positive predictor of students' ability to name native wildlife species after controlling for gender, ethnicity, and grade level. Minorities and girls had lower biodiversity knowledge scores and were less supportive of hunting.
Our findings suggest that children may view hunting differently than adults and that youth hunting programs, particularly those targeting very young children, may be fruitful ways to promote biodiversity knowledge. Such efforts, however, may be most valuable among minorities because Hispanic children had the lowest participation in hunting and Black children had both low participation rates and the lowest biodiversity knowledge scores. © 2017 The Wildlife Society.}, number={3}, journal={Wildlife Society Bulletin}, publisher={Wiley}, author={Peterson, M. Nils and Chesonis, Tessa and Stevenson, Kathryn T. and Bondell, Howard D.}, year={2017}, month={Aug}, pages={530–536} } @article{huque_bondell_carroll_ryan_2016, title={Spatial Regression with Covariate Measurement Error: A Semiparametric Approach}, volume={72}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12474}, abstractNote={Spatial data have become increasingly common in epidemiology and public health research thanks to advances in GIS (Geographic Information Systems) technology. In health research, for example, it is common for epidemiologists to incorporate geographically indexed data into their studies. In practice, however, the spatially defined covariates are often measured with error. Naive estimators of regression coefficients are attenuated if measurement error is ignored. Moreover, the classical measurement error theory is inapplicable in the context of spatial modeling because of the presence of spatial correlation among the observations. We propose a semiparametric regression approach to obtain bias‐corrected estimates of regression parameters and derive their large sample properties. We evaluate the performance of the proposed method through simulation studies and illustrate using data on Ischemic Heart Disease (IHD). Both simulation and practical application demonstrate that the proposed method can be effective in practice.}, number={3}, journal={BIOMETRICS}, author={Huque, Md Hamidul and Bondell, Howard D. and Carroll, Raymond J. and Ryan, Louise M.}, year={2016}, month={Sep}, pages={678–686} } @article{stevenson_peterson_bondell_2016, title={The influence of personal beliefs, friends, and family in building climate change concern among adolescents}, volume={25}, ISSN={1350-4622 1469-5871}, url={http://dx.doi.org/10.1080/13504622.2016.1177712}, DOI={10.1080/13504622.2016.1177712}, abstractNote={Understanding adolescent climate change concern (CCC) may be a key strategy for building a citizenry that supports climate change action, as adolescents are likely less influenced by ideological polarization than adults. Prior research shows that climate education may build concern among adolescents, but other factors such as peer pressure may also be important. We investigated the relationships between CCC, acceptance of anthropogenic global warming (AGW), perceived level of acceptance among friends and family, and frequency of discussion of the issue among 426 middle school students in North Carolina, USA, and developed a novel instrument to measure each of these constructs. Acceptance of AGW had the strongest association with CCC. Frequency of discussion with friends and family was the second strongest predictor. Perceived level of acceptance among family and friends was the third strongest predictor. Model selection results suggest family had more influence than friends in this study. Girls perceived climate change as a higher risk than boys.
In addition to building acceptance of AGW, leveraging discussions with peers and especially family may help build concern for climate change among future generations.}, number={6}, journal={Environmental Education Research}, publisher={Informa UK Limited}, author={Stevenson, Kathryn T. and Peterson, M. Nils and Bondell, Howard D.}, year={2016}, month={Apr}, pages={832–845} } @article{neely_bondell_tzeng_2015, title={A penalized likelihood approach for investigating gene-drug interactions in pharmacogenetic studies}, volume={71}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12259}, abstractNote={Pharmacogenetics investigates the relationship between heritable genetic variation and the variation in how individuals respond to drug therapies. Often, gene–drug interactions play a primary role in this response, and identifying these effects can aid in the development of individualized treatment regimes. Haplotypes can hold key information in understanding the association between genetic variation and drug response. However, the standard approach for haplotype‐based association analysis does not directly address the research questions dictated by individualized medicine. A complementary post‐hoc analysis is required, and this post‐hoc analysis is usually underpowered after adjusting for multiple comparisons and may lead to seemingly contradictory conclusions. In this work, we propose a penalized likelihood approach that is able to overcome the drawbacks of the standard approach and yield the desired personalized output. We demonstrate the utility of our method by applying it to the Scottish Randomized Trial in Ovarian Cancer. We also conducted simulation studies and showed that the proposed penalized method has power comparable to or greater than that of the standard approach and maintains low Type I error rates for both binary and quantitative drug responses. The largest performance gains are seen when the haplotype frequency is low, the differences in effect sizes are small, or the true relationship among the drugs is more complex.}, number={2}, journal={BIOMETRICS}, author={Neely, Megan L. and Bondell, Howard D. and Tzeng, Jung-Ying}, year={2015}, month={Jun}, pages={529–537} } @article{kong_bondell_wu_2015, title={Domain selection for the varying coefficient model via local polynomial regression}, volume={83}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2014.10.004}, abstractNote={In this article, we consider the varying coefficient model, which allows the relationship between the predictors and response to vary across the domain of interest, such as time. In applications, it is possible that certain predictors only affect the response in particular regions and not everywhere. This corresponds to identifying the domain where the varying coefficient is nonzero. Towards this goal, local polynomial smoothing and penalized regression are incorporated into one framework. Asymptotic properties of our penalized estimators are provided. Specifically, the estimators enjoy the oracle properties in the sense that they have the same bias and asymptotic variance as the local polynomial estimators as if the sparsity were known a priori. The choice of appropriate bandwidth and computational algorithms are discussed. The proposed method is examined via simulations and a real data example.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Kong, Dehan and Bondell, Howard D.
and Wu, Yichao}, year={2015}, month={Mar}, pages={236–250} } @article{li_staicu_bondell_2015, title={Incorporating covariates in skewed functional data models}, volume={16}, DOI={10.1093/biostatistics/kxu055}, abstractNote={We introduce a class of covariate-adjusted skewed functional models (cSFM) designed for functional data exhibiting location-dependent marginal distributions. We propose a semi-parametric copula model for the pointwise marginal distributions, which are allowed to depend on covariates, and the functional dependence, which is assumed covariate invariant. The proposed cSFM framework provides a unifying platform for pointwise quantile estimation and trajectory prediction. We consider a computationally feasible procedure that handles densely as well as sparsely observed functional data. The methods are examined numerically using simulations and applied to a new tractography study of multiple sclerosis. Furthermore, the methodology is implemented in the R package cSFM, which is publicly available on CRAN.}, number={3}, journal={Biostatistics (Oxford, England)}, author={Li, M. and Staicu, Ana-Maria and Bondell, H. D.}, year={2015}, pages={413–426} } @article{chitwood_peterson_bondell_lashley_brown_deperno_2015, title={Perspectives of wildlife conservation professionals on intensive deer management}, volume={39}, ISSN={1938-5463}, url={http://dx.doi.org/10.1002/WSB.607}, DOI={10.1002/WSB.607}, abstractNote={Intensive deer management (IDM) is fundamentally changing how one of the most important game species in North America is being managed, but little is known about how wildlife conservation professionals view these changes. The IDM approach encourages privatization of deer (Odocoileus spp.) through practices including feeding, high fencing, artificial insemination and markets in deer semen, and translocation. To evaluate support for IDM practices, we surveyed 208 registrants of the 2010 Southeast Deer Study Group Meeting held in San Antonio, Texas, USA. Specifically, we evaluated support for IDM practices using state-agency wildlife biologists, private wildlife managers, and academics, and we evaluated how geographic region and employment type are related to opinions about IDM. Using Principal Components Analysis, we created 3 new scales that measured respondents’ opinions about deer management, deer husbandry, and deer hunting. We detected strong opposition to IDM among respondents, with respondents from universities having the strongest opposition, followed by state-agency employees from Texas, and private consultants from Texas (the latter having the greatest support for IDM). Our study highlights the need for critical and empirical evaluation of the articulation between IDM and the North American Model of Wildlife Conservation, particularly the tenets that assert wildlife are held in the public trust and advocate elimination of markets for wildlife. © 2015 The Wildlife Society.}, number={4}, journal={Wildlife Society Bulletin}, publisher={Wiley}, author={Chitwood, M. Colter and Peterson, M. Nils and Bondell, Howard D. and Lashley, Marcus A. and Brown, Robert D. and Deperno, Christopher S.}, year={2015}, month={Nov}, pages={751–756} } @article{jiang_bondell_wang_2014, title={Interquantile shrinkage and variable selection in quantile regression}, volume={69}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2013.08.006}, abstractNote={Examination of multiple conditional quantile functions provides a comprehensive view of the relationship between the response and covariates.
In situations where quantile slope coefficients share some common features, estimation efficiency and model interpretability can be improved by utilizing such commonality across quantiles. Furthermore, elimination of irrelevant predictors will also aid in estimation and interpretation. These motivations lead to the development of two penalization methods, which can identify the interquantile commonality and nonzero quantile coefficients simultaneously. The developed methods are based on a fused penalty that encourages sparsity of both quantile coefficients and interquantile slope differences. The oracle properties of the proposed penalization methods are established. Through numerical investigations, it is demonstrated that the proposed methods lead to simpler model structure and higher estimation efficiency than the traditional quantile regression estimation.}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Jiang, Liewen and Bondell, Howard D. and Wang, Huixia Judy}, year={2014}, month={Jan}, pages={208–219} } @article{huque_bondell_ryan_2014, title={On the impact of covariate measurement error on spatial regression modelling}, volume={25}, ISSN={["1099-095X"]}, DOI={10.1002/env.2305}, abstractNote={Spatial regression models have grown in popularity in response to rapid advances in geographic information system technology that allows epidemiologists to incorporate geographically indexed data into their studies. However, it turns out that there are some subtle pitfalls in the use of these models. We show that the presence of covariate measurement error can lead to significant sensitivity of parameter estimation to the choice of spatial correlation structure. We quantify the effect of measurement error on parameter estimates and then suggest two different ways to produce consistent estimates. We evaluate the methods through a simulation study. These methods are then applied to data on ischaemic heart disease. Copyright © 2014 John Wiley & Sons, Ltd.}, number={8}, journal={ENVIRONMETRICS}, author={Huque, Md Hamidul and Bondell, Howard D. and Ryan, Louise}, year={2014}, month={Dec}, pages={560–570} } @article{stevenson_peterson_bondell_moore_carrier_2014, title={Overcoming skepticism with education: interacting influences of worldview and climate change knowledge on perceived climate change risk among adolescents}, volume={126}, ISSN={0165-0009 1573-1480}, url={http://dx.doi.org/10.1007/s10584-014-1228-7}, DOI={10.1007/s10584-014-1228-7}, abstractNote={Though many climate literacy efforts attempt to communicate climate change as a risk, these strategies may be ineffective because among adults, worldview rather than scientific understanding largely drives climate change risk perceptions. Further, increased science literacy may polarize worldview-driven perceptions, making some climate literacy efforts ineffective among skeptics. Because worldviews are still forming in the teenage years, adolescents may represent a more receptive audience. This study examined how worldview and climate change knowledge related to acceptance of anthropogenic global warming (AGW) and in turn, climate change risk perception among middle school students in North Carolina, USA (n = 387). We found respondents with individualistic worldviews were 16.1 percentage points less likely to accept AGW than communitarian respondents at median knowledge levels, mirroring findings in similar studies among adults. 
The interaction between knowledge and worldview, however, was the opposite of that found in previous studies among adults: increased climate change knowledge was positively related to acceptance of AGW in both groups and had a stronger positive relationship among individualists. Though individualists were 24.1 percentage points less likely to accept AGW than communitarians at low levels (bottom decile) of climate change knowledge, there was no statistical difference in acceptance levels between individualists and communitarians at high levels of knowledge (top decile). Non-White respondents and females also demonstrated higher levels of AGW acceptance and climate change risk perception, respectively. Thus, education efforts specific to climate change may counteract divisions based on worldviews among adolescents.}, number={3-4}, journal={Climatic Change}, publisher={Springer Science and Business Media LLC}, author={Stevenson, Kathryn T. and Peterson, M. Nils and Bondell, Howard D. and Moore, Susan E. and Carrier, Sarah J.}, year={2014}, month={Aug}, pages={293–304} } @article{stevenson_peterson_carrier_strnad_bondell_kirby-hathaway_moore_2014, title={Role of Significant Life Experiences in Building Environmental Knowledge and Behavior Among Middle School Students}, volume={45}, ISSN={0095-8964 1940-1892}, url={http://dx.doi.org/10.1080/00958964.2014.901935}, DOI={10.1080/00958964.2014.901935}, abstractNote={Significant life experience research suggests that the presence of role models, time outdoors, and nature-related media foster pro-environmental behavior, but most research is qualitative. Based on a random sample of middle school students in North Carolina, USA, we found limited positive associations between the presence of a role model and time outdoors with behavior, and a negative association between watching nature television and environmental knowledge. The strongest predictors of environmental knowledge and behavior were student/teacher ratio and county income levels, respectively. We also found that Native Americans engaged in environmental behaviors more than Caucasians, and that African American and Hispanic students had lower levels of environmental knowledge. Accordingly, life experiences appear less important than promoting small class sizes and addressing challenges associated with lower incomes in schools.}, number={3}, journal={The Journal of Environmental Education}, publisher={Informa UK Limited}, author={Stevenson, Kathryn T. and Peterson, M. Nils and Carrier, Sarah J. and Strnad, Renee L. and Bondell, Howard D. and Kirby-Hathaway, Terri and Moore, Susan E.}, year={2014}, month={May}, pages={163–177} } @article{reich_bandyopadhyay_bondell_2013, title={A Nonparametric Spatial Model for Periodontal Data With Nonrandom Missingness}, volume={108}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2013.795487}, abstractNote={Periodontal disease (PD) progression is often quantified by clinical attachment level (CAL) defined as the distance down a tooth's root that is detached from the surrounding bone. Measured at six locations per tooth throughout the mouth (excluding the molars), it gives rise to a dependent data setup. These data are often reduced to a one-number summary, such as the whole-mouth average or the number of observations greater than a threshold, to be used as the response in a regression to identify important covariates related to the current state of a subject's periodontal health.
Rather than a simple one-number summary, we set out to analyze all available CAL data for each subject, exploiting the presence of spatial dependence, nonstationarity, and nonnormality. Also, many subjects have a considerable proportion of missing teeth, which cannot be considered missing at random because PD is the leading cause of adult tooth loss. Under a Bayesian paradigm, we propose a nonparametric flexible spatial (joint) model of observed CAL and the locations of missing teeth via kernel convolution methods, incorporating the aforementioned features of CAL data under a unified framework. Application of this methodology to a dataset recording the periodontal health of an African-American population, as well as simulation studies, reveals the gain in model fit and inference and provides a new perspective on unraveling covariate–response relationships in the presence of complexities posed by these data.}, number={503}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Reich, Brian J. and Bandyopadhyay, Dipankar and Bondell, Howard D.}, year={2013}, month={Sep}, pages={820–831} } @article{sharma_bondell_zhang_2013, title={Consistent Group Identification and Variable Selection in Regression With Correlated Predictors}, volume={22}, ISSN={["1537-2715"]}, DOI={10.1080/15533174.2012.707849}, abstractNote={Statistical procedures for variable selection have become integral elements in any analysis. Successful procedures are characterized by high predictive accuracy, yielding interpretable models while retaining computational efficiency. Penalized methods that perform coefficient shrinkage have been shown to be successful in many cases. Models with correlated predictors are particularly challenging to tackle. We propose a penalization procedure that performs variable selection while clustering groups of predictors automatically. The oracle properties of this procedure, including consistency in group identification, are also studied. The proposed method compares favorably with existing selection approaches in both prediction accuracy and model discovery, while retaining its computational efficiency. Supplementary materials are available online.}, number={2}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Sharma, Dhruv B. and Bondell, Howard D. and Zhang, Hao Helen}, year={2013}, month={Jun}, pages={319–340} } @article{bondell_stefanski_2013, title={Efficient Robust Regression via Two-Stage Generalized Empirical Likelihood}, volume={108}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2013.779847}, abstractNote={Large- and finite-sample efficiency and resistance to outliers are the key goals of robust statistics. Although often not simultaneously attainable, we develop and study a linear regression estimator that comes close. Efficiency is obtained from the estimator's close connection to generalized empirical likelihood, and its favorable robustness properties are obtained by constraining the associated sum of (weighted) squared residuals. We prove maximum attainable finite-sample replacement breakdown point and full asymptotic efficiency for normal errors. Simulation evidence shows that compared to existing robust regression estimators, the new estimator has relatively high efficiency for small sample sizes and comparable outlier resistance.
The estimator is further illustrated and compared to existing methods via application to a real dataset with purported outliers.}, number={502}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Bondell, Howard D. and Stefanski, Leonard A.}, year={2013}, month={Jun}, pages={644–655} } @article{stevenson_peterson_bondell_mertig_moore_2013, title={Environmental, institutional, and demographic predictors of environmental literacy among middle school children}, volume={8}, number={3}, journal={PLoS One}, author={Stevenson, K. T. and Peterson, M. N. and Bondell, H. D. and Mertig, A. G. and Moore, S. E.}, year={2013} } @article{post_bondell_2013, title={Factor Selection and Structural Identification in the Interaction ANOVA Model}, volume={69}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2012.01810.x}, abstractNote={When faced with categorical predictors and a continuous response, the objective of an analysis often consists of two tasks: finding which factors are important and determining which levels of the factors differ significantly from one another. Oftentimes, these tasks are done separately using Analysis of Variance (ANOVA) followed by a post hoc hypothesis testing procedure such as Tukey's Honestly Significant Difference test. When interactions between factors are included in the model, the collapsing of levels of a factor becomes a more difficult problem. When testing for differences between two levels of a factor, claiming no difference would refer not only to equality of main effects, but also to equality of each interaction involving those levels. This structure between the main effects and interactions in a model is similar to the idea of heredity used in regression models. This article introduces a new method for accomplishing both of the common analysis tasks simultaneously in an interaction model while also adhering to the heredity‐type constraint on the model. An appropriate penalization is constructed that encourages levels of factors to collapse and entire factors to be set to zero. It is shown that the procedure has the oracle property, implying that asymptotically it performs as well as if the exact structure were known beforehand. We also discuss the application to estimating interactions in the unreplicated case. Simulation studies show the procedure outperforms post hoc hypothesis testing procedures as well as similar methods that do not include a structural constraint. The method is also illustrated using a real data example.}, number={1}, journal={BIOMETRICS}, author={Post, Justin B. and Bondell, Howard D.}, year={2013}, month={Mar}, pages={70–79} } @article{jiang_wang_bondell_2013, title={Interquantile Shrinkage in Regression Models}, volume={22}, ISSN={["1537-2715"]}, DOI={10.1080/10618600.2012.707454}, abstractNote={Conventional analysis using quantile regression typically focuses on fitting the regression model at different quantiles separately. However, in situations where the quantile coefficients share some common feature, joint modeling of multiple quantiles to accommodate the commonality often leads to more efficient estimation. One example of common features is that a predictor may have a constant effect over one region of quantile levels but varying effects in other regions. To automatically perform estimation and detection of the interquantile commonality, we develop two penalization methods.
When the quantile slope coefficients indeed do not change across quantile levels, the proposed methods will shrink the slopes toward a constant and thus improve the estimation efficiency. We establish the oracle properties of the two proposed penalization methods. Through numerical investigations, we demonstrate that the proposed methods lead to estimates with efficiency comparable to or higher than standard quantile regression estimation in finite samples. Supplementary materials for the article are available online.}, number={4}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Jiang, Liewen and Wang, Huixia Judy and Bondell, Howard D.}, year={2013}, month={Dec}, pages={970–986} } @article{lin_bondell_zhang_zou_2013, title={Variable selection for non-parametric quantile regression via smoothing spline analysis of variance}, volume={2}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/STA4.33}, DOI={10.1002/STA4.33}, abstractNote={Quantile regression provides a more thorough view of the effect of covariates on a response. Non‐parametric quantile regression has become a viable alternative to avoid restrictive parametric assumptions. The problem of variable selection for quantile regression is challenging, as important variables can influence various quantiles in different ways. We tackle the problem via regularization in the context of smoothing spline analysis of variance models. The proposed sparse non‐parametric quantile regression can identify important variables and provide flexible estimates for quantiles. Our numerical study suggests the promising performance of the new procedure in variable selection and function estimation. Copyright © 2013 John Wiley & Sons Ltd}, number={1}, journal={Stat}, publisher={Wiley}, author={Lin, Chen-Yen and Bondell, Howard and Zhang, Hao Helen and Zou, Hui}, year={2013}, month={Nov}, pages={255–268} } @article{gunes_bondell_2012, title={A Confidence Region Approach to Tuning for Variable Selection}, volume={21}, ISSN={["1061-8600"]}, DOI={10.1080/10618600.2012.679890}, abstractNote={We develop an approach to tuning of penalized regression variable selection methods by calculating the sparsest estimator contained in a confidence region of a specified level. Because confidence intervals/regions are generally understood, tuning penalized regression methods in this way is intuitive and more easily understood by scientists and practitioners. More importantly, our work shows that tuning to a fixed confidence level often performs better than tuning via the common methods based on Akaike information criterion (AIC), Bayesian information criterion (BIC), or cross-validation (CV) over a wide range of sample sizes and levels of sparsity. Additionally, we prove that by tuning with a sequence of confidence levels converging to one, asymptotic selection consistency is obtained, and with a simple two-stage procedure, an oracle property is achieved. The confidence-region-based tuning parameter is easily calculated using output from existing penalized regression computer packages. Our work also shows how to map any penalty parameter to a corresponding confidence coefficient. This mapping facilitates comparisons of tuning parameter selection methods such as AIC, BIC, and CV, and reveals that the resulting tuning parameters correspond to confidence levels that are extremely low, and can vary greatly across datasets.
Supplemental materials for the article are available online.}, number={2}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Gunes, Funda and Bondell, Howard D.}, year={2012}, month={Jun}, pages={295–314} } @article{bondell_reich_2012, title={Consistent High-Dimensional Bayesian Variable Selection via Penalized Credible Regions}, volume={107}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2012.716344}, abstractNote={For high-dimensional data, particularly when the number of predictors greatly exceeds the sample size, selection of relevant predictors for regression is a challenging problem. Methods such as sure screening, forward selection, or penalized regressions are commonly used. Bayesian variable selection methods place prior distributions on the parameters along with a prior over model space, or equivalently, a mixture prior on the parameters having mass at zero. Since exhaustive enumeration is not feasible, posterior model probabilities are often obtained via long Markov chain Monte Carlo (MCMC) runs. The chosen model can depend heavily on various choices for priors and also posterior thresholds. Alternatively, we propose a conjugate prior only on the full model parameters and use sparse solutions within posterior credible regions to perform selection. These posterior credible regions often have closed-form representations, and it is shown that these sparse solutions can be computed via existing algorithms. The approach is shown to outperform common methods in the high-dimensional setting, particularly under correlation. By searching for a sparse solution within a joint credible region, consistent model selection is established. Furthermore, it is shown that, under certain conditions, the use of marginal credible intervals can give consistent selection up to the case where the dimension grows exponentially in the sample size. The proposed approach successfully accomplishes variable selection in the high-dimensional setting, while avoiding pitfalls that plague typical Bayesian variable selection methods.}, number={500}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Bondell, Howard D. and Reich, Brian J.}, year={2012}, month={Dec}, pages={1610–1624} } @article{dalrymple_peterson_cobb_sills_bondell_dalrymple_2012, title={Estimating public willingness to fund nongame conservation through state tax initiatives}, volume={36}, ISSN={1938-5463}, url={http://dx.doi.org/10.1002/wsb.164}, DOI={10.1002/wsb.164}, abstractNote={Nongame conservation is insufficiently funded at local, national, and global levels. Despite campaigns and reforms over the past 30 years in the United States, adequate and consistent federal funding has failed to materialize and shifted the focus to state-level initiatives. We surveyed North Carolina residents during April–May 2010, to assess public willingness to fund nongame conservation, preferred nongame conservation funding mechanisms, and key predictors of support for nongame funding. We estimated a model of willingness-to-pay (WTP) using interval-censored data modeling and compared models using the Akaike Information Criterion. The mean WTP was US$98.80/year/household when respondents were allowed to choose their own tax vehicle, thus removing any payment vehicle bias; an additional sales tax on outdoor recreation equipment was the most preferred funding mechanism. In a follow-up question, respondents indicated a mean WTP of US$32.92/employed adult (equivalent to about $65/household) annually via a flat income tax. 
The importance of nongame conservation to respondents, frequency of watching and/or enjoying wildlife, and education were positively related to WTP, whereas age was negatively related to WTP. Prisons were the most popular source from which to reallocate funds to nongame conservation (48%), and respondents believed an average of US$545,000 should be reallocated. Our findings suggested that while the general public indicated that they valued nongame conservation and were amenable to tax increases or reallocations for nongame conservation, they believed that taxes should be user-based and specialized (e.g., outdoor equipment taxes). These findings highlighted public WTP for nongame conservation even during an economic recession. © 2012 The Wildlife Society.}, number={3}, journal={Wildlife Society Bulletin}, publisher={Wiley}, author={Dalrymple, C. Jane and Peterson, M. Nils and Cobb, David T. and Sills, Erin O. and Bondell, Howard D. and Dalrymple, D. Joseph}, year={2012}, month={Jul}, pages={483–491} } @article{peterson_thurmond_mchale_rodriguez_bondell_cook_2012, title={Predicting native plant landscaping preferences in urban areas}, volume={5}, ISSN={2210-6707}, url={http://dx.doi.org/10.1016/j.scs.2012.05.007}, DOI={10.1016/j.scs.2012.05.007}, abstractNote={The rapidly growing physical footprint of cities makes understanding residential landscaping preferences increasingly important for water quality, biodiversity conservation, and addressing climate change. In this paper we answer four interrelated questions about residential landscaping preferences with a case study in Raleigh, NC: (1) How are residents’ landscaping preferences influenced by what residents believe their neighbors prefer? (2) Do residents accurately assess their neighbors’ landscaping preferences? (3) How does ethnicity influence landscaping preferences? and (4) Do the socio-demographic and neighborhood norm based correlates of landscaping preferences persist when both are accounted for in multivariate models? Respondents (n = 179) in this study preferred a 50% native plant garden design over 100% turf grass or the 75% and 100% native plant garden designs, and inaccurately assumed that their neighbors preferred turf over the native plant garden based landscaping designs. These results suggest that correcting erroneous assumptions about neighborhood preferences may alleviate normative pressure against adopting alternatives to turf grass landscaping. Although landscaping choices were best predicted by what residents perceived their neighbors preferred, ethnicity, income, and home ownership were also related to landscape preferences. African American ethnicity and income were positively related to preference for turf grass coverage. Environmental justice concerns linked to urban vegetation should be considered in light of the finding that African Americans appeared to prefer turf grass dominated landscaping. Results from this study indicate that middle income neighborhoods with high levels of home ownership may prove most receptive to initiatives aimed at increasing the use of more sustainable landscaping.}, journal={Sustainable Cities and Society}, publisher={Elsevier BV}, author={Peterson, M. Nils and Thurmond, Brandi and Mchale, Melissa and Rodriguez, Shari and Bondell, Howard D. 
and Cook, Merril}, year={2012}, month={Dec}, pages={70–76} } @article{rodriguez_peterson_cubbage_sills_bondell_2012, title={Private landowner interest in market-based incentive programs for endangered species habitat conservation}, volume={36}, ISSN={1938-5463}, url={http://dx.doi.org/10.1002/wsb.159}, DOI={10.1002/wsb.159}, abstractNote={More than 75% of endangered species in the United States rely on private lands for habitat. Although this habitat has long been regulated under the Endangered Species Act, there is now broad agreement that economic incentives are also needed for effective protection on private land. Many different mechanisms for incentive programs have been proposed and tested. For example, recovery credit systems use term-duration market-based contracts to engage landowners in endangered species conservation. We examined how market-mechanism design influences interest in endangered species habitat conservation using a survey of North Carolina Farm Bureau county advisory board members in 93 of the 100 North Carolina counties (n = 735) in 2009. Respondents preferred contracts (57% were interested) over easements (39% were interested). Endangered species conservation ranked low in importance relative to other conservation issues, but 45% of respondents were interested in contracts to conserve endangered species habitat on their property. The preferred contract duration was 10 years, and respondents preferred state- and agricultural-related organizations over federal and wildlife conservation-related organizations for managing contracts. Younger respondents, respondents who had previously participated in conservation programs, respondents who perceived endangered species conservation as important, and respondents who had lower property-rights orientation scores, were most likely to be interested in contracts to restore and maintain endangered species habitat on their lands. Our results suggest that market mechanisms could drive down costs and drive up durations for endangered species habitat conservation contracts. Further, term contracts may prove critical for endangered species conservation efforts that require high levels of landowner support and spatial flexibility within relatively short time frames. © 2012 The Wildlife Society.}, number={3}, journal={Wildlife Society Bulletin}, publisher={Wiley}, author={Rodriguez, Shari L. and Peterson, M. Nils and Cubbage, Frederick W. and Sills, Erin O. and Bondell, Howard D.}, year={2012}, month={Jul}, pages={469–476} } @article{reich_kalendra_storlie_bondell_fuentes_2012, title={Variable selection for high dimensional Bayesian density estimation: application to human exposure simulation}, volume={61}, journal={Journal of the Royal Statistical Society. Series C, Applied Statistics}, author={Reich, B. J. and Kalendra, E. and Storlie, C. B. and Bondell, H. D. and Fuentes, M.}, year={2012}, pages={47–66} } @article{reich_bondell_2011, title={A Spatial Dirichlet Process Mixture Model for Clustering Population Genetics Data}, volume={67}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2010.01484.x}, abstractNote={Identifying homogeneous groups of individuals is an important problem in population genetics. Recently, several methods have been proposed that exploit spatial information to improve clustering algorithms. In this article, we develop a Bayesian clustering algorithm based on the Dirichlet process prior that uses both genetic and spatial information to classify individuals into homogeneous clusters for further study.
We study the performance of our method using a simulation study and use our model to cluster wolverines in Western Montana using microsatellite data.}, number={2}, journal={BIOMETRICS}, author={Reich, Brian J. and Bondell, Howard D.}, year={2011}, month={Jun}, pages={381–390} } @article{freire_robertson_bondell_brown_hash_pease_lascelles_2011, title={RADIOGRAPHIC EVALUATION OF FELINE APPENDICULAR DEGENERATIVE JOINT DISEASE VS. MACROSCOPIC APPEARANCE OF ARTICULAR CARTILAGE}, volume={52}, ISSN={["1058-8183"]}, url={https://dx.doi.org/10.1111/j.1740-8261.2011.01803.x}, DOI={10.1111/j.1740-8261.2011.01803.x}, abstractNote={Degenerative joint disease (DJD) is common in domesticated cats. Our purpose was to describe how radiographic findings thought to indicate feline DJD relate to macroscopic cartilage degeneration in appendicular joints. Thirty adult cats euthanized for reasons unrelated to this study were evaluated. Orthogonal digital radiographs of the elbow, tarsus, stifle, and coxofemoral joints were evaluated for the presence of DJD. The same joints were dissected for visual inspection of changes indicative of DJD, and macroscopic cartilage damage was graded using a Total Cartilage Damage Score. When considering all joints, there was a statistically significant fair correlation between cartilage damage and the presence of osteophytes and joint-associated mineralizations, and the subjective radiographic DJD score. Most correlations were statistically significant when looking at the different joints individually, but only the correlation of the presence of osteophytes and the subjective radiographic DJD score with the presence of cartilage damage in the elbow and coxofemoral joints had a value above 0.4 (moderate correlation). The joints most likely to have cartilage damage without radiographic evidence of DJD are the stifle (71% of radiographically normal joints), followed by the coxofemoral joint (57%), elbow (57%), and tarsal joint (46%). Our data indicate that radiographic findings do not relate well to cartilage degeneration and that other modalities should be evaluated to aid in making a diagnosis of feline DJD.}, number={3}, journal={VETERINARY RADIOLOGY & ULTRASOUND}, author={Freire, Mila and Robertson, Ian and Bondell, Howard D. and Brown, James and Hash, Jon and Pease, Anthony P. and Lascelles, B. Duncan X.}, year={2011}, pages={239–247} } @article{reich_bondell_li_2011, title={Sufficient Dimension Reduction via Bayesian Mixture Modeling}, volume={67}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2010.01501.x}, abstractNote={Summary Dimension reduction is central to an analysis of data with many predictors. Sufficient dimension reduction aims to identify the smallest possible number of linear combinations of the predictors, called the sufficient predictors, that retain all of the information in the predictors about the response distribution. In this article, we propose a Bayesian solution for sufficient dimension reduction. We directly model the response density in terms of the sufficient predictors using a finite mixture model. This approach is computationally efficient and offers a unified framework to handle categorical predictors, missing predictors, and Bayesian variable selection. We illustrate the method using both a simulation study and an analysis of an HIV data set.}, number={3}, journal={BIOMETRICS}, author={Reich, Brian J. and Bondell, Howard D. and Li, Lexin}, year={2011}, month={Sep}, pages={886–895} }
@article{storlie_bondell_reich_zhang_2011, title={Surface estimation, variable selection, and the nonparametric oracle property}, volume={21}, number={2}, journal={Statistica Sinica}, author={Storlie, C. B. and Bondell, H. D. and Reich, B. J. and Zhang, H. H.}, year={2011}, pages={679–705} } @article{storlie_bondell_reich_2010, title={A Locally Adaptive Penalty for Estimation of Functions With Varying Roughness}, volume={19}, ISSN={["1537-2715"]}, DOI={10.1198/jcgs.2010.09020}, abstractNote={We propose a new regularization method called Loco-Spline for nonparametric function estimation. Loco-Spline uses a penalty that is data-driven and locally adaptive. This allows for more flexible estimation of the function in regions of the domain where it has more curvature, without overfitting in regions that have little curvature. This methodology is also transferred into higher dimensions via the Smoothing Spline ANOVA framework. General conditions for the optimal MSE rate of convergence are given, and the Loco-Spline is shown to achieve this rate. In our simulation study, the Loco-Spline substantially outperforms the traditional smoothing spline and the locally adaptive kernel smoother. Code to fit Loco-Spline models is included with the Supplemental Materials for this article, which are available online.}, number={3}, journal={JOURNAL OF COMPUTATIONAL AND GRAPHICAL STATISTICS}, author={Storlie, Curtis B. and Bondell, Howard D. and Reich, Brian J.}, year={2010}, month={Sep}, pages={569–589} } @article{koehler_bondell_tzeng_2010, title={Evaluating Haplotype Effects in Case-Control Studies via Penalized-Likelihood Approaches: Prospective or Retrospective Analysis?}, volume={34}, ISSN={["1098-2272"]}, DOI={10.1002/gepi.20545}, abstractNote={Penalized likelihood methods have become increasingly popular in recent years for evaluating haplotype‐phenotype association in case‐control studies. Although a retrospective likelihood is dictated by the sampling scheme, these penalized methods are typically built on prospective likelihoods due to their modeling simplicity and computational feasibility. It has been well documented that for unpenalized methods, prospective analyses of case‐control data can be valid but less efficient than their retrospective counterparts when testing for association, and result in substantial bias when estimating the haplotype effects. For penalized methods, which combine effect estimation and testing in one step, the impact of using a prospective likelihood is not clear. In this work, we examine the consequences of ignoring the sampling scheme for haplotype‐based penalized likelihood methods. Our results suggest that the impact of prospective analyses depends on (1) the underlying genetic mode and (2) the genetic model adopted in the analysis. When the correct genetic model is used, the difference between the two analyses is negligible for additive and slight for dominant haplotype effects. For recessive haplotype effects, the more appropriate retrospective likelihood clearly outperforms the prospective likelihood. If an additive model is incorrectly used, as the true underlying genetic mode is unknown a priori, both retrospective and prospective penalized methods suffer from a sizeable power loss and an increase in bias. The impact of using the incorrect genetic model is much bigger on retrospective analyses than on prospective analyses, and results in comparable performances for both methods.
An application of these methods to the Genetic Analysis Workshop 15 rheumatoid arthritis data is provided. © 2010 Wiley‐Liss, Inc.}, number={8}, journal={GENETIC EPIDEMIOLOGY}, author={Koehler, Megan L. and Bondell, Howard D. and Tzeng, Jung-Ying}, year={2010}, month={Dec}, pages={892–911} } @article{reich_bondell_wang_2010, title={Flexible Bayesian quantile regression for independent and clustered data}, volume={11}, ISSN={["1465-4644"]}, DOI={10.1093/biostatistics/kxp049}, abstractNote={Quantile regression has emerged as a useful supplement to ordinary mean regression. Traditional frequentist quantile regression makes only minimal assumptions about the form of the error distribution and thus is able to accommodate nonnormal errors, which are common in many applications. However, inference for these models is challenging, particularly for clustered or censored data. A Bayesian approach enables exact inference and is well suited to incorporate clustered, missing, or censored data. In this paper, we propose a flexible Bayesian quantile regression model. We assume that the error distribution is an infinite mixture of Gaussian densities subject to a stochastic constraint that enables inference on the quantile of interest. This method outperforms the traditional frequentist method under a wide array of simulated data models. We extend the proposed approach to analyze clustered data. Here, we differentiate between conditional and marginal models for clustered data and develop both. We apply our methods to analyze a multipatient apnea duration data set.}, number={2}, journal={BIOSTATISTICS}, author={Reich, Brian J. and Bondell, Howard D. and Wang, Huixia J.}, year={2010}, month={Apr}, pages={337–352} } @article{bondell_krishna_ghosh_2010, title={Joint Variable Selection for Fixed and Random Effects in Linear Mixed-Effects Models}, volume={66}, ISSN={["1541-0420"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-78650073483&partnerID=MN8TOARS}, DOI={10.1111/j.1541-0420.2010.01391.x}, abstractNote={Summary It is of great practical interest to simultaneously identify the important predictors that correspond to both the fixed and random effects components in a linear mixed‐effects (LME) model. Typical approaches perform selection separately on each of the fixed and random effect components. However, changing the structure of one set of effects can lead to different choices of variables for the other set of effects. We propose simultaneous selection of the fixed and random factors in an LME model using a modified Cholesky decomposition. Our method is based on a penalized joint log likelihood with an adaptive penalty for the selection and estimation of both the fixed and random effects. It performs model selection by allowing fixed effects or standard deviations of random effects to be exactly zero. A constrained expectation–maximization algorithm is then used to obtain the final estimates. It is further shown that the proposed penalized estimator enjoys the oracle property, in that, asymptotically, it performs as well as if the true model were known beforehand. We demonstrate the performance of our method based on a simulation study and a real data example.}, number={4}, journal={BIOMETRICS}, author={Bondell, Howard D. and Krishna, Arun and Ghosh, Sujit K.}, year={2010}, month={Dec}, pages={1069–1077} }
@article{bondell_reich_wang_2010, title={Noncrossing quantile regression curve estimation}, volume={97}, ISSN={["0006-3444"]}, DOI={10.1093/biomet/asq048}, abstractNote={Since quantile regression curves are estimated individually, the quantile curves can cross, leading to an invalid distribution for the response. A simple constrained version of quantile regression is proposed to avoid the crossing problem for both linear and nonparametric quantile curves. A simulation study and a reanalysis of tropical cyclone intensity data show the usefulness of the procedure. Asymptotic properties of the estimator are equivalent to those of the typical approach under standard conditions, and the proposed estimator reduces to the classical one if there is no crossing. The constrained estimator also shows significantly improved performance, adding smoothness and stability across the quantile levels.}, number={4}, journal={BIOMETRIKA}, author={Bondell, Howard D. and Reich, Brian J. and Wang, Huixia}, year={2010}, month={Dec}, pages={825–838} } @article{tzeng_bondell_2009, title={A comprehensive approach to haplotype-specific analysis by penalized likelihood}, volume={18}, ISSN={1018-4813 1476-5438}, url={http://dx.doi.org/10.1038/ejhg.2009.118}, DOI={10.1038/ejhg.2009.118}, abstractNote={Haplotypes can hold key information to understand the role of candidate genes in disease etiology. However, standard haplotype analysis has not yet been able to fully reveal the information retained by haplotypes. In most analyses, haplotype inference focuses on relative effects compared with an arbitrarily chosen baseline haplotype. It does not depict the effect structure unless an additional inference procedure is used in a secondary post hoc analysis, and such analysis tends to lack power. In this study, we propose a penalized regression approach to systematically evaluate the pattern and structure of the haplotype effects. By specifying an L1 penalty on the pairwise differences of the haplotype effects, we present a model-based haplotype analysis to detect and to characterize the haplotypic association signals. The proposed method avoids the need to choose a baseline haplotype; it simultaneously carries out the effect estimation and effect comparison of all haplotypes, and outputs the haplotype group structure based on their effect sizes. Finally, our penalty weights are theoretically designed to balance the likelihood and the penalty term in an appropriate manner. The proposed method can be used as a tool to comprehend candidate regions identified from a genome or chromosomal scan.
Simulation studies show that the proposed method identifies the haplotype effect structure better than the traditional haplotype association methods, demonstrating its informativeness and power.}, number={1}, journal={European Journal of Human Genetics}, publisher={Springer Science and Business Media LLC}, author={Tzeng, Jung-Ying and Bondell, Howard D}, year={2009}, month={Jul}, pages={95–103} } @article{krishna_bondell_ghosh_2009, title={Bayesian variable selection using an adaptive powered correlation prior}, volume={139}, ISSN={["1873-1171"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-67349268430&partnerID=MN8TOARS}, DOI={10.1016/j.jspi.2008.12.004}, abstractNote={The problem of selecting the correct subset of predictors within a linear model has received much attention in recent literature. Within the Bayesian framework, a popular choice of prior has been Zellner's g-prior, which is based on the inverse of the empirical covariance matrix of the predictors. An extension of Zellner's prior is proposed in this article that allows for a power parameter on the empirical covariance of the predictors. The power parameter helps control the degree to which correlated predictors are smoothed towards or away from one another. In addition, the empirical covariance of the predictors is used to obtain suitable priors over model space. In this manner, the power parameter also helps to determine whether models containing highly collinear predictors are preferred or avoided. The proposed power parameter can be chosen via an empirical Bayes method, which leads to a data-adaptive choice of prior. Simulation studies and a real data example are presented to show how the power parameter is well determined by the degree of cross-correlation within the predictors. The proposed modification compares favorably to the standard use of Zellner's prior and an intrinsic prior in these examples.}, number={8}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Krishna, Arun and Bondell, Howard D. and Ghosh, Sujit K.}, year={2009}, month={Aug}, pages={2665–2674} } @article{bondell_li_2009, title={Shrinkage inverse regression estimation for model-free variable selection}, volume={71}, ISSN={["1467-9868"]}, DOI={10.1111/j.1467-9868.2008.00686.x}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Bondell, Howard D. and Li, Lexin}, year={2009}, pages={287–299} } @article{bondell_reich_2009, title={Simultaneous Factor Selection and Collapsing Levels in ANOVA}, volume={65}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2008.01061.x}, abstractNote={Summary When performing an analysis of variance, the investigator often has two main goals: to determine which of the factors have a significant effect on the response, and to detect differences among the levels of the significant factors. Level comparisons are done via a post‐hoc analysis based on pairwise differences. This article proposes a novel constrained regression approach to simultaneously accomplish both goals via shrinkage within a single automated procedure. The form of this shrinkage has the ability to collapse levels within a factor by setting their effects to be equal, while also achieving factor selection by zeroing out entire factors. Using this approach also leads to the identification of a structure within each factor, as levels can be automatically collapsed to form groups.
In contrast to the traditional pairwise comparison methods, these groups are necessarily nonoverlapping so that the results are interpretable in terms of distinct subsets of levels. The proposed procedure is shown to have the oracle property in that asymptotically it performs as well as if the exact structure were known beforehand. A simulation study and real data examples show the strong performance of the method.}, number={1}, journal={BIOMETRICS}, author={Bondell, Howard D. and Reich, Brian J.}, year={2009}, month={Mar}, pages={169–177} } @article{reich_storlie_bondell_2009, title={Variable Selection in Bayesian Smoothing Spline ANOVA Models: Application to Deterministic Computer Codes}, volume={51}, ISSN={["1537-2723"]}, DOI={10.1198/TECH.2009.0013}, abstractNote={With many predictors, choosing an appropriate subset of the covariates is a crucial—and difficult—step in nonparametric regression. We propose a Bayesian nonparametric regression model for curve fitting and variable selection. We use the smoothing splines ANOVA framework to decompose the regression function into interpretable main effect and interaction functions, and use stochastic search variable selection through Markov chain Monte Carlo sampling to search for models that fit the data well. We also show that variable selection is highly sensitive to hyperparameter choice, and develop a technique for selecting hyperparameters that control the long-run false-positive rate. We use our method to build an emulator for a complex computer model for two-phase fluid flow.}, number={2}, journal={TECHNOMETRICS}, author={Reich, Brian J. and Storlie, Curtis B. and Bondell, Howard D.}, year={2009}, month={May}, pages={110–120} } @article{bondell_2008, title={A characteristic function approach to the biased sampling model, with application to robust logistic regression}, volume={138}, ISSN={["0378-3758"]}, DOI={10.1016/j.jspi.2007.01.004}, abstractNote={Many sampling problems from multiple populations can be considered under the semiparametric framework of the biased, or weighted, sampling model. Included under this framework is logistic regression under case–control sampling. For any model, atypical observations can greatly influence the maximum likelihood estimate of the parameters. Several robust alternatives have been proposed for the special case of logistic regression. However, some current techniques can exhibit poor behavior in many common situations. In this paper, a new family of procedures is constructed to estimate the parameters in the semiparametric biased sampling model. The procedures incorporate a minimum distance approach, but one based on characteristic functions. The estimators can also be represented as the minimizers of quadratic forms in simple residuals, thus yielding straightforward computation. For the case of logistic regression, the resulting estimators are shown to be competitive with the existing robust approaches in terms of both robustness and efficiency, while maintaining affine equivariance.
The approach is developed under the case–control sampling scheme, yet is shown to be applicable under prospective sampling logistic regression as well.}, number={3}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Bondell, Howard D.}, year={2008}, month={Mar}, pages={742–755} } @article{bondell_2008, title={On robust and efficient estimation of the center of symmetry}, volume={37}, ISSN={["0361-0926"]}, DOI={10.1080/03610920701653144}, abstractNote={In this article, a class of estimators of the center of symmetry based on the empirical characteristic function is examined. In the spirit of the Hodges–Lehmann estimator, the resulting procedures are shown to be a function of the pairwise averages. The proposed procedures are also shown to have an equivalent representation as the minimizers of certain distances between two corresponding kernel density estimators. An alternative characterization of the Hodges–Lehmann estimator is established upon the use of a particularly simple choice of kernel.}, number={3}, journal={COMMUNICATIONS IN STATISTICS-THEORY AND METHODS}, author={Bondell, Howard D.}, year={2008}, pages={318–327} } @article{bondell_reich_2008, title={Simultaneous regression shrinkage, variable selection, and supervised clustering of predictors with OSCAR}, volume={64}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2007.00843.x}, abstractNote={Summary Variable selection can be challenging, particularly in situations with a large number of predictors with possibly high correlations, such as gene expression data. In this article, a new method called the OSCAR (octagonal shrinkage and clustering algorithm for regression) is proposed to simultaneously select variables while grouping them into predictive clusters. In addition to improving prediction accuracy and interpretation, these resulting groups can then be investigated further to discover what contributes to the group having a similar behavior. The technique is based on penalized least squares with a geometrically intuitive penalty function that shrinks some coefficients to exactly zero. Additionally, this penalty yields exact equality of some coefficients, encouraging correlated predictors that have a similar effect on the response to form predictive clusters represented by a single coefficient. The proposed procedure is shown to compare favorably to the existing shrinkage and variable selection techniques in terms of both prediction error and model complexity, while yielding the additional grouping information.}, number={1}, journal={BIOMETRICS}, author={Bondell, Howard D. and Reich, Brian J.}, year={2008}, month={Mar}, pages={115–123} } @article{bondell_liu_schisterman_2007, title={Statistical inference based on pooled data: A moment-based estimating equation approach}, volume={34}, ISSN={["1360-0532"]}, DOI={10.1080/02664760600994844}, abstractNote={Abstract We consider statistical inference on parameters of a distribution when only pooled data are observed. A moment-based estimating equation approach is proposed to deal with situations where likelihood functions based on pooled data are difficult to work with. We outline the method to obtain estimates and test statistics of the parameters of interest in the general setting. We demonstrate the approach on the family of distributions generated by the Box–Cox transformation model, and, in the process, construct tests for goodness of fit based on the pooled data.}, number={2}, journal={JOURNAL OF APPLIED STATISTICS}, author={Bondell, Howard D. 
and Liu, Aiyi and Schisterman, Enrique F.}, year={2007}, pages={129–140} } @article{bondell_2007, title={Testing goodness-of-fit in logistic case-control studies}, volume={94}, ISSN={["0006-3444"]}, DOI={10.1093/biomet/asm033}, abstractNote={We present a goodness-of-fit test for the logistic regression model under case-control sampling. The test statistic is constructed via a discrepancy between two competing kernel density estimators of the underlying conditional distributions given case-control status. The proposed goodness-of-fit test is shown to compare very favourably with previously proposed tests for case-control sampling in terms of power. The test statistic can be easily computed as a quadratic form in the residuals from a prospective logistic regression maximum likelihood fit. In addition, the proposed test is affine invariant and has an alternative representation in terms of empirical characteristic functions. Copyright 2007, Oxford University Press.}, number={2}, journal={BIOMETRIKA}, author={Bondell, Howard D.}, year={2007}, month={Jun}, pages={487–495} }
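
The entries above are abstracts of methodological papers; the short Python sketches that follow are editorial illustrations of a few of the ideas, not the authors' implementations. First, the noncrossing quantile regression of bondell_reich_wang_2010: a minimal sketch, assuming the constraint is imposed only at the observed design points and that X already contains an intercept column. The function name and the choice of two fixed quantile levels are ours.

# Minimal sketch of noncrossing quantile regression (cf. bondell_reich_wang_2010):
# fit two linear quantile curves jointly as one linear program, with the added
# constraint X @ b_lo <= X @ b_hi at every observed design point.
import numpy as np
from scipy.optimize import linprog

def noncrossing_qr(X, y, tau_lo=0.25, tau_hi=0.75):
    n, p = X.shape
    # Variable order: [b_lo (p), u_lo (n), v_lo (n), b_hi (p), u_hi (n), v_hi (n)],
    # where u, v >= 0 split each residual so the check loss becomes linear.
    c = np.concatenate([np.zeros(p), tau_lo * np.ones(n), (1 - tau_lo) * np.ones(n),
                        np.zeros(p), tau_hi * np.ones(n), (1 - tau_hi) * np.ones(n)])
    I, Z, Zp = np.eye(n), np.zeros((n, n)), np.zeros((n, p))
    # Residual identities X b + u - v = y, one copy per quantile level.
    A_eq = np.block([[X, I, -I, Zp, Z, Z],
                     [Zp, Z, Z, X, I, -I]])
    b_eq = np.concatenate([y, y])
    # Noncrossing at the design points: X b_lo - X b_hi <= 0.
    A_ub = np.block([[X, Z, Z, -X, Z, Z]])
    bounds = [(None, None)] * p + [(0, None)] * (2 * n) \
           + [(None, None)] * p + [(0, None)] * (2 * n)
    res = linprog(c, A_ub=A_ub, b_ub=np.zeros(n), A_eq=A_eq, b_eq=b_eq,
                  bounds=bounds, method="highs")
    return res.x[:p], res.x[p + 2 * n: 2 * p + 2 * n]

With the inequality constraint removed, the program separates into two ordinary quantile regressions, matching the abstract's remark that the estimator reduces to the classical one when no crossing occurs.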
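
Next, the OSCAR penalty of bondell_reich_2008. The paper develops its own algorithm; the sketch below simply hands one common parameterization of the convex objective (squared error plus an L1 term plus a pairwise-max term) to the generic solver cvxpy, so it is practical only for small p. The tuning constants lam1 and lam2 are placeholders.

# OSCAR objective (cf. bondell_reich_2008) solved with a generic convex solver.
import cvxpy as cp
import numpy as np

def oscar(X, y, lam1=1.0, lam2=0.5):
    n, p = X.shape
    b = cp.Variable(p)
    # The L1 term shrinks coefficients to exactly zero; the pairwise-max term
    # encourages exact equality |b_j| = |b_k|, clustering correlated predictors.
    pairmax = sum(cp.maximum(cp.abs(b[j]), cp.abs(b[k]))
                  for j in range(p) for k in range(j + 1, p))
    obj = 0.5 * cp.sum_squares(y - X @ b) + lam1 * cp.norm1(b) + lam2 * pairmax
    cp.Problem(cp.Minimize(obj)).solve()
    return b.value

Replacing the pairwise-max term with cp.abs(b[j] - b[k]) gives a pairwise-difference (fusion) penalty of the kind that drives the haplotype grouping of tzeng_bondell_2009 and the level collapsing of bondell_reich_2009.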
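
For the adaptive powered correlation prior of krishna_bondell_ghosh_2009, the abstract describes a power parameter on the empirical covariance of the predictors, with Zellner's g-prior based on its inverse. Assuming the prior covariance therefore takes the form g * sigma^2 * (X'X)^lam, with lam = -1 recovering the g-prior (our reading of the abstract, not a formula quoted from the paper), the only nonstandard computation is a real matrix power:

# Real power of a symmetric positive semidefinite matrix via eigendecomposition,
# as needed to form (X'X)^lam for a powered correlation prior.
import numpy as np

def sym_matrix_power(A, lam):
    w, V = np.linalg.eigh(A)
    w = np.clip(w, 1e-12, None)   # guard near-zero eigenvalues before powering
    return (V * w ** lam) @ V.T

Varying lam then controls whether coefficients of correlated predictors are smoothed toward or away from one another, which is the role the abstract attributes to the power parameter.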
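
Finally, bondell_2008 (center of symmetry) notes that, in the spirit of the Hodges–Lehmann estimator, the proposed procedures are functions of the pairwise averages. For reference, the classical Hodges–Lehmann point estimate itself is just the median of the Walsh averages; the paper's characteristic-function-based estimators are more general.

# Classical Hodges-Lehmann estimator: median of all pairwise (Walsh) averages.
import numpy as np

def hodges_lehmann(x):
    x = np.asarray(x, dtype=float)
    walsh = (x[:, None] + x[None, :]) / 2.0   # (x_i + x_j) / 2 for all i, j
    iu = np.triu_indices(len(x))              # keep each pair with i <= j once
    return np.median(walsh[iu])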