@comment{
  Maintenance notes for the entries below.
  - Page ranges use a double hyphen, months use the standard three-letter
    macros, and ISSN values are stored bare.
  - Four citation keys were changed because the originals could not be cited:
      thomas_stefanski_davidian_2013  (second occurrence, the CSDA article)
          is now thomas_stefanski_davidian_2013b
      mcintyre_stefanski_2011         (second occurrence, Statistics in Medicine)
          is now mcintyre_stefanski_2011b
      the two Aneja 2008 keys that contained a space ("et al.") now end in
          _etal_2008
  - NOTE(review): the editor list of davidian_lin_morris_stefanski_2014 was
    added by hand; confirm the given names against the publisher record.
}

@article{hu_laber_barker_stefanski_2019,
  author       = {Hu, Wenhao and Laber, Eric B. and Barker, Clay and Stefanski, Leonard A.},
  title        = {Assessing Tuning Parameter Selection Variability in Penalized Regression},
  journal      = {Technometrics},
  volume       = {61},
  number       = {2},
  pages        = {154--164},
  year         = {2019},
  month        = apr,
  issn         = {1537-2723},
  doi          = {10.1080/00401706.2018.1513380},
  abstractNote = {ABSTRACT Penalized regression methods that perform simultaneous model selection and estimation are ubiquitous in statistical modeling. The use of such methods is often unavoidable as manual inspection of all possible models quickly becomes intractable when there are more than a handful of predictors. However, automated methods usually fail to incorporate domain-knowledge, exploratory analyses, or other factors that might guide a more interactive model-building approach. A hybrid approach is to use penalized regression to identify a set of candidate models and then to use interactive model-building to examine this candidate set more closely. To identify a set of candidate models, we derive point and interval estimators of the probability that each model along a solution path will minimize a given model selection criterion, for example, Akaike information criterion, Bayesian information criterion (AIC, BIC), etc., conditional on the observed solution path. Then models with a high probability of selection are considered for further examination. Thus, the proposed methodology attempts to strike a balance between algorithmic modeling approaches that are computationally efficient but fail to incorporate expert knowledge, and interactive modeling approaches that are labor intensive but informed by experience, intuition, and domain knowledge. Supplementary materials for this article are available online.},
}

@article{feng_wu_stefanski_2018,
  author       = {Feng, Yang and Wu, Yichao and Stefanski, Leonard A.},
  title        = {Nonparametric independence screening via favored smoothing bandwidth},
  journal      = {Journal of Statistical Planning and Inference},
  volume       = {197},
  pages        = {1--14},
  year         = {2018},
  month        = dec,
  issn         = {1873-1171},
  doi          = {10.1016/j.jspi.2017.11.006},
  abstractNote = {We propose a flexible nonparametric regression method for ultrahigh-dimensional data. As a first step, we propose a fast screening method based on the favored smoothing bandwidth of the marginal local constant regression. Then, an iterative procedure is developed to recover both the important covariates and the regression function. Theoretically, we prove that the favored smoothing bandwidth based screening possesses the model selection consistency property. Simulation studies as well as real data analysis show the competitive performance of the new procedure.},
}

@article{linn_laber_stefanski_2017,
  author       = {Linn, Kristin A. and Laber, Eric B. and Stefanski, Leonard A.},
  title        = {Interactive {Q-Learning} for Quantiles},
  journal      = {Journal of the American Statistical Association},
  volume       = {112},
  number       = {518},
  pages        = {638--649},
  year         = {2017},
  month        = jun,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2016.1155993},
  abstractNote = {ABSTRACT A dynamic treatment regime is a sequence of decision rules, each of which recommends treatment based on features of patient medical history such as past treatments and outcomes. Existing methods for estimating optimal dynamic treatment regimes from data optimize the mean of a response variable. However, the mean may not always be the most appropriate summary of performance. We derive estimators of decision rules for optimizing probabilities and quantiles computed with respect to the response distribution for two-stage, binary treatment settings. This enables estimation of dynamic treatment regimes that optimize the cumulative distribution function of the response at a prespecified point or a prespecified quantile of the response distribution such as the median. The proposed methods perform favorably in simulation experiments. We illustrate our approach with data from a sequentially randomized trial where the primary outcome is remission of depression symptoms. Supplementary materials for this article are available online.},
}

@article{white_stefanski_wu_2017,
  author       = {White, Kyle R. and Stefanski, Leonard A. and Wu, Yichao},
  title        = {Variable Selection in Kernel Regression Using Measurement Error Selection Likelihoods},
  journal      = {Journal of the American Statistical Association},
  volume       = {112},
  number       = {520},
  pages        = {1587--1597},
  year         = {2017},
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2016.1222287},
  abstractNote = {ABSTRACT This article develops a nonparametric shrinkage and selection estimator via the measurement error selection likelihood approach recently proposed by Stefanski, Wu, and White. The measurement error kernel regression operator (MEKRO) has the same form as the Nadaraya–Watson kernel estimator, but optimizes a measurement error model selection likelihood to estimate the kernel bandwidths. Much like LASSO or COSSO solution paths, MEKRO results in solution paths depending on a tuning parameter that controls shrinkage and selection via a bound on the harmonic mean of the pseudo-measurement error standard deviations. We use small-sample-corrected AIC to select the tuning parameter. Large-sample properties of MEKRO are studied and small-sample properties are explored via Monte Carlo experiments and applications to data. Supplementary materials for this article are available online.},
}

@article{wu_stefanski_2015,
  author       = {Wu, Yichao and Stefanski, Leonard A.},
  title        = {Automatic structure recovery for additive models},
  journal      = {Biometrika},
  volume       = {102},
  number       = {2},
  pages        = {381--395},
  year         = {2015},
  month        = jun,
  issn         = {1464-3510},
  doi          = {10.1093/biomet/asu070},
  abstractNote = {We propose an automatic structure recovery method for additive models, based on a backfitting algorithm coupled with local polynomial smoothing, in conjunction with a new kernel-based variable selection strategy. Our method produces estimates of the set of noise predictors, the sets of predictors that contribute polynomially at different degrees up to a specified degree M, and the set of predictors that contribute beyond polynomially of degree M. We prove consistency of the proposed method, and describe an extension to partially linear models. Finite-sample performance of the method is illustrated via Monte Carlo studies and a real-data example.},
}

@article{linn_laber_stefanski_2015,
  author       = {Linn, Kristin A. and Laber, Eric B. and Stefanski, Leonard A.},
  title        = {{iqLearn}: Interactive {Q-Learning} in {R}},
  journal      = {Journal of Statistical Software},
  volume       = {64},
  number       = {1},
  year         = {2015},
}

@book{davidian_lin_morris_stefanski_2014,
  editor       = {Davidian, Marie and Lin, Xihong and Morris, Jeffrey S. and Stefanski, Leonard A.},
  title        = {The Work of {Raymond J. Carroll}},
  publisher    = {Springer International Publishing},
  year         = {2014},
  isbn         = {9783319058009 9783319058016},
  doi          = {10.1007/978-3-319-05801-6},
  url          = {http://dx.doi.org/10.1007/978-3-319-05801-6},
  abstractNote = {Measurement Error.- Transformation and Weighting.- Epidemiology.- Nonparametric and Semiparametric Regression for Independent Data.- Nonparametric and Semiparametric Regression for Dependent Data.- Robustness.- Other Work Article list for each of these areas is in attachment.},
}

@article{stefanski_wu_white_2014,
  author       = {Stefanski, Leonard A. and Wu, Yichao and White, Kyle},
  title        = {Variable Selection in Nonparametric Classification Via Measurement Error Model Selection Likelihoods},
  journal      = {Journal of the American Statistical Association},
  volume       = {109},
  number       = {506},
  pages        = {574--589},
  year         = {2014},
  month        = jun,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2013.858630},
  abstractNote = {Using the relationships among ridge regression, LASSO estimation, and measurement error attenuation as motivation, a new measurement-error-model-based approach to variable selection is developed. After describing the approach in the familiar context of linear regression, we apply it to the problem of variable selection in nonparametric classification, resulting in a new kernel-based classifier with LASSO-like shrinkage and variable-selection properties. Finite-sample performance of the new classification method is studied via simulation and real data examples, and consistency of the method is studied theoretically. Supplementary materials for the article are available online.},
}

@incollection{thomas_stefanski_davidian_2013,
  author       = {Thomas, Laine and Stefanski, Leonard A. and Davidian, Marie},
  title        = {Bias Reduction in Logistic Regression with Estimated Variance Predictors},
  booktitle    = {ISS-2012 Proceedings Volume On Longitudinal Data Analysis Subject to Measurement Errors, Missing Values, and/or Outliers},
  publisher    = {Springer New York},
  pages        = {33--51},
  year         = {2013},
  isbn         = {9781461468707 9781461468714},
  issn         = {0930-0325},
  doi          = {10.1007/978-1-4614-6871-4_2},
  url          = {http://dx.doi.org/10.1007/978-1-4614-6871-4_2},
  abstractNote = {We study the problem of modeling a response as a function of baseline covariates and a primary predictor of interest that is a noisy measurement of a subject-specific variance. The problem arises naturally in biostatistical joint models wherein the subjects’ primary endpoints are related to the features of subject-specific longitudinal risk processes or profiles. Often the longitudinal process features of interest are parameters of a longitudinal mean function. However, there is a relatively recent and growing interest in relating primary endpoints to longitudinal process variances. In the application motivating our work longitudinal processes consist of 30-day blood pressure trajectories measured between 91 and 120 days post dialysis therapy, with the primary endpoints being short-term mortality. Often the longitudinal risk processes are adequately characterized in terms of trends such as the slopes and intercepts identified with the subject-specific biomarker processes. Modeling of the trend lines results in subject-specific estimated intercepts and slopes, thus inducing a heteroscedastic measurement-error model structure where the estimated trend parameters play the role of measurements of the “true” subject-specific trend parameters that appear as predictors in the primary endpoint model. Our interest lies in models in which the residual variances of the longitudinal processes feed into the model for the primary endpoint. These subject-specific variance parameters are estimated in the course of trend-line fitting creating a measurement error model scenario where variances are predictors and mean squared errors are their noisy measurements. Background literature is reviewed and several methodological approaches for addressing the resulting errors-in-variances problem are studied.},
}

@article{bondell_stefanski_2013,
  author       = {Bondell, Howard D. and Stefanski, Leonard A.},
  title        = {Efficient Robust Regression via Two-Stage Generalized Empirical Likelihood},
  journal      = {Journal of the American Statistical Association},
  volume       = {108},
  number       = {502},
  pages        = {644--655},
  year         = {2013},
  month        = jun,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2013.779847},
  abstractNote = {Large- and finite-sample efficiency and resistance to outliers are the key goals of robust statistics. Although often not simultaneously attainable, we develop and study a linear regression estimator that comes close. Efficiency is obtained from the estimator's close connection to generalized empirical likelihood, and its favorable robustness properties are obtained by constraining the associated sum of (weighted) squared residuals. We prove maximum attainable finite-sample replacement breakdown point and full asymptotic efficiency for normal errors. Simulation evidence shows that compared to existing robust regression estimators, the new estimator has relatively high efficiency for small sample sizes and comparable outlier resistance. The estimator is further illustrated and compared to existing methods via application to a real dataset with purported outliers.},
}

@book{boos_stefanski_2013,
  author       = {Boos, Dennis D. and Stefanski, Leonard A.},
  title        = {Essential statistical inference: Theory and methods},
  publisher    = {Springer},
  address      = {New York},
  year         = {2013},
}

@article{thomas_stefanski_davidian_2013b,
  author       = {Thomas, Laine and Stefanski, Leonard A. and Davidian, Marie},
  title        = {Moment adjusted imputation for multivariate measurement error data with applications to logistic regression},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {67},
  pages        = {15--24},
  year         = {2013},
  month        = nov,
  issn         = {1872-7352},
  doi          = {10.1016/j.csda.2013.04.017},
  abstractNote = {In clinical studies, covariates are often measured with error due to biological fluctuations, device error and other sources. Summary statistics and regression models that are based on mismeasured data will differ from the corresponding analysis based on the "true" covariate. Statistical analysis can be adjusted for measurement error, however various methods exhibit a tradeoff between convenience and performance. Moment Adjusted Imputation (MAI) is a method for measurement error in a scalar latent variable that is easy to implement and performs well in a variety of settings. In practice, multiple covariates may be similarly influenced by biological fluctuations, inducing correlated multivariate measurement error. The extension of MAI to the setting of multivariate latent variables involves unique challenges. Alternative strategies are described, including a computationally feasible option that is shown to perform well.},
}

@article{wang_stefanski_zhu_2012,
  author       = {Wang, Huixia Judy and Stefanski, Leonard A. and Zhu, Zhongyi},
  title        = {Corrected-loss estimation for quantile regression with covariate measurement errors},
  journal      = {Biometrika},
  volume       = {99},
  number       = {2},
  pages        = {405--421},
  year         = {2012},
  month        = jun,
  issn         = {0006-3444},
  doi          = {10.1093/biomet/ass005},
  abstractNote = {We study estimation in quantile regression when covariates are measured with errors. Existing methods require stringent assumptions, such as spherically symmetric joint distribution of the regression and measurement error variables, or linearity of all quantile functions, which restrict model flexibility and complicate computation. In this paper, we develop a new estimation approach based on corrected scores to account for a class of covariate measurement errors in quantile regression. The proposed method is simple to implement. Its validity requires only linearity of the particular quantile function of interest, and it requires no parametric assumptions on the regression error distributions. Finite-sample results demonstrate that the proposed estimators are more efficient than the existing methods in various models considered.},
}

@article{thomas_stefanski_davidian_2011,
  author       = {Thomas, Laine and Stefanski, Leonard A. and Davidian, Marie},
  title        = {A Moment-Adjusted Imputation Method for Measurement Error Models},
  journal      = {Biometrics},
  volume       = {67},
  number       = {4},
  pages        = {1461--1470},
  year         = {2011},
  month        = dec,
  issn         = {1541-0420},
  doi          = {10.1111/j.1541-0420.2011.01569.x},
  abstractNote = {Summary Studies of clinical characteristics frequently measure covariates with a single observation. This may be a mismeasured version of the “true” phenomenon due to sources of variability like biological fluctuations and device error. Descriptive analyses and outcome models that are based on mismeasured data generally will not reflect the corresponding analyses based on the “true” covariate. Many statistical methods are available to adjust for measurement error. Imputation methods like regression calibration and moment reconstruction are easily implemented but are not always adequate. Sophisticated methods have been proposed for specific applications like density estimation, logistic regression, and survival analysis. However, it is frequently infeasible for an analyst to adjust each analysis separately, especially in preliminary studies where resources are limited. We propose an imputation approach called moment‐adjusted imputation that is flexible and relatively automatic. Like other imputation methods, it can be used to adjust a variety of analyses quickly, and it performs well under a broad range of circumstances. We illustrate the method via simulation and apply it to a study of systolic blood pressure and health outcomes in patients hospitalized with acute heart failure.},
}

@article{mcintyre_stefanski_2011,
  author       = {McIntyre, Julie and Stefanski, Leonard A.},
  title        = {Density Estimation with Replicate Heteroscedastic Measurements},
  journal      = {Annals of the Institute of Statistical Mathematics},
  volume       = {63},
  number       = {1},
  pages        = {81--99},
  year         = {2011},
  month        = feb,
  issn         = {1572-9052},
  doi          = {10.1007/s10463-009-0220-x},
  abstractNote = {We present a deconvolution estimator for the density function of a random variable from a set of independent replicate measurements. We assume that measurements are made with normally distributed errors having unknown and possibly heterogeneous variances. The estimator generalizes well-known deconvoluting kernel density estimators, with error variances estimated from the replicate observations. We derive expressions for the integrated mean squared error and examine its rate of convergence as n → ∞ and the number of replicates is fixed. We investigate the finite-sample performance of the estimator through a simulation study and an application to real data.},
}

@article{crews_boos_stefanski_2011,
  author       = {Crews, Hugh B. and Boos, Dennis D. and Stefanski, Leonard A.},
  title        = {{FSR} methods for second-order regression models},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {55},
  number       = {6},
  pages        = {2026--2037},
  year         = {2011},
  month        = jun,
  issn         = {0167-9473},
  doi          = {10.1016/j.csda.2011.01.009},
  abstractNote = {Most variable selection techniques focus on first-order linear regression models. Often, interaction and quadratic terms are also of interest, but the number of candidate predictors grows very fast with the number of original predictors, making variable selection more difficult. Forward selection algorithms are thus developed that enforce natural hierarchies in second-order models to control the entry rate of uninformative effects and to equalize the false selection rates from first-order and second-order terms. Method performance is compared through Monte Carlo simulation and illustrated with data from a Cox regression and from a response surface experiment.},
}

@article{boos_stefanski_2011,
  author       = {Boos, Dennis D. and Stefanski, Leonard A.},
  title        = {P-Value Precision and Reproducibility},
  journal      = {The American Statistician},
  volume       = {65},
  number       = {4},
  pages        = {213--221},
  year         = {2011},
  month        = nov,
  issn         = {0003-1305},
  doi          = {10.1198/tas.2011.10129},
  abstractNote = {P-values are useful statistical measures of evidence against a null hypothesis. In contrast to other statistical estimates, however, their sample-to-sample variability is usually not considered or estimated, and therefore not fully appreciated. Via a systematic study of log-scale p-value standard errors, bootstrap prediction bounds, and reproducibility probabilities for future replicate p-values, we show that p-values exhibit surprisingly large variability in typical data situations. In addition to providing context to discussions about the failure of statistical results to replicate, our findings shed light on the relative value of exact p-values vis-a-vis approximate p-values, and indicate that the use of *, **, and *** to denote levels 0.05, 0.01, and 0.001 of statistical significance in subject-matter journals is about the right level of precision for reporting p-values when judged by widely accepted rules for rounding statistical estimates.},
}

@article{mcintyre_stefanski_2011b,
  author       = {McIntyre, Julie and Stefanski, Leonard A.},
  title        = {Regression-assisted deconvolution},
  journal      = {Statistics in Medicine},
  volume       = {30},
  number       = {14},
  pages        = {1722--1734},
  year         = {2011},
  month        = jun,
  issn         = {0277-6715},
  doi          = {10.1002/sim.4186},
  abstractNote = {Abstract},
}

@article{boos_stefanski_wu_2009,
  author       = {Boos, Dennis D. and Stefanski, Leonard A. and Wu, Yujun},
  title        = {Fast {FSR} Variable Selection with Applications to Clinical Trials},
  journal      = {Biometrics},
  volume       = {65},
  number       = {3},
  pages        = {692--700},
  year         = {2009},
  month        = sep,
  issn         = {1541-0420},
  doi          = {10.1111/j.1541-0420.2008.01127.x},
  abstractNote = {Summary A new version of the false selection rate variable selection method of Wu, Boos, and Stefanski (2007, Journal of the American Statistical Association 102, 235–243) is developed that requires no simulation. This version allows the tuning parameter in forward selection to be estimated simply by hand calculation from a summary table of output even for situations where the number of explanatory variables is larger than the sample size. Because of the computational simplicity, the method can be used in permutation tests and inside bagging loops for improved prediction. Illustration is provided in clinical trials for linear regression, logistic regression, and Cox proportional hazards regression.},
}

@article{huang_stefanski_davidian_2009,
  author       = {Huang, Xianzheng and Stefanski, Leonard A. and Davidian, Marie},
  title        = {Latent-Model Robustness in Joint Models for a Primary Endpoint and a Longitudinal Process},
  journal      = {Biometrics},
  volume       = {65},
  number       = {3},
  pages        = {719--727},
  year         = {2009},
  month        = sep,
  issn         = {1541-0420},
  doi          = {10.1111/j.1541-0420.2008.01171.x},
  abstractNote = {Summary Joint modeling of a primary response and a longitudinal process via shared random effects is widely used in many areas of application. Likelihood‐based inference on joint models requires model specification of the random effects. Inappropriate model specification of random effects can compromise inference. We present methods to diagnose random effect model misspecification of the type that leads to biased inference on joint models. The methods are illustrated via application to simulated data, and by application to data from a study of bone mineral density in perimenopausal women and data from an HIV clinical trial.},
}

@article{ogorek_stefanski_2009,
  author       = {Ogorek, B. A. and Stefanski, Leonard A.},
  title        = {Orthology-based multilevel modeling of differentially expressed mouse and human gene pairs},
  journal      = {Statistical Applications in Genetics and Molecular Biology},
  volume       = {8},
  number       = {1},
  year         = {2009},
}

@article{wang_stefanski_genton_boos_2009,
  author       = {Wang, Q. and Stefanski, Leonard A. and Genton, Marc G. and Boos, Dennis D.},
  title        = {Robust time series analysis via measurement error modeling},
  journal      = {Statistica Sinica},
  volume       = {19},
  number       = {3},
  pages        = {1263--1280},
  year         = {2009},
}

@article{aneja_arya_rumsey_kim_bajwa_arkinson_semunegus_dickey_stefanski_todd_etal_2008,
  author       = {Aneja, Viney P. and Arya, S. Pal and Rumsey, Ian C. and Kim, D.-S. and Bajwa, K. and Arkinson, H. L. and Semunegus, H. and Dickey, D. A. and Stefanski, Leonard A. and Todd, L. and others},
  title        = {Characterizing ammonia emissions from swine farms in eastern {North Carolina}: Part 2 - Potential environmentally superior technologies for waste treatment},
  journal      = {Journal of the Air \& Waste Management Association},
  volume       = {58},
  number       = {9},
  pages        = {1145--1157},
  year         = {2008},
  month        = sep,
  issn         = {2162-2906},
  doi          = {10.3155/1047-3289.58.9.1145},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-53849100726&partnerID=MN8TOARS},
  abstractNote = {Abstract The need for developing environmentally superior and sustainable solutions for managing the animal waste at commercial swine farms in eastern North Carolina has been recognized in recent years. Program OPEN (Odor, Pathogens, and Emissions of Nitrogen), funded by the North Carolina State University Animal and Poultry Waste Management Center (APWMC), was initiated and charged with the evaluation of potential environmentally superior technologies (ESTs) that have been developed and implemented at selected swine farms or facilities. The OPEN program has demonstrated the effectiveness of a new paradigm for policy-relevant environmental research related to North Carolina’s animal waste management programs. This new paradigm is based on a commitment to improve scientific understanding associated with a wide array of environmental issues (i.e., issues related to the movement of N from animal waste into air, water, and soil media; the transmission of odor and odorants; disease-transmitting vectors; and airborne pathogens). The primary focus of this paper is on emissions of ammonia (NH3) from some potential ESTs that were being evaluated at full-scale swine facilities. During 2-week-long periods in two different seasons (warm and cold), NH3 fluxes from water-holding structures and NH3 emissions from animal houses or barns were measured at six potential EST sites: (1) Barham farm—in-ground ambient temperature anaerobic digester/energy recovery/greenhouse vegetable production system; (2) BOC #93 farm—upflow biofiltration system—EKOKAN; (3) Carrolls farm—aerobic blanket system—ISSUES-ABS; (4) Corbett #1 farm—solids separation/gasification for energy and ash recovery centralized system—BEST; (5) Corbett #2 farm—solid separation/reciprocating water technology—ReCip; and (6) Vestal farm—Recycling of Nutrient, Energy and Water System—ISSUES—RENEW. The ESTs were compared with similar measurements made at two conventional lagoon and spray technology (LST) farms (Moore farm and Stokes farm). A flow-through dynamic chamber system and two sets of open-path Fourier transform infrared (OP-FTIR) spectrometers measured NH3 fluxes continuously from water-holding structures and emissions from housing units at the EST and conventional LST sites. A statistical-observational model for lagoon NH3 flux was developed using a multiple linear regression analysis of 15-min averaged NH3 flux data against the relevant environmental parameters measured at the two conventional farms during two different seasons of the year. This was used to compare the water-holding structures at ESTs with those from lagoons at conventional sites under similar environmental conditions. Percentage reductions in NH3 emissions from different components of each potential EST, as well as the whole farm on which the EST was located were evaluated from the estimated emissions from water-holding structures, barns, etc., all normalized by the appropriate nitrogen excretion rate at the potential EST farm, as well as from the appropriate conventional farm. This study showed that ammonia emissions were reduced by all but one potential EST for both experimental periods. However, on the basis of our evaluation results and analysis and available information in the scientific literature, the evaluated alternative technologies may require additional technical modifications to be qualified as unconditional ESTs relative to NH3 emissions reductions.},
}

@article{aneja_arya_kim_rumsey_arkinson_semunegus_bajwa_dickey_stefanski_todd_etal_2008,
  author       = {Aneja, Viney P. and Arya, S. Pal and Kim, D.-S. and Rumsey, Ian C. and Arkinson, H. L. and Semunegus, H. and Bajwa, K. S. and Dickey, D. A. and Stefanski, Leonard A. and Todd, L. and others},
  title        = {Characterizing ammonia emissions from swine farms in eastern {North Carolina}: Part 1 - Conventional lagoon and spray technology for waste treatment},
  journal      = {Journal of the Air \& Waste Management Association},
  volume       = {58},
  number       = {9},
  pages        = {1130--1144},
  year         = {2008},
  month        = sep,
  issn         = {1047-3289},
  doi          = {10.3155/1047-3289.58.9.1130},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-53849107841&partnerID=MN8TOARS},
  abstractNote = {Abstract Ammonia (NH3) fluxes from waste treatment lagoons and barns at two conventional swine farms in eastern North Carolina were measured. The waste treatment lagoon data were analyzed to elucidate the temporal (seasonal and diurnal) variability and to derive regression relationships between NH3 flux and lagoon temperature, pH and ammonium content of the lagoon, and the most relevant meteorological parameters. NH3 fluxes were measured at various sampling locations on the lagoons by a flow-through dynamic chamber system interfaced to an environmentally controlled mobile laboratory. Two sets of open-path Fourier transform infrared (FTIR) spectrometers were also used to measure NH3 concentrations for estimating NH3 emissions from the animal housing units (barns) at the lagoon and spray technology (LST) sites. Two different types of ventilation systems were used at the two farms. Moore farm used fan ventilation, and Stokes farm used natural ventilation. The early fall and winter season intensive measurement campaigns were conducted during September 9 to October 11, 2002 (lagoon temperature ranged from 21.2 to 33.6 °C) and January 6 to February 2, 2003 (lagoon temperature ranged from 1.7 to 12 °C), respectively. Significant differences in seasonal NH3 fluxes from the waste treatment lagoons were found at both farms. Typical diurnal variation of NH3 flux with its maximum value in the afternoon was observed during both experimental periods. Exponentially increasing flux with increasing surface lagoon temperature was observed, and a linear regression relationship between logarithm of NH3 flux and lagoon surface temperature (T l) was obtained. Correlations between lagoon NH3 flux and chemical parameters, such as pH, total Kjeldahl nitrogen (TKN), and total ammoniacal nitrogen (TAN) were found to be statistically insignificant or weak. In addition to lagoon surface temperature, the difference (D) between air temperature and the lagoon surface temperature was also found to influence the NH3 flux, especially when D > 0 (i.e., air hotter than lagoon). This hot-air effect is included in the statistical-observational model obtained in this study, which was used further in the companion study (Part II), to compare the emissions from potential environmental superior technologies to evaluate the effectiveness of each technology.},
}

@article{stefanski_2008,
  author       = {Stefanski, Leonard A.},
  title        = {The {North Carolina} lottery coincidence},
  journal      = {The American Statistician},
  volume       = {62},
  number       = {2},
  pages        = {130--134},
  year         = {2008},
  month        = may,
  issn         = {1537-2731},
  doi          = {10.1198/000313008X306844},
  abstractNote = {The sets of five numbers picked in the North Carolina Cash-5 Lottery game were identical on July 9th and 11th, 2007. This coincidence was the topic of a local television station news story on July 12 in which I played a minor role. This article documents the coincidence, my interactions with the television reporter seeking to understand how likely, or unlikely, the coincidence was, and some afterthoughts, including an analysis of the likelihood of matching sets of numbers.},
}

@article{wu_boos_stefanski_2007,
  author       = {Wu, Yujun and Boos, Dennis D. and Stefanski, Leonard A.},
  title        = {Controlling variable selection by the addition of pseudovariables},
  journal      = {Journal of the American Statistical Association},
  volume       = {102},
  number       = {477},
  pages        = {235--243},
  year         = {2007},
  month        = mar,
  issn         = {1537-274X},
  doi          = {10.1198/016214506000000843},
  abstractNote = {We propose a new approach to variable selection designed to control the false selection rate (FSR), defined as the proportion of uninformative variables included in selected models. The method works by adding a known number of pseudovariables to the real dataset, running a variable selection procedure, and monitoring the proportion of pseudovariables falsely selected. Information obtained from bootstrap-like replications of this process is used to estimate the proportion of falsely selected real variables and to tune the selection procedure to control the FSR.},
}

@article{childs_daubert_stefanski_foegeding_2007,
  author       = {Childs, J. L. and Daubert, C. R. and Stefanski, Leonard A. and Foegeding, E. A.},
  title        = {Factors regulating cheese shreddability},
  journal      = {Journal of Dairy Science},
  volume       = {90},
  number       = {5},
  pages        = {2163--2174},
  year         = {2007},
  month        = may,
  issn         = {0022-0302},
  doi          = {10.3168/jds.2006-618},
  abstractNote = {Two sets of cheeses were evaluated to determine factors that affect shred quality. The first set of cheeses was made up of 3 commercial cheeses, Monterey Jack, Mozzarella, and process. The second set of cheeses was made up of 3 Mozzarella cheeses with varying levels of protein and fat at a constant moisture content. A shred distribution of long shreds, short shreds, and fines was obtained by shredding blocks of cheese in a food processor. A probe tack test was used to directly measure adhesion of the cheese to a stainless-steel surface. Surface energy was determined based on the contact angles of standard liquids, and rheological characterization was done by a creep and recovery test. Creep and recovery data were used to calculate the maximum and initial compliance and retardation time. Shredding defects of fines and adhesion to the blade were observed in commercial cheeses. Mozzarella did not adhere to the blade but did produce the most fines. Both Monterey Jack and process cheeses adhered to the blade and produced fines. Furthermore, adherence to the blade was correlated positively with tack energy and negatively with retardation time. Mozzarella cheese, with the highest fat and lowest protein contents, produced the most fines but showed little adherence to the blade, even though tack energy increased with fat content. Surface energy was not correlated with shredding defects in either group of cheese. Rheological properties and tack energy appeared to be the key factors involved in shredding defects.},
}

@article{stefanski_2007,
  author       = {Stefanski, Leonard A.},
  title        = {Residual (sur)realism},
  journal      = {The American Statistician},
  volume       = {61},
  number       = {2},
  pages        = {163--177},
  year         = {2007},
  month        = may,
  issn         = {1537-2731},
  doi          = {10.1198/000313007X190079},
  abstractNote = {We show how to construct multiple linear regression datasets with the property that the plot of residuals versus predicted values from the least squares fit of the correct model reveals a hidden image or message. In the full PDF version of this article, the abstract itself is one such plot.},
}

@article{wu_genton_stefanski_2006,
  author       = {Wu, Yujun and Genton, Marc G. and Stefanski, Leonard A.},
  title        = {A Multivariate two-sample mean test for small sample size and missing data},
  journal      = {Biometrics},
  volume       = {62},
  number       = {3},
  pages        = {877--885},
  year         = {2006},
  month        = sep,
  issn         = {0006-341X},
  doi          = {10.1111/j.1541-0420.2006.00533.x},
  abstractNote = {Summary We develop a new statistic for testing the equality of two multivariate mean vectors. A scaled chi‐squared distribution is proposed as an approximating null distribution. Because the test statistic is based on componentwise statistics, it has the advantage over Hotelling's T2 test of being applicable to the case where the dimension of an observation exceeds the number of observations. An appealing feature of the new test is its ability to handle missing data by relying on only componentwise sample moments. Monte Carlo studies indicate good power compared to Hotelling's T2 and a recently proposed test by Srivastava (2004, Technical Report, University of Toronto). The test is applied to drug discovery data.},
}

@article{huang_stefanski_davidian_2006,
  author       = {Huang, Xianzheng and Stefanski, Leonard A. and Davidian, Marie},
  title        = {Latent-model robustness in structural measurement error models},
  journal      = {Biometrika},
  volume       = {93},
  number       = {1},
  pages        = {53--64},
  year         = {2006},
  month        = mar,
  issn         = {1464-3510},
  doi          = {10.1093/biomet/93.1.53},
  abstractNote = {We present methods for diagnosing the effects of model misspecification of the true-predictor distribution in structural measurement error models. We first formulate latent-model robustness theoretically. Then we provide practical techniques for examining the adequacy of an assumed latent predictor model. The methods are illustrated via analytical examples, application to simulated data and with data from a study of coronary heart disease. Copyright 2006, Oxford University Press.},
}

@book{carroll_ruppert_stefanski_crainiceanu_2006, title={Measurement error in nonlinear models: A modern perspective. (2nd ed.)}, ISBN={1584886331}, DOI={10.1201/9781420010138}, abstractNote={Guide to Notation Introduction The Double/Triple-Whammy of Measurement Error Classical Measurement Error A Nutrition Example Measurement Error Examples Radiation Epidemiology and Berkson Errors Classical Measurement Error Model Extensions Other Examples of Measurement Error Models Checking The Classical Error Model Loss of Power A Brief Tour Bibliographic Notes Important Concepts Functional and Structural Models Models for Measurement Error Sources of Data Is There an "Exact" Predictor? What is Truth? 
Differential and Nondifferential Error Prediction Bibliographic Notes Linear Regression and Attenuation Introduction Bias Caused by Measurement Error Multiple and Orthogonal Regression Correcting for Bias Bias Versus Variance Attenuation in General Problems Bibliographic Notes Regression Calibration Overview The Regression Calibration Algorithm NHANES Example Estimating the Calibration Function Parameters Multiplicative Measurement Error Standard Errors Expanded Regression Calibration Models Examples of the Approximations Theoretical Examples Bibliographic Notes and Software Simulation Extrapolation Overview Simulation Extrapolation Heuristics The SIMEX Algorithm Applications SIMEX in Some Important Special Cases Extensions and Related Methods Bibliographic Notes Instrumental Variables Overview Instrumental Variables in Linear Models Approximate Instrumental Variable Estimation Adjusted Score Method Examples Other Methodologies Bibliographic Notes Score Function Methods Overview Linear and Logistic Regression Conditional Score Functions Corrected Score Functions Computation and Asymptotic Approximations Comparison of Conditional and Corrected Scores Bibliographic Notes Likelihood and Quasilikelihood Introduction Steps 2 and 3: Constructing Likelihoods Step 4: Numerical Computation of Likelihoods Cervical Cancer and Herpes Framingham Data Nevada Test Site Reanalysis Bronchitis Example Quasilikelihood and Variance Function Models Bibliographic Notes Bayesian Methods Overview The Gibbs Sampler Metropolis-Hastings Algorithm Linear Regression Nonlinear Models Logistic Regression Berkson Errors Automatic implementation Cervical Cancer and Herpes Framingham Data OPEN Data: A Variance Components Model Bibliographic Notes Hypothesis Testing Overview The Regression Calibration Approximation Illustration: OPEN Data Hypotheses about Sub-Vectors of ssx and ssz Efficient Score Tests of H0 : ssx = 0 Bibliographic Notes Longitudinal Data and Mixed Models Mixed Models for 
Longitudinal Data Mixed Measurement Error Models A Bias Corrected Estimator SIMEX for GLMMEMs Regression Calibration for GLMMs Maximum Likelihood Estimation Joint Modeling Other Models and Applications Example: The CHOICE Study Bibliographic Notes Nonparametric Estimation Deconvolution Nonparametric Regression Baseline Change Example Bibliographic Notes Semiparametric Regression Overview Additive Models MCMC for Additive Spline Models Monte-Carlo EM-Algorithm Simulation with Classical Errors Simulation with Berkson Errors Semiparametrics: X Modeled Parametrically Parametric Models: No Assumptions on X Bibliographic Notes Survival Data Notation and Assumptions Induced Hazard Function Regression Calibration for Survival Analysis SIMEX for Survival Analysis Chronic Kidney Disease Progression Semi and Nonparametric Methods Likelihood Inference for Frailty Models Bibliographic Notes Response Variable Error Response Error and Linear Regression Other Forms of Additive Response Error Logistic Regression with Response Error Likelihood Methods Use of Complete Data Only Semiparametric Methods for Validation Data Bibliographic Notes Appendix A: Background Material Overview Normal and Lognormal Distributions Gamma and Inverse Gamma Distributions Best and Best Linear Prediction and Regression Likelihood Methods Unbiased Estimating Equations Quasilikelihood and Variance Function Models (QVF) Generalized Linear Models Bootstrap Methods Appendix B: Technical Details Appendix to Chapter 1: Power in Berkson and Classical Error Models Appendix to Chapter 3: Linear Regression and Attenuation Regression Calibration SIMEX Instrumental Variables Score Function Methods Likelihood and Quasilikelihood Bayesian Methods References Applications and Examples Index Index}, publisher={Boca Raton, FL: Chapman & Hall}, author={Carroll, R. J. and Ruppert, D. and Stefanski, L. A. and Crainiceanu, C. 
A.}, year={2006} } @article{luo_stefanski_boos_2006, title={Tuning variable selection procedures by adding noise}, volume={48}, DOI={10.1198/004017005000000319}, abstractNote={Many variable selection methods for linear regression depend critically on tuning parameters that control the performance of the method, for example, “entry” and “stay” significance levels in forward and backward selection. However, most methods do not adapt the tuning parameters to particular datasets. We propose a general strategy for adapting variable selection tuning parameters that effectively estimates the tuning parameters so that the selection method avoids overfitting and underfitting. The strategy is based on the principle that overfitting and underfitting can be directly observed in estimates of the error variance after adding controlled amounts of additional independent noise to the response variable, then running a variable selection method. It is related to the simulation technique SIMEX found in the measurement error literature. We focus on forward selection because of its simplicity and ability to handle large numbers of explanatory variables. Monte Carlo studies show that the new method compares favorably with established methods.}, number={2}, journal={Technometrics}, author={Luo, X. H. and Stefanski, L. A. and Boos, D. D.}, year={2006}, pages={165–175} } @article{booker_miller_fiscus_pursley_stefanski_2005, title={Comparative responses of container- versus ground-grown soybean to elevated carbon dioxide and ozone}, volume={45}, ISSN={["1435-0653"]}, DOI={10.2135/cropsci2004.0198}, abstractNote={In studies of CO2–enrichment effects on plants, the applicability of results derived from experiments using container‐grown plants for predictions of future crop performance in a CO2–enriched atmosphere has been questioned. Concerns also have been expressed about plant growth studies with the air pollutant O3 in pot‐grown plants. 
Further, since elevated CO2 and O3 co‐occur, studies are required with the combination of gases. In this 2‐yr experiment, soybean [Glycine max (L.) Merr.] plants grown in large pots (15 and 21 L) and in the ground were exposed to mixtures of CO2 and O3 in open‐top chambers. The CO2 treatments were ambient and CO2 enrichment of approximately 337 μmol mol−1 added 24 h d−1 Ozone treatments were charcoal‐filtered (CF) air (23 nmol mol−1) and approximately 1.5 times ambient O3 levels (71 nmol mol−1) given 12 h d−1 Relative effects of elevated CO2 and O3 on aboveground biomass and seed yield were quite similar for plants grown in pots compared with plants grown in the ground. Elevated CO2 increased total seed mass and O3 suppressed it to similar magnitudes in both rooting environments. Elevated CO2 also reduced the toxic effects of O3 Net photosynthesis (A) was similar while stomatal conductance (gs) was higher in pot‐grown compared with ground‐grown plants, possibly due to better soil moisture status. The results indicated that planting density and rooting environment affected plant morphology, but relative responses of seed yield to elevated CO2 and O3 were not fundamentally different between soybean plants grown in large pots and in the ground in open‐top chambers.}, number={3}, journal={CROP SCIENCE}, author={Booker, FL and Miller, JE and Fiscus, EL and Pursley, WA and Stefanski, LA}, year={2005}, pages={883–895} } @article{stefanski_novick_devanarayan_2005, title={Estimating a nonlinear function of a normal mean}, volume={92}, ISSN={["0006-3444"]}, DOI={10.1093/biomet/92.3.732}, abstractNote={We derive a Monte-Carlo-amenable, minimum variance unbiased estimator of a nonlinear function of a normal mean and the variance of the estimator. Applications to problems arising in the analysis of data measured with error are described. 
Copyright 2005, Oxford University Press.}, number={3}, journal={BIOMETRIKA}, author={Stefanski, LA and Novick, SJ and Devanarayan, V}, year={2005}, month={Sep}, pages={732–736} } @article{glenn_daubert_farkas_stefanski_2003, title={A statistical analysis of creaming variables impacting process cheese melt quality}, volume={26}, ISSN={["0146-9428"]}, DOI={10.1111/j.1745-4557.2003.tb00247.x}, abstractNote={Abstract Although many variables influence the melt quality of finished processed cheese, this investigation focused on mechanical and thermal energy transport involved during the creaming process. To simulate commercial processing, a pilot scale 10‐gallon (0.04m3), dual ribbon blender was equipped with a thermal control system and a 0.75 hp (559.27 W) electrical motor. An experimental design consisted of three temperatures (75, 80, 85C), three mixing rates (50, 100,150 RPM), and six durations (1, 5, 10, 15, 25, 35 min). Quantified process variables included: process strain and thermal history, and total, instantaneous, and change in mechanical energy. The Schreiber melt test was used to examine the relationship between the processing parameters and melt performance. A statistical analysis revealed significant parameter estimates (P < 0.0001) for each quantified variable in a general linear model. The process cheese industry will gain insight into controlled manufacturing conditions to deliver desired melt functionality.}, number={4}, journal={JOURNAL OF FOOD QUALITY}, author={Glenn, TA and Daubert, CR and Farkas, BE and Stefanski, LA}, year={2003}, month={Oct}, pages={299–321} } @article{stefanski_rubino_hess_2003, title={Estimating patch occupancy when patches are incompletely surveyed}, volume={2543}, journal={Insect Biochemistry and Molecular Biology}, author={Stefanski, L. A. and Rubino, M. J. and Hess, G. 
R.}, year={2003}, pages={1–20} } @article{novick_stefanski_2002, title={Corrected score estimation via complex variable simulation extrapolation}, volume={97}, ISSN={["0162-1459"]}, DOI={10.1198/016214502760047005}, abstractNote={A Monte Carlo method of computing unbiased estimating equations for the analysis of data measured with error is described. Asymptotic distribution results are obtained for estimators derived from the Monte Carlo estimating equations. The method is illustrated with examples, applications, and simulation studies. The Monte Carlo estimating equations are corrected scores in the sense of Nakamura, and the proposed methods are closely related to the simulation method described by Cook and Stefanski.}, number={458}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Novick, SJ and Stefanski, LA}, year={2002}, month={Jun}, pages={472–481} } @article{devanarayan_stefanski_2002, title={Empirical simulation extrapolation for measurement error models with replicate measurements}, volume={59}, ISSN={["0167-7152"]}, DOI={10.1016/S0167-7152(02)00098-6}, abstractNote={We present a variation of the simex algorithm (J. Amer. statist. Assoc. 89 (1994) 1314) appropriate for the case in which the measurement error variance(s) are unknown but replicate measurements are available. The method used pseudo errors generated from random linear contrasts of the observed replicate measurements. 
An attractive feature of the new method is its ability to accommodate heteroscedastic measurement error.}, number={3}, journal={STATISTICS & PROBABILITY LETTERS}, author={Devanarayan, V and Stefanski, LA}, year={2002}, month={Oct}, pages={219–225} } @article{stefanski_boos_2002, title={The calculus of M-estimation}, volume={56}, ISSN={["0003-1305"]}, DOI={10.1198/000313002753631330}, abstractNote={Since the seminal papers by Huber in the 1960s, M-estimation methods (also known as estimating equation methods) have been increasingly important for asymptotic analysis and approximate inference. This article illustrates the breadth and generality of the M-estimation approach, thereby facilitating its use inpractice and in the classroom as a unifying approach to the study of large-sample inference.}, number={1}, journal={AMERICAN STATISTICIAN}, author={Stefanski, LA and Boos, DD}, year={2002}, month={Feb}, pages={29–38} } @article{blankenship_stefanski_2001, title={Statistical estimation of ozone exposure metrics}, volume={35}, ISSN={["1352-2310"]}, DOI={10.1016/S1352-2310(01)00227-8}, abstractNote={Data from recent experiments at North Carolina State University and other locations provide a unique opportunity to study the effect of ambient ozone on the growth of clover. The data consist of hourly ozone measurements over a 140 day growing season at eight sites in the US, coupled with clover growth response data measured every 28 days. The objective is to model an indicator of clover growth as a function of ozone exposure. A common strategy for dealing with the numerous hourly ozone measurements is to reduce these to a single summary measurement, a so-called exposure metric, for the growth period of interest. However, the mean ozone value is not necessarily the best summarization, as it is widely believed that low levels of ozone have a negligible effect on growth, whereas peak ozone values are deleterious to plant growth. 
There are also suspected interactions with available sunlight, temperature and humidity. A number of exposure metrics have been proposed that reflect these beliefs by assigning different weights to ozone values according to magnitude, time of day, temperature and humidity. These weighting schemes generally depend on parameters that have, to date, been subjectively determined. We propose a statistical approach based on profile likelihoods to estimate the parameters in these exposure metrics.}, number={26}, journal={ATMOSPHERIC ENVIRONMENT}, author={Blankenship, EE and Stefanski, LA}, year={2001}, month={Sep}, pages={4499–4510} } @article{stefanski_2000, title={Measurement error models}, volume={95}, ISSN={["0162-1459"]}, DOI={10.2307/2669787}, number={452}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Stefanski, LA}, year={2000}, month={Dec}, pages={1353–1358} } @article{heagle_stefanski_2000, title={Relationships between ambient ozone regimes and white clover forage production using different ozone exposure indexes}, volume={34}, ISSN={["1352-2310"]}, DOI={10.1016/s1352-2310(99)00354-4}, abstractNote={Plant responses to seasonal exposure to tropospheric ozone (O3) are mediated by interactions with physical and genetic factors that complicate attempts to develop a measure of O3 exposure (exposure index) that best relates to plant response. Dozens of exposure indexes have been tested for best fit to yield response data from open-top chamber studies. These tests have limited applicability because of possible confounding caused by variability in experimental protocols used in chamber dose–response studies. A 2-yr study in ambient air at eight locations in the USA measured relative effects of ambient O3 on forage weight of a sensitive (NC-S) and a resistant (NC-R) clone of white clover. Protocols included uniform growth medium, irrigation, exposure duration and genetics (clones). 
Plants were harvested to determine NC-S/NC-R forage biomass ratios after each of four 28-day periods. High ratios indicated low O3 concentrations, and low ratios indicated high concentrations. We used these results in attempts to identify the relative suitability of several exposure indexes in defining O3 exposure–forage biomass relationships. Indexes were calculated using combinations of O3 exposure forms (SUM00, SUM06, W95, W126, and AOT04), diurnal and seasonal accumulating times and harvests. Squared correlations (r2's) between the index and biomass ratio were used as a general indication of relative suitability of the different indexes. Squared correlations were much higher for indexes coupled with harvests 2, 3 and 4, than for harvest 1. Even higher r2's occurred for indexes coupled with the combined mean forage ratio for harvests 2, 3 and 4. Squared correlations were most sensitive to the choice of hourly averaging times. Lowest r2's occurred for the 24 h accumulating period, much higher r2's occurred for the 12 h daylight period, and the highest r2's occurred for periods of 6 h or less during midday, regardless of all other factors. The exposure form was important only for 24 h indexes for which SUM00 gave the lowest r2's. 
All forms, including SUM00, produced similarly high r2's for 6, 5, and 4 h midday accumulating times.}, number={5}, journal={ATMOSPHERIC ENVIRONMENT}, author={Heagle, AS and Stefanski, LA}, year={2000}, pages={735–744} } @article{heagle_booker_miller_pursley_stefanski_1999, title={Influence of daily carbon dioxide exposure duration and root environment on soybean response to elevated carbon dioxide}, volume={28}, ISSN={["0047-2425"]}, DOI={10.2134/jeq1999.00472425002800020034x}, abstractNote={Abstract}, number={2}, journal={JOURNAL OF ENVIRONMENTAL QUALITY}, author={Heagle, AS and Booker, FL and Miller, JE and Pursley, WA and Stefanski, LA}, year={1999}, pages={666–675} } @article{carroll_ruppert_stefanski_1999, title={Regression depth - Comment}, volume={94}, ISSN={["1537-274X"]}, DOI={10.2307/2670159}, number={446}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Carroll, RJ and Ruppert, D and Stefanski, LA}, year={1999}, month={Jun}, pages={410–411} } @article{gould_stefanski_pollock_1999, title={Use of simulation-extrapolation estimation in catch-effort analyses}, volume={56}, ISSN={["0706-652X"]}, DOI={10.1139/cjfas-56-7-1234}, number={7}, journal={CANADIAN JOURNAL OF FISHERIES AND AQUATIC SCIENCES}, author={Gould, WR and Stefanski, LA and Pollock, KH}, year={1999}, month={Jul}, pages={1234–1240} } @article{park_stefanski_1998, title={Relative-error prediction}, volume={40}, ISSN={["0167-7152"]}, DOI={10.1016/S0167-7152(98)00088-1}, abstractNote={We derive the form of the best mean squared relative error predictor of Y given X. Some methods of estimating predictors with good relative error properties are proposed and studied via simulation. 
The methods are illustrated with an example in which county-level gasoline sales are predicted from county-level population.}, number={3}, journal={STATISTICS & PROBABILITY LETTERS}, author={Park, H and Stefanski, LA}, year={1998}, month={Oct}, pages={227–236} } @article{gould_stefanski_pollock_1997, title={Effects of measurement error on catch-effort estimation}, volume={54}, ISSN={["1205-7533"]}, DOI={10.1139/cjfas-54-4-898}, number={4}, journal={CANADIAN JOURNAL OF FISHERIES AND AQUATIC SCIENCES}, author={Gould, WR and Stefanski, LA and Pollock, KH}, year={1997}, month={Apr}, pages={898–906} } @book{carroll_ruppert_stefanski_1995, title={Measurement error in nonlinear models}, ISBN={0412047217}, publisher={London; New York: Chapman & Hall}, author={Carroll, R. J. and Ruppert, D. and Stefanski, L. A.}, year={1995} } @book{doorn_stefanski_barlaz_1994, title={Estimate of methane emissions from U.S. landfills: Project summary}, publisher={Research Triangle Park, NC: U.S. Environmental Protection Agency, Air and Energy Engineering Research Laboratory}, author={Doorn, M. R. J. and Stefanski, L. A. and Barlaz, M. A.}, year={1994} }