@article{somers_winger_fisher_hyland_davidian_laber_miller_kelleher_vilardaga_majestic_etal_2023, title={Behavioral cancer pain intervention dosing: results of a {Sequential Multiple Assignment Randomized Trial}}, volume={164}, ISSN={1872-6623}, DOI={10.1097/j.pain.0000000000002915}, number={9}, journal={PAIN}, author={Somers, Tamara J. and Winger, Joseph G. and Fisher, Hannah M. and Hyland, Kelly A. and Davidian, Marie and Laber, Eric B. and Miller, Shannon N. and Kelleher, Sarah A. and Vilardaga, Jennifer C. Plumb and Majestic, Catherine and others}, year={2023}, month=sep, pages={1935--1941} } @article{manschot_laber_davidian_2023, title={Interim monitoring of sequential multiple assignment randomized trials using partial information}, volume={3}, ISSN={1541-0420}, DOI={10.1111/biom.13854}, abstractNote={The sequential multiple assignment randomized trial (SMART) is the gold standard trial design to generate data for the evaluation of multistage treatment regimes. As with conventional (single-stage) randomized clinical trials, interim monitoring allows early stopping; however, there are few methods for principled interim analysis in SMARTs. Because SMARTs involve multiple stages of treatment, a key challenge is that not all enrolled participants will have progressed through all treatment stages at the time of an interim analysis. Wu et al. (2021) propose basing interim analyses on an estimator for the mean outcome under a given regime that uses data only from participants who have completed all treatment stages. We propose an estimator for the mean outcome under a given regime that gains efficiency by using partial information from enrolled participants regardless of their progression through treatment stages. Using the asymptotic distribution of this estimator, we derive associated Pocock and O'Brien-Fleming testing procedures for early stopping. 
In simulation experiments, the estimator controls type I error and achieves nominal power while reducing expected sample size relative to the method of Wu et al. (2021). We present an illustrative application of the proposed estimator based on a recent SMART evaluating behavioral pain interventions for breast cancer patients.}, journal={BIOMETRICS}, author={Manschot, Cole and Laber, Eric and Davidian, Marie}, year={2023}, month=mar } @article{johnson_lu_davidian_2022, title={A general framework for subgroup detection via one-step value difference estimation}, volume={8}, ISSN={1541-0420}, DOI={10.1111/biom.13711}, abstractNote={Abstract Recent statistical methodology for precision medicine has focused on either identification of subgroups with enhanced treatment effects or estimating optimal treatment decision rules so that treatment is allocated in a way that maximizes, on average, predefined patient outcomes. Less attention has been given to subgroup testing, which involves evaluation of whether at least a subgroup of the population benefits from an investigative treatment, compared to some control or standard of care. In this work, we propose a general framework for testing for the existence of a subgroup with enhanced treatment effects based on the difference of the estimated value functions under an estimated optimal treatment regime and a fixed regime that assigns everyone to the same treatment. Our proposed test does not require specification of the parametric form of the subgroup and allows heterogeneous treatment effects within the subgroup. The test applies to cases when the outcome of interest is either a time‐to‐event or a (uncensored) scalar, and is valid at the exceptional law. 
To demonstrate the empirical performance of the proposed test, we study the type I error and power of the test statistics in simulations and also apply our test to data from a Phase III trial in patients with hematological malignancies.}, journal={BIOMETRICS}, author={Johnson, Dana and Lu, Wenbin and Davidian, Marie}, year={2022}, month={Aug} } @article{tsiatis_davidian_2022, title={Group sequential methods for interim monitoring of randomized clinical trials with time-lagged outcome}, volume={9}, ISSN={["1097-0258"]}, DOI={10.1002/sim.9580}, abstractNote={The primary analysis in two-arm clinical trials usually involves inference on a scalar treatment effect parameter; for example, depending on the outcome, the difference of treatment-specific means, risk difference, risk ratio, or odds ratio. Most clinical trials are monitored for the possibility of early stopping. Because ordinarily the outcome on any given subject can be ascertained only after some time lag, at the time of an interim analysis, among the subjects already enrolled, the outcome is known for only a subset and is effectively censored for those who have not been enrolled sufficiently long for it to be observed. Typically, the interim analysis is based only on the data from subjects for whom the outcome has been ascertained. A goal of an interim analysis is to stop the trial as soon as the evidence is strong enough to do so, suggesting that the analysis ideally should make the most efficient use of all available data, thus including information on censoring as well as other baseline and time-dependent covariates in a principled way. A general group sequential framework is proposed for clinical trials with a time-lagged outcome. Treatment effect estimators that take account of censoring and incorporate covariate information at an interim analysis are derived using semiparametric theory and are demonstrated to lead to stronger evidence for early stopping than standard approaches. 
The associated test statistics are shown to have the independent increments structure, so that standard software can be used to obtain stopping boundaries.}, journal={STATISTICS IN MEDICINE}, author={Tsiatis, Anastasios A. and Davidian, Marie}, year={2022}, month={Sep} } @article{davidian_2022, title={Methods Based on Semiparametric Theory for Analysis in the Presence of Missing Data}, volume={9}, ISSN={["2326-831X"]}, DOI={10.1146/annurev-statistics-040120-025906}, abstractNote={A statistical model is a class of probability distributions assumed to contain the true distribution generating the data. In parametric models, the distributions are indexed by a finite-dimensional parameter characterizing the scientific question of interest. Semiparametric models describe the distributions in terms of a finite-dimensional parameter and an infinite-dimensional component, offering more flexibility. Ordinarily, the statistical model represents distributions for the full data intended to be collected. When elements of these full data are missing, the goal is to make valid inference on the full-data-model parameter using the observed data. In a series of fundamental works, Robins, Rotnitzky, and colleagues derived the class of observed-data estimators under a semiparametric model assuming that the missingness mechanism is at random, which leads to practical, robust methodology for many familiar data-analytic challenges. This article reviews semiparametric theory and the key steps in this derivation. Expected final online publication date for the Annual Review of Statistics, Volume 9 is March 2022. 
Please see http://www.annualreviews.org/page/journal/pubdates for revised estimates.}, journal={ANNUAL REVIEW OF STATISTICS AND ITS APPLICATION}, author={Davidian, Marie}, year={2022}, pages={167–196} } @article{cooks_duke_neil_vilaro_wilson-howard_modave_george_odedina_lok_carek_et al._2022, title={Telehealth and racial disparities in colorectal cancer screening: A pilot study of how virtual clinician characteristics influence screening intentions}, volume={6}, ISSN={["2059-8661"]}, DOI={10.1017/cts.2022.386}, abstractNote={Abstract Introduction: Racial disparities in colorectal cancer (CRC) can be addressed through increased adherence to screening guidelines. In real-life encounters, patients may be more willing to follow screening recommendations delivered by a race concordant clinician. The growth of telehealth to deliver care provides an opportunity to explore whether these effects translate to a virtual setting. The primary purpose of this pilot study is to explore the relationships between virtual clinician (VC) characteristics and CRC screening intentions after engagement with a telehealth intervention leveraging technology to deliver tailored CRC prevention messaging. Methods: Using a posttest-only design with three factors (VC race-matching, VC gender, intervention type), participants ( N = 2267) were randomised to one of eight intervention treatments. Participants self-reported perceptions and behavioral intentions. Results: The benefits of matching participants with a racially similar VC trended positive but did not reach statistical significance. Specifically, race-matching positively influenced screening intentions for Black participants but not for Whites ( b = 0.29, p = 0.10). Importantly, perceptions of credibility, attractiveness, and message relevance significantly influenced screening intentions and the relationship with race-matching. 
Conclusions: To reduce racial CRC screening disparities, investments are needed to identify patient-focused interventions to address structural barriers to screening. This study suggests that telehealth interventions that match Black patients with a Black VC can enhance perceptions of credibility and message relevance, which may then improve screening intentions. Future research is needed to examine how to increase VC credibility and attractiveness, as well as message relevance without race-matching.}, number={1}, journal={JOURNAL OF CLINICAL AND TRANSLATIONAL SCIENCE}, author={Cooks, Eric J. and Duke, Kyle A. and Neil, Jordan M. and Vilaro, Melissa J. and Wilson-Howard, Danyell and Modave, Francois and George, Thomas J. and Odedina, Folakemi T. and Lok, Benjamin C. and Carek, Peter and et al.}, year={2022}, month={Apr} } @article{krieger_neil_duke_zalake_tavassoli_vilaro_wilson-howard_chavez_laber_davidian_et al._2021, title={A Pilot Study Examining the Efficacy of Delivering Colorectal Cancer Screening Messages via Virtual Health Assistants}, volume={61}, url={http://dx.doi.org/10.1016/j.amepre.2021.01.014}, DOI={10.1016/j.amepre.2021.01.014}, abstractNote={IntroductionPatients are more likely to complete colorectal cancer screening when recommended by a race-concordant healthcare provider. Leveraging virtual healthcare assistants to deliver tailored screening interventions may promote adherence to colorectal cancer screening guidelines among diverse patient populations. The purpose of this pilot study is to determine the efficacy of the Agent Leveraging Empathy for eXams virtual healthcare assistant intervention to increase patient intentions to talk to their doctor about colorectal cancer screening. 
It also examines the influence of animation and race concordance on intentions to complete colorectal cancer screening.MethodsWhite and Black adults (N=1,363) aged 50–73 years and not adherent to colorectal cancer screening guidelines were recruited from Qualtrics Panels in 2018 to participate in a 3-arm (animated virtual healthcare assistant, static virtual healthcare assistant, attention control) message design experiment. In 2020, a probit regression model was used to identify the intervention effects.ResultsParticipants assigned to the animated virtual healthcare assistant (p<0.01) reported higher intentions to talk to their doctor about colorectal cancer screening than participants assigned to the other conditions. There was a significant effect of race concordance on colorectal cancer screening intentions but only in the static virtual healthcare assistant condition (p=0.04). Participant race, age, trust in healthcare providers, health literacy, and cancer information overload were also significant predictors of colorectal cancer screening intentions.ConclusionsAnimated virtual healthcare assistants were efficacious compared with the static virtual healthcare assistant and attention control conditions. The influence of race concordance between source and participant was inconsistent across conditions. This warrants additional investigation in future studies given the potential for virtual healthcare assistant‒assisted interventions to promote colorectal cancer screening within guidelines. Patients are more likely to complete colorectal cancer screening when recommended by a race-concordant healthcare provider. Leveraging virtual healthcare assistants to deliver tailored screening interventions may promote adherence to colorectal cancer screening guidelines among diverse patient populations. 
The purpose of this pilot study is to determine the efficacy of the Agent Leveraging Empathy for eXams virtual healthcare assistant intervention to increase patient intentions to talk to their doctor about colorectal cancer screening. It also examines the influence of animation and race concordance on intentions to complete colorectal cancer screening. White and Black adults (N=1,363) aged 50–73 years and not adherent to colorectal cancer screening guidelines were recruited from Qualtrics Panels in 2018 to participate in a 3-arm (animated virtual healthcare assistant, static virtual healthcare assistant, attention control) message design experiment. In 2020, a probit regression model was used to identify the intervention effects. Participants assigned to the animated virtual healthcare assistant (p<0.01) reported higher intentions to talk to their doctor about colorectal cancer screening than participants assigned to the other conditions. There was a significant effect of race concordance on colorectal cancer screening intentions but only in the static virtual healthcare assistant condition (p=0.04). Participant race, age, trust in healthcare providers, health literacy, and cancer information overload were also significant predictors of colorectal cancer screening intentions. Animated virtual healthcare assistants were efficacious compared with the static virtual healthcare assistant and attention control conditions. The influence of race concordance between source and participant was inconsistent across conditions. This warrants additional investigation in future studies given the potential for virtual healthcare assistant‒assisted interventions to promote colorectal cancer screening within guidelines.}, number={2}, journal={American Journal of Preventive Medicine}, publisher={Elsevier BV}, author={Krieger, Janice L. and Neil, Jordan M. and Duke, Kyle A. and Zalake, Mohan S. and Tavassoli, Fatemeh and Vilaro, Melissa J. and Wilson-Howard, Danyell S. 
and Chavez, Sarah Y. and Laber, Eric B. and Davidian, Marie and others}, year={2021}, month=aug, pages={251--255} } @article{tsiatis_davidian_2021a, title={Estimating vaccine efficacy over time after a randomized study is unblinded}, volume={8}, ISSN={1541-0420}, DOI={10.1111/biom.13509}, abstractNote={The COVID-19 pandemic due to the novel coronavirus SARS CoV-2 has inspired remarkable breakthroughs in the development of vaccines against the virus and the launch of several phase 3 vaccine trials in Summer 2020 to evaluate vaccine efficacy (VE). Trials of vaccine candidates using mRNA delivery systems developed by Pfizer-BioNTech and Moderna have shown substantial VEs of 94–95\%, leading the US Food and Drug Administration to issue Emergency Use Authorizations and subsequent widespread administration of the vaccines. As the trials continue, a key issue is the possibility that VE may wane over time. Ethical considerations dictate that trial participants be unblinded and those randomized to placebo be offered study vaccine, leading to trial protocol amendments specifying unblinding strategies. Crossover of placebo subjects to vaccine complicates inference on waning of VE. We focus on the particular features of the Moderna trial and propose a statistical framework based on a potential outcomes formulation within which we develop methods for inference on potential waning of VE over time and estimation of VE at any postvaccination time. The framework clarifies assumptions made regarding individual- and population-level phenomena and acknowledges the possibility that subjects who are more or less likely to become infected may be crossed over to vaccine differentially over time. The principles of the framework can be adapted straightforwardly to other trials.}, journal={BIOMETRICS}, author={Tsiatis, Anastasios A. 
and Davidian, Marie}, year={2021}, month={Aug} } @article{tsiatis_davidian_holloway_2021, title={Estimation of the odds ratio in a proportional odds model with censored time-lagged outcome in a randomized clinical trial}, volume={12}, ISSN={["1541-0420"]}, url={https://doi.org/10.1111/biom.13603}, DOI={10.1111/biom.13603}, abstractNote={In many randomized clinical trials of therapeutics for COVID-19, the primary outcome is an ordinal categorical variable, and interest focuses on the odds ratio (OR; active agent vs control) under the assumption of a proportional odds model. Although at the final analysis the outcome will be determined for all subjects, at an interim analysis, the status of some participants may not yet be determined, for example, because ascertainment of the outcome may not be possible until some prespecified follow-up time. Accordingly, the outcome from these subjects can be viewed as censored. A valid interim analysis can be based on data only from those subjects with full follow-up; however, this approach is inefficient, as it does not exploit additional information that may be available on those for whom the outcome is not yet available at the time of the interim analysis. Appealing to the theory of semiparametrics, we propose an estimator for the OR in a proportional odds model with censored, time-lagged categorical outcome that incorporates additional baseline and time-dependent covariate information and demonstrate that it can result in considerable gains in efficiency relative to simpler approaches. A byproduct of the approach is a covariate-adjusted estimator for the OR based on the full data that would be available at a final analysis.}, journal={BIOMETRICS}, author={Tsiatis, Anastasios A. 
and Davidian, Marie and Holloway, Shannon T.}, year={2021}, month={Dec} } @article{tsiatis_davidian_2021, title={Rejoinder: Estimating vaccine efficacy over time after a randomized study is unblinded}, volume={8}, ISSN={["1541-0420"]}, DOI={10.1111/biom.13539}, abstractNote={We are honored to have our work critiqued by such distinguished, internationally recognized authorities on vaccine efficacy and vaccine trials. When the first author (AAT) was appointed to the Data and Safety Monitoring Board for the U.S. government-sponsored COVID-19 vaccine trials, we were embarrassingly unacquainted with even the basic concepts in this area, starting with the definition of vaccine efficacy (VE), and it was to the fundamental work of these researchers we turned to get up to speed. Responding to the points they raise has enhanced our understanding of the area and the role of our work within it. We comment on the issues raised in each discussion in turn; because all note challenges posed by viral variants, we address this point separately at the end. Heterogeneity of vaccine efficacy (HVE) and bias. We thank Drs. Janes, Gao, and Luedtke (JGL henceforth) for raising this issue, as they inspired us to think more deeply about the role of heterogeneity. As JGL note, our assumption (ii), E { π 1 ( t , τ ) | X } / E { π 0 ( t ) | X } = q ( τ ) , precludes HVE, as we demonstrate shortly. Our assumption of no HVE embodied in (ii) was based on the emerging evidence in the vaccine trials suggesting little variation in VE across subgroups defined by baseline characteristics X, which, as cited by JGL, persists to the present, as well as scant information on viral variants available at the time. JGL conjecture that the result in our simulations that the methods with stabilized weights equal to 1 yield unbiased inference similar to that obtained with the inverse probability weighted methods, which adjust for possible confounding, could be a consequence of the no-HVE assumption. 
Their comments are relevant without reference to VE waning, so, for simplicity, we discuss them in the case π 1 ( t , τ ) = π 1 ( t ) , so that VE does not depend on time since vaccination τ. Here, our assumption (ii) of no HVE becomes E { π 1 ( t ) | X } / E { π 0 ( t ) | X } = q , a constant. In our framework, X comprises individual-specific covariates, such as age, gender, and so on; thus, just as c b ( t ) , c 0 u ( t ) , c 01 ℓ u ( t ) , c 1 u ( t ) , π 1 ( t ) , π 0 ( t ) are inherent, individual-specific characteristics of trial participants (albeit unobservable), so are the components of X. In contrast, viral variants are external forces to which individuals are exposed. We focus here on HVE due to variation in X and discuss heterogeneity in VE across variants in Section 5. As in most clinical trials, interest focuses on population-averaged inference, so on marginal VE in the overall population, which here, from (2) of our article, is V E ( t ) = 1 − R b ( t ) = 1 − E { p ( t , S ) c b ( t ) π 1 ( t ) } / E { p ( t , S ) c b ( t ) π 0 ( t ) } . Analogously, V E ( t , X ) = 1 − R b ( t , X ) = 1 − E { p ( t , S ) c b ( t ) π 1 ( t ) | X } / E { p ( t , S ) c b ( t ) π 0 ( t ) X } is VE in the subpopulation defined by X, if dependent on X implies HVE. Under our assumptions (i) and (ii), V E ( t , X ) = V E ( t ) = 1 − q , so there is no HVE, and VE is constant over time. If (ii) is relaxed to E { π 1 ( t ) | X } / E { π 0 ( t ) | X } = q ( X ) , then under (i) and this version of (ii), V E ( t , X ) = 1 − q ( X ) (HVE), and it is straightforward that the marginal VE is V E ( t ) = 1 − R b ( t ) = 1 − E { w ( t , X ) q ( X ) } / E { w ( t , X ) } = E { w ( t , X ) V E ( t , X ) } / E { w ( t , X ) } , where w ( t , X ) = E { p ( t , S ) c b ( t ) | X } E { π 0 ( t ) | X } , so can be viewed as a weighted average of X-specific VEs. Thus, HVE introduces the complication that marginal VE is time dependent. 
Inspection of the weights w ( t , X ) suggests that they may not vary substantially over time; for example, it may be reasonable to assume that p ( t , S ) is independent of all other individual-specific quantities and thus factors out of R b ( t ) , and that the ratio of c b ( t ) for two randomly chosen individuals might stay in roughly constant proportion over time, as some are inherently risk-averse and others not. If w ( t , X ) do not vary substantially, then neither does marginal VE. Accordingly, we discuss JGL's conjecture under this scenario, as we believe they implicitly intended, by considering estimation of V E ( t ) , equivalently of R b ( t ) , at a specific time t. Taking infection rates and hazard rates to be equivalent as in Section 4.3 of our article, R b ( t ) = E { w ( t , X ) q ( X ) } / E { w ( t , X ) } is approximated by the ratio of the marginal hazard rates for potential infection times under vaccine and placebo and is the estimand of interest. JGL consider the situation where X contains age, V E ( t , X ) is lower for older individuals (HVE), and such individuals have a higher probability of being unblinded earlier. They assert that the “standard” analysis based on Cox models, which estimates the marginal hazard ratio by the usual partial likelihood estimator and is roughly equivalent to our approach with stability weights equal to 1, will yield positively biased inference on the marginal VE with HVE but consistent inference under no HVE. To gain insight, we consider the implications of the relationship between HVE and the unblinding process for estimation of marginal VE. It is straightforward to show that, if there were no unblinding at all, or if the unblinding probability does not depend on X, then the standard analysis leads to a consistent estimator for V E ( t ) whether or not there is HVE. 
If the unblinding probability is X-dependent, but there is no HVE, JGL contend that the standard analysis and our method with stabilized weights equal to 1 also lead to consistent inference on marginal VE, which could explain our simulation results. We can show, however, that if the dependence of unblinding probability on X is different for vaccine and placebo, then even with no HVE ( q ( X ) = q ), bias can arise if E { π 0 ( t ) | X } depends on X. This is the configuration in our simulations for unblinding in the interval [ T P , T U ) , suggesting the potential for bias; we speculate that the negligible bias seen in our simulations is partially due to the shortness (1 week) of this interval. We can also show that, with both HVE and unblinding probability depending on X, bias results and is positive when both q ( X ) and unblinding probability decrease with X, as noted by JGL. Our method with estimated stability weights based on correct models for unblinding depending on X leads to consistent estimation of marginal VE whether or not HVE holds. The foregoing developments are for a fixed t. It is well known that, if the proportional hazards assumption is violated, the standard partial likelihood estimator for the assumed constant hazard ratio estimates a weighted average over time of the time-dependent hazard ratio, R b ( t ) in our case. If the w ( t , X ) do not vary substantially with t, as above, neither will R b ( t ) and V E ( t ) , and this weighted average may have public health relevance. Here, the results above still apply; under no HVE, marginal VE is constant and consistently estimated, and under HVE and X-dependent unblinding, the standard analysis will be biased while our methods consistently estimate this weighted average. If instead the w ( t , X ) and thus R b ( t ) and V E ( t ) do vary nontrivially with t, it may be possible to incorporate estimation of the vaccine and placebo hazard rates via nonparametric smoothing. 
Potential contact rates as potential outcomes. Dr. Halloran raises the subtlety of referring to the individual-specific contact rates as “potential outcomes.” In the causal inference literature, ordinarily, a potential outcome is a characteristic that is potentially observable, as for a clinical outcome if an individual were to receive placebo or active treatment. In contrast, the contact rates are conceptual, unobservable quantities, as are the transmission probabilities. Accordingly, { c b ( t ) , c 0 u ( t ) , c 01 ℓ u ( t ) , c 1 u ( t ) t > 0 , π 0 ( t ) , π 1 ( t , τ ) , τ ≥ 0 } are similar to unobservable random effects or frailties that characterize heterogeneity across individuals. Model for VE waning. The model g ( u ; θ 1 ) = θ 1 I ( u > v ) we used in the simulations is admittedly simplistic, and we chose it to simplify interpretation of the results. Dr. Halloran rightly notes that the analyst must select the change point v at which efficacy is thought to shift, and clearly inference on waning is predicated on this choice. Such a model is most likely a considerable simplification of a more complex truth under which waning of VE occurs smoothly over time, but it could be a useful tool for preliminary exploratory analysis: one could estimate θ1 over a range of v to gain insight, then adopt a linear or cubic spline representation with knot selection informed by these preliminary analyses to obtain a more nuanced approximation to smoothly continuous waning (these choices are built-in options in our R package VEwaning). As Dr. Follmann suggests in his discussion, it may be possible to prove that the VE as a function of τ is nonparametrically recoverable from the data, although sample size considerations may limit the complexity of how g ( u ; θ 1 ) is represented. Symptomatic viral infection. Dr. 
Halloran points out that the primary endpoint in the Moderna trial is symptomatic COVID-19 infection, but our presentation is admittedly unclear regarding the meaning of “infection” in our potential outcomes formulation. We tacitly take π 0 ( t ) and π 1 ( t , τ ) to be the individual-specific probabilities of transmission per contact leading to symptomatic infection and thus lump asymptomatic infection with no infection without comment. We thus do not acknowledge explicitly that symptomatic infection results from transmission that, with some probability, results in symptomatic disease. Dr. Halloran rightly raises the issue of how the formulation should be modified to acknowledge this reality. Let ρ 0 ( t ) and ρ 1 ( t , τ ) be the individual-specific probabilities of transmission per contact and s 0 ( t ) and s 1 ( t , τ ) be individual-specific conditional probabilities of becoming symptomatic given transmission, that is, pathogenicity, under placebo and vaccine. Then π 0 ( t ) = ρ 0 ( t ) s 0 ( t ) and π 1 ( t , τ ) = ρ 1 ( t , τ ) s 1 ( t , τ ) . Because pathogenicity is a biological characteristic, (i) can be modified reasonably to { ρ 1 ( t , τ ) , ρ 0 ( t ) , s 1 ( t , τ ) , s 0 ( t ) } ⊥ { S , c b ( t ) } | X and { ρ 1 ( t , τ ) , ρ 0 ( t ) , s 1 ( t , τ ) , s 0 ( t ) } ⊥ { S , c 01 ℓ u ( t ) , c 1 u ( t ) } | X . Assumption (ii) is equivalent to E { ρ 1 ( t , τ ) s 1 ( t , τ ) | X } / E { ρ 0 ( t ) s 0 ( t ) | X } = q ( τ ) . To modify (ii) to incorporate pathogenicity, one can assume that (ii)(a) s 1 ( t , τ ) ⊥ ρ 1 ( t , τ ) | X and s 0 ( t ) ⊥ ρ 0 ( t ) | X , and (ii)(b) E { ρ 1 ( t , τ ) | X } / E { ρ 0 ( t ) | X } and E { s 1 ( t , τ ) | X } / E { s 0 ( t ) | X } do not depend on t or X so are functions only of τ. Assumption (ii)(b) can be viewed as Dr. Halloran's speculated constant of proportionality. 
If one is willing to assume that there is no effect of vaccine on pathogenicity, then s 1 ( t , τ ) = s 0 ( t ) at any t regardless of τ, and (ii)(b) is unnecessary. Dr. Follmann provides an excellent example that clarifies the challenges of estimating VE and waning of VE after unblinding and how differential unblinding can lead to biased inference on waning. This example and Dr. Follmann's nice summary of the main principles underlying our approach in his Section 3 strongly complement our account of the methodology by making the key issues more accessible. We comment on two main points raised by Dr. Follmann. Celebratory bias. In our formulation, unblinded placebo participants who receive study vaccine engage in behavior represented by c 01 ℓ u ( t ) prior to reaching full efficacy after an interval of length ℓ and then adopt behavior c 1 u ( t ) , whereas unblinded vaccine participants adopt c 1 u ( t ) immediately. Dr. Follmann suggests that, while unblinded placebo participants now on study vaccine will behave as c 01 ℓ u ( t ) for the efficacy lag interval, unblinded vaccine participants may experience a celebratory interval of length C during which they engage in more risky behavior, which we could represent in our framework by c 1 c u ( t ) , say. We agree with Dr. Follmann that it is prudent to remove these individuals from the risk sets during the celebratory interval, just as we remove unblinded placebo participants behaving as c 01 ℓ u ( t ) during the efficacy lag interval. Given that C would be unknown, a possible sensitivity analysis would involve specifying a range of values for C and examining the stability of the results. Time-dependent covariate information. Dr. Follmann raises the possibility of exploiting time-dependent, post-randomization covariate information to account for potential confounding, and he provides interesting examples of such covariates. Our methodology readily incorporates time-dependent covariates. 
Values of such covariates up to time r could be included in the specifications of models for the unblinding hazard functions λ R , j ( r | X , A , E ) , j = 1 , 2 ; similarly, such information could be incorporated in the model for pr ( Ψ = 1 | X , E , Γ , R ) . All discussants note the potential for variability in VE across emerging new variants of the SARS-CoV-2 virus. Dr. Follmann sketches how, given data on viral genotypes from infected trial participants, variant-specific analyses of VE waning can be carried out. We briefly outline how our framework can be modified to allow for such variant-specific inference, using ν = 1 , … , V to index V variants of interest. As noted in Section 2, variants are external forces to which individuals are exposed. Thus, prevalence of infection can differ by variant, represented by defining p ( t , s , ν ) to be the prevalence for variant ν at time t at site s. Likewise, in accordance with emerging evidence (e.g., the delta variant), it is natural to take individual-specific transmission probabilities per contact at time t to differ by variant, denoted by π 0 ( t , ν ) and π 1 ( t , τ , ν ) for variant ν under placebo and vaccination with study vaccine for τ > 0 time units. We take the contact rates reflecting individual-specific behavior c b ( t ) , c 0 u ( t ) , c 01 ℓ u ( t ) , c 1 u ( t ) to remain unchanged. The infection rates in the study population at time t for variant ν if all individuals were to receive placebo or vaccine at time t − τ are then I 0 b ( t , ν ) = E { p ( t , S , ν ) c b ( t ) π 0 ( t , ν ) } and I 1 b ( t , τ , ν ) = E { p ( t , S , ν ) c b ( t ) π 1 ( t , τ , ν ) } , and, analogous to (2) of our article, define VE for variant ν at time t after vaccination at t − τ as V E ( t , τ , ν ) = 1 − R b ( t , τ , ν ) = 1 − I 1 b ( t , τ , ν ) / I 0 b ( t , ν ) . 
Assumption (i) is generalized to { π 1 ( t , τ , ν ) , π 0 ( t , ν ) } ⊥ { S , c b ( t ) } | X , and { π 1 ( t , τ , ν ) , π 0 ( t , ν ) } ⊥ { S , c 01 ℓ u ( t ) , c 1 u ( t ) } | X similarly. We modify (ii) to reflect the belief that, while VE can vary by variant, within variants, there is no additional HVE associated with components of X; namely, (ii) becomes E { π 1 ( t , τ , ν ) | X } / E { π 0 ( t , ν ) | X } = q ( τ , ν ) . Under (i) and (ii), V E ( t , τ , ν ) = V E ( τ , ν ) = 1 − R b ( τ , ν ) = 1 − q ( τ , ν ) is the VE for variant ν = 1 , … , V . Moreover, with I 1 u ( t , τ , ν ) = E { p ( t , S , ν ) c 1 u ( t ) π 1 ( t , τ , ν ) } , τ ≥ ℓ , (4) of our article becomes I 1 u ( t , τ , ν ) = I 1 u ( t , ν ) R b ( τ , ν ) / R b ( ℓ , ν ) , τ ≥ ℓ . Representing the variant-specific infection rate ratio R b ( τ , ν ) = exp { ζ ν ( τ ) } I ( τ < ℓ ) + exp { θ 0 ν + g ν ( τ − ℓ ; θ 1 ν ) } I ( τ ≥ ℓ ) , where now ζ ν ( · ) and g ν ( · ; · ) are variant-specific, we have for τ ≥ ℓ , analogous to (8), I 1 b ( t , τ , ν ) = I 1 b ( t , ν ) exp { θ 0 ν + g ν ( τ − ℓ ; θ 1 ν ) } , I 1 u ( t , τ , ν ) = I 1 u ( t , ν ) exp { g ν ( τ − ℓ ; θ 1 ν ) } , and thus V E ( τ , ν ) = 1 − exp { θ 0 ν + g ν ( τ − ℓ ; θ 1 ν ) } , ν = 1 , … , V , as given by Dr. Follmann. With the observed data as in (1) of our article, redefine Δ so Δ = 0 if U > L and Δ = 1 , … , V according to the infection variant otherwise, and define d N ( t , ν ) = I ( U = t , Δ = ν ) , ν = 1 , … , V . Then, under obvious modifications of the consistency assumptions (16) and (17)–(20), results analogous to (21)–(24) hold, and observed data-estimating functions analogous to those in Section 4.4 can be formulated. 
Defining d N ∼ b ( t , ν ) , Y ∼ b ( t , ν ) , d N ∼ u ( t , ν ) , Y ∼ u ( t , ν ) , Z b ( t , ν ) , and Z u ( t , ν ) , ν = 1 , … , V , as d N ∼ b ( t ) , Y ∼ b ( t ) , d N ∼ u ( t ) , Y ∼ u ( t ) , Z b ( t ) , and Z u ( t ) in Section 4.4 with d N ( t ) replaced by d N ( t , ν ) , g ( v , θ 1 ) by g ν ( v , θ 1 ν ) , and ( θ 0 , θ 1 ) by ( θ 0 ν , θ 1 ν ) , one is led to an estimating equation of the form (30), solution of which in ( θ 0 ν , θ 1 ν ) , ν = 1 , … , V , reduces to solving separate equations in ( θ 0 ν , θ 1 ν ) for each ν. Elaborating on Dr. Follmann's final key point, information on a given variant will be available only during time intervals when it was/is in circulation. If these intervals traverse blinded and unblinded periods of the trial, estimation of both θ 0 ν and θ 1 ν is possible, whereas, as Dr. Follmann notes, if the intervals are primarily within the unblinded phase, only θ 1 ν will be estimable, but will still provide evidence of possibly waning for variant ν.}, journal={BIOMETRICS}, author={Tsiatis, Anastasios A. and Davidian, Marie}, year={2021}, month={Aug} } @book{tsiatis_davidian_laber_holloway_2020, place={Boca Raton}, title={Dynamic Treatment Regimes: Statistical Methods for Precision Medicine}, ISBN={9781498769778}, url={https://www.taylorfrancis.com/books/mono/10.1201/9780429192692/dynamic-treatment-regimes-anastasios-tsiatis-marie-davidian-shannon-holloway-eric-labe}, DOI={10.1201/9780429192692/dynamic-treatment-regimes-anastasios-tsiatis-marie-davidian-shannon-holloway-eric-labe}, journal={CRC Press}, publisher={Chapman & Hall/CRC Press}, author={Tsiatis, A.A. and Davidian, M. and Laber, E.B. 
and Holloway, S.T.}, year={2020} } @misc{ruppert_yin_davidian_tsiatis_byrd_woyach_mandrekar_2019, title={Application of a sequential multiple assignment randomized trial (SMART) design in older patients with chronic lymphocytic leukemia}, volume={30}, ISSN={["1569-8041"]}, DOI={10.1093/annonc/mdz053}, abstractNote={BackgroundIbrutinib therapy is safe and effective in patients with chronic lymphocytic leukemia (CLL). Currently, ibrutinib is administered continuously until disease progression. Combination regimens with ibrutinib are being developed to deepen response which could allow for ibrutinib maintenance (IM) discontinuation. Among untreated older patients with CLL, clinical investigators had the following questions: (i) does ibrutinib + venetoclax + obinutuzumab (IVO) with IM have superior progression-free survival (PFS) compared with ibrutinib + obinutuzumab (IO) with IM, and (ii) does the treatment strategy of IVO + IM for patients without minimal residual disease complete response (MRD- CR) or IVO + IM discontinuation for patients with MRD- CR have superior PFS compared with IO + IM.DesignConventional designs randomize patients to IO with IM or IVO with IM to address the first objective, or randomize patients to each treatment strategy to address the second objective. A sequential multiple assignment randomized trial (SMART) design and analysis is proposed to address both objectives.ResultsA SMART design strategy is appropriate when comparing adaptive interventions, which are defined by an individual’s sequence of treatment decisions and guided by intermediate outcomes, such as response to therapy. A review of common applications of SMART design strategies is provided. 
Specific to the SMART design previously considered for Alliance study A041702, the general structure of the SMART is presented, an approach to sample size and power calculations when comparing adaptive interventions embedded in the SMART with a time-to-event end point is fully described, and analyses plans are outlined.ConclusionSMART design strategies can be used in cancer clinical trials with adaptive interventions to identify optimal treatment strategies. Further, standard software exists to provide sample size, power calculations, and data analysis for a SMART design.}, number={4}, journal={ANNALS OF ONCOLOGY}, author={Ruppert, A. S. and Yin, J. and Davidian, M. and Tsiatis, A. A. and Byrd, J. C. and Woyach, J. A. and Mandrekar, S. J.}, year={2019}, month={Apr}, pages={542–550} } @article{zhang_laber_davidian_tsiatis_2018, title={Interpretable Dynamic Treatment Regimes}, volume={113}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2017.1345743}, abstractNote={Precision medicine is currently a topic of great interest in clinical and intervention science. A key component of precision medicine is that it is evidence-based, i.e., data-driven, and consequently there has been tremendous interest in estimation of precision medicine strategies using observational or randomized study data. One way to formalize precision medicine is through a treatment regime, which is a sequence of decision rules, one per stage of clinical intervention, that map up-to-date patient information to a recommended treatment. An optimal treatment regime is defined as maximizing the mean of some cumulative clinical outcome if applied to a population of interest. It is well-known that even under simple generative models an optimal treatment regime can be a highly nonlinear function of patient information. Consequently, a focal point of recent methodological research has been the development of flexible models for estimating optimal treatment regimes. 
However, in many settings, estimation of an optimal treatment regime is an exploratory analysis intended to generate new hypotheses for subsequent research and not to directly dictate treatment to new patients. In such settings, an estimated treatment regime that is interpretable in a domain context may be of greater value than an unintelligible treatment regime built using 'black-box' estimation methods. We propose an estimator of an optimal treatment regime composed of a sequence of decision rules, each expressible as a list of "if-then" statements that can be presented as either a paragraph or as a simple flowchart that is immediately interpretable to domain experts. The discreteness of these lists precludes smooth, i.e., gradient-based, methods of estimation and leads to non-standard asymptotics. Nevertheless, we provide a computationally efficient estimation algorithm, prove consistency of the proposed estimator, and derive rates of convergence. We illustrate the proposed methods using a series of simulation examples and application to data from a sequential clinical trial on bipolar disorder.}, number={524}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Zhang, Yichi and Laber, Eric B. and Davidian, Marie and Tsiatis, Anastasios A.}, year={2018}, pages={1541–1549} } @article{hager_tsiatis_davidian_2018, title={Optimal two-stage dynamic treatment regimes from a classification perspective with censored survival data}, volume={74}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12894}, abstractNote={Clinicians often make multiple treatment decisions at key points over the course of a patient's disease. A dynamic treatment regime is a sequence of decision rules, each mapping a patient's observed history to the set of available, feasible treatment options at each decision point, and thus formalizes this process. An optimal regime is one leading to the most beneficial outcome on average if used to select treatment for the patient population. 
We propose a method for estimation of an optimal regime involving two decision points when the outcome of interest is a censored survival time, which is based on maximizing a locally efficient, doubly robust, augmented inverse probability weighted estimator for average outcome over a class of regimes. By casting this optimization as a classification problem, we exploit well-studied classification techniques such as support vector machines to characterize the class of regimes and facilitate implementation via a backward iterative algorithm. Simulation studies of performance and application of the method to data from a sequential, multiple assignment randomized clinical trial in acute leukemia are presented.}, number={4}, journal={BIOMETRICS}, author={Hager, Rebecca and Tsiatis, Anastasios A. and Davidian, Marie}, year={2018}, month={Dec}, pages={1180–1192} } @misc{laber_rose_davidian_tsiatis_2018, title={Q-Learning}, ISBN={9781118445112}, url={http://dx.doi.org/10.1002/9781118445112.stat07998}, DOI={10.1002/9781118445112.stat07998}, abstractNote={Q-learning is a regression-based approximate dynamic programming algorithm that is commonly used to estimate sequences of decision rules that maximizes mean utility when applied to the population of interest. Because Q-learning is based on fitting a series of regression models, it is (i) highly configurable in the sense that these regression models can be chosen to be parametric, semiparametric, or nonparametric; and (ii) extensible to settings with censored, high-dimensional, or missing data. This entry reviews Q-learning for decision problems evolving over a finite time horizon including a brief review of open questions and active lines of research.}, journal={Wiley StatsRef: Statistics Reference Online}, publisher={John Wiley & Sons, Ltd}, author={Laber, Eric B. and Rose, Eric J. 
and Davidian, Marie and Tsiatis, Anastasios A.}, year={2018}, month={Feb}, pages={1–10} } @article{thompson_davidian_buckland_2017, title={Biometrics, JABES and the International Biometric Society}, volume={22}, ISSN={1085-7117 1537-2693}, url={http://dx.doi.org/10.1007/S13253-017-0302-9}, DOI={10.1007/S13253-017-0302-9}, number={3}, journal={Journal of Agricultural, Biological and Environmental Statistics}, publisher={Springer Nature}, author={Thompson, Elizabeth and Davidian, Marie and Buckland, Stephen}, year={2017}, month={Aug}, pages={221–223} } @article{laber_davidian_2017, title={Dynamic treatment regimes, past, present, and future: A conversation with experts}, volume={26}, ISSN={["1477-0334"]}, DOI={10.1177/0962280217708661}, abstractNote={We asked three leading researchers in the area of dynamic treatment regimes to share their stories on how they became interested in this topic and their perspectives on the most important opportunities and challenges for the future.}, number={4}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Laber, Eric B. and Davidian, Marie}, year={2017}, month={Aug}, pages={1605–1610} } @article{kelleher_dorfman_plumb vilardaga_majestic_winger_gandhi_nunez_van denburg_shelby_reed_et al._2017, title={Optimizing delivery of a behavioral pain intervention in cancer patients using a sequential multiple assignment randomized trial SMART}, volume={57}, ISSN={1551-7144}, url={http://dx.doi.org/10.1016/J.CCT.2017.04.001}, DOI={10.1016/J.CCT.2017.04.001}, abstractNote={Pain is common in cancer patients and results in lower quality of life, depression, poor physical functioning, financial difficulty, and decreased survival time. Behavioral pain interventions are effective and nonpharmacologic. Traditional randomized controlled trials (RCT) test interventions of fixed time and dose, which poorly represent successive treatment decisions in clinical practice. 
We utilize a novel approach to conduct a RCT, the sequential multiple assignment randomized trial (SMART) design, to provide comparative evidence of: 1) response to differing initial doses of a pain coping skills training (PCST) intervention and 2) intervention dose sequences adjusted based on patient response. We also examine: 3) participant characteristics moderating intervention responses and 4) cost-effectiveness and practicality.Breast cancer patients (N=327) having pain (ratings≥5) are recruited and randomly assigned to: 1) PCST-Full or 2) PCST-Brief. PCST-Full consists of 5 PCST sessions. PCST-Brief consists of one 60-min PCST session. Five weeks post-randomization, participants re-rate their pain and are re-randomized, based on intervention response, to receive additional PCST sessions, maintenance calls, or no further intervention. Participants complete measures of pain intensity, interference and catastrophizing.Novel RCT designs may provide information that can be used to optimize behavioral pain interventions to be adaptive, better meet patients' needs, reduce barriers, and match with clinical practice. This is one of the first trials to use a novel design to evaluate symptom management in cancer patients and in chronic illness; if successful, it could serve as a model for future work with a wide range of chronic illnesses.}, journal={Contemporary Clinical Trials}, publisher={Elsevier BV}, author={Kelleher, Sarah A. and Dorfman, Caroline S. and Plumb Vilardaga, Jen C. and Majestic, Catherine and Winger, Joseph and Gandhi, Vicky and Nunez, Christine and Van Denburg, Alyssa and Shelby, Rebecca A. and Reed, Shelby D. 
and et al.}, year={2017}, month={Jun}, pages={51–57} } @article{davidian_ivanova_marchenko_2017, title={Special Issue of Journal of Biopharmaceutical Statistics dedicated to 2016 Trends and Innovations in Clinical Trial Statistics (TICTS) Conference}, volume={27}, ISSN={["1520-5711"]}, DOI={10.1080/10543406.2016.1273038}, abstractNote={Should the Food and Drug Administration (FDA) approve a new weight loss drug if Pr (weight loss of 5kg) > 0.95 instead of using the usual two-sided p-value < 0.05 paradigm? This was just one of the...}, number={3}, journal={JOURNAL OF BIOPHARMACEUTICAL STATISTICS}, author={Davidian, Marie and Ivanova, Anastasia and Marchenko, Olga}, year={2017}, pages={357–357} } @article{vock_durheim_tsuang_copeland_tsiatis_davidian_neely_lederer_palmer_2017, title={Survival benefit of lung transplantation in the modern era of lung allocation}, volume={14}, number={2}, journal={Annals of the American Thoracic Society}, author={Vock, D. M. and Durheim, M. T. and Tsuang, W. M. and Copeland, C. A. F. and Tsiatis, A. A. and Davidian, M. and Neely, M. L. and Lederer, D. J. and Palmer, S. M.}, year={2017}, pages={172–181} } @misc{verbeke_fieuws_molenberghs_davidian_2017, title={The analysis of multivariate longitudinal data: A review: Response to letter of M. Gebregziabher}, volume={26}, ISSN={["1477-0334"]}, DOI={10.1177/0962280214539862}, abstractNote={[Verbeke, Geert; Fieuws, Steffen; Molenberghs, Geert] Katholieke Univ Leuven, Interuniv Inst Biostat & Stat Bioinformat, B-3000 Leuven, Belgium. [Verbeke, Geert; Molenberghs, Geert] Univ Hasselt, Interuniv Inst Biostat & Stat Bioinformat, Diepenbeek, Belgium. 
[Davidian, Marie] North Carolina State Univ, Dept Stat, Raleigh, NC USA.}, number={1}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Verbeke, Geert and Fieuws, Steffen and Molenberghs, Geert and Davidian, Marie}, year={2017}, month={Feb}, pages={112–112} } @article{olby_muguet-chanoit_lim_davidian_mariani_freeman_platt_humphrey_kent_giovanella_et al._2016, title={A Placebo-Controlled, Prospective, Randomized Clinical Trial of Polyethylene Glycol and Methylprednisolone Sodium Succinate in Dogs with Intervertebral Disk Herniation}, volume={30}, ISSN={["1939-1676"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-84956678975&partnerID=MN8TOARS}, DOI={10.1111/jvim.13657}, abstractNote={Background Acute intervertebral disk herniation (IVDH) is a common cause of spinal cord injury in dogs and currently there is no proven medical treatment to counter secondary injury effects. Use of methylprednisolone sodium succinate (MPSS) or polyethylene glycol (PEG) as neuroprotectants is advocated but controversial because neither treatment has been tested in placebo-controlled, randomized, blinded trials in dogs. Hypothesis Polyethylene glycol will improve the outcome of severe spinal cord injury caused by IVDH compared to MPSS or placebo. Animals Client-owned dogs with acute onset of thoracolumbar IVDH causing paralysis and loss of nociception for <24 hours. Methods Dogs were randomized to receive MPSS, PEG, or placebo; drugs appeared identical and group allocation was masked. Drug administration was initiated once the diagnosis of IVDH was confirmed and all dogs underwent hemilaminectomy. Neurologic function was assessed 2, 4, 8, and 12 weeks postoperatively using an open field gait score (OFS) as the primary outcome measure. Outcomes were compared by the Wilcoxon rank sum test. Results Sixty-three dogs were recruited and 47.6% recovered ambulation. 17.5% developed progressive myelomalacia but there was no association with group. 
There was no difference in OFS among groups. Although full study power was not reached, conditional power analyses indicated the futility of continued case recruitment. Conclusions This clinical trial did not show a benefit of either MPSS or PEG in the treatment of acute, severe thoracolumbar IVDH when used as adjunctive medical treatment administered to dogs presenting within 24 hours of onset of paralysis.}, number={1}, journal={JOURNAL OF VETERINARY INTERNAL MEDICINE}, author={Olby, N. J. and Muguet-Chanoit, A. C. and Lim, J. -H. and Davidian, M. and Mariani, C. L. and Freeman, A. C. and Platt, S. R. and Humphrey, J. and Kent, M. and Giovanella, C. and et al.}, year={2016}, pages={206–214} } @inbook{davidian_tsiatis_laber_2016, place={Boca Raton}, title={Dynamic treatment regimes}, booktitle={Cancer Clinical Trials: Current and Controversial Issues in Design and Analysis}, publisher={Chapman & Hall/CRC Press}, author={Davidian, M. and Tsiatis, A.A. and Laber, E.B.}, editor={George, S.L. and Wang, X. and Pang, H.Editors}, year={2016}, pages={409–446} } @article{zhang_tsiatis_davidian_zhang_laber_2016, title={Estimating optimal treatment regimes from a classification perspective (vol 1, pg 103, 2012)}, volume={5}, ISSN={["2049-1573"]}, DOI={10.1002/sta4.124}, number={1}, journal={STAT}, author={Zhang, Baqun and Tsiatis, Anastasios A. and Davidian, Marie and Zhang, Min and Laber, Eric}, year={2016}, pages={278–278} } @article{jiang_lu_song_davidian_2016, title={On estimation of optimal treatment regimes for maximizing t -year survival probability}, volume={79}, ISSN={1369-7412}, url={http://dx.doi.org/10.1111/rssb.12201}, DOI={10.1111/rssb.12201}, abstractNote={Summary A treatment regime is a deterministic function that dictates personalized treatment based on patients’ individual prognostic information. 
There is increasing interest in finding optimal treatment regimes, which determine treatment at one or more treatment decision points to maximize expected long-term clinical outcomes, where larger outcomes are preferred. For chronic diseases such as cancer or human immunodeficiency virus infection, survival time is often the outcome of interest, and the goal is to select treatment to maximize survival probability. We propose two non-parametric estimators for the survival function of patients following a given treatment regime involving one or more decisions, i.e. the so-called value. On the basis of data from a clinical or observational study, we estimate an optimal regime by maximizing these estimators for the value over a prespecified class of regimes. Because the value function is very jagged, we introduce kernel smoothing within the estimator to improve performance. Asymptotic properties of the proposed estimators of value functions are established under suitable regularity conditions, and simulation studies evaluate the finite sample performance of the regime estimators. The methods are illustrated by application to data from an acquired immune deficiency syndrome clinical trial.}, number={4}, journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, publisher={Wiley}, author={Jiang, Runchao and Lu, Wenbin and Song, Rui and Davidian, Marie}, year={2016}, month={Sep}, pages={1165–1185} } @misc{davidian_tsiatis_laber_2016, title={Optimal Dynamic Treatment Regimes}, ISBN={9781118445112}, url={http://dx.doi.org/10.1002/9781118445112.stat07895}, DOI={10.1002/9781118445112.stat07895}, journal={Wiley StatsRef: Statistics Reference Online}, publisher={John Wiley & Sons, Ltd}, author={Davidian, Marie and Tsiatis, Anastasios A. 
and Laber, Eric B.}, year={2016}, month={Nov}, pages={1–7} } @article{kass_caffo_davidian_meng_yu_reid_2016, title={Ten Simple Rules for Effective Statistical Practice}, volume={12}, ISSN={1553-7358}, url={http://dx.doi.org/10.1371/journal.pcbi.1004961}, DOI={10.1371/journal.pcbi.1004961}, abstractNote={Several months ago, Phil Bourne, the initiator and frequent author of the wildly successful and incredibly useful “Ten Simple Rules” series, suggested that some statisticians put together a Ten Simple Rules article related to statistics. (One of the rules for writing a PLOS Ten Simple Rules article is to be Phil Bourne [1]. In lieu of that, we hope effusive praise for Phil will suffice.) Implicit in the guidelines for writing Ten Simple Rules [1] is “know your audience.” We developed our list of rules with researchers in mind: researchers having some knowledge of statistics, possibly with one or more statisticians available in their building, or possibly with a healthy do-it-yourself attitude and a handful of statistical packages on their laptops. We drew on our experience in both collaborative research and teaching, and, it must be said, from our frustration at being asked, more than once, to “take a quick look at my student’s thesis/my grant application/my referee’s report: it needs some input on the stats, but it should be pretty straightforward.” There are some outstanding resources available that explain many of these concepts clearly and in much more detail than we have been able to do here: among our favorites are Cox and Donnelly [2], Leek [3], Peng [4], Kass et al. [5], Tukey [6], and Yu [7]. Every article on statistics requires at least one caveat. Here is ours: we refer in this article to “science” as a convenient shorthand for investigations using data to study questions of interest. This includes social science, engineering, digital humanities, finance, and so on. 
Statisticians are not shy about reminding administrators that statistical science has an impact on nearly every part of almost all organizations.}, number={6}, journal={PLOS Computational Biology}, publisher={Public Library of Science (PLoS)}, author={Kass, Robert E. and Caffo, Brian S. and Davidian, Marie and Meng, Xiao-Li and Yu, Bin and Reid, Nancy}, editor={Lewitter, FranEditor}, year={2016}, month={Jun}, pages={e1004961} } @article{zhang_tsiatis_laber_davidian_2015, title={A robust method for estimating optimal treatment regimes}, volume={71}, DOI={10.1111/biom.12229}, abstractNote={BiometricsVolume 71, Issue 1 p. 267-273 READER REACTIONFree Access Response to reader reaction First published: 29 October 2014 https://doi.org/10.1111/biom.12229AboutSectionsPDF ToolsRequest permissionExport citationAdd to favoritesTrack citation ShareShare Give accessShare full text accessShare full-text accessPlease review our Terms and Conditions of Use and check box below to share full-text version of article.I have read and accept the Wiley Online Library Terms and Conditions of UseShareable LinkUse the link below to share a full-text version of this article with your friends and colleagues. Learn more.Copy URL Share a linkShare onFacebookTwitterLinkedInRedditWechat The authors replied as follows: We applaud Taylor, Cheng, and Foster (henceforth TCF) for carrying out additional empirical studies of methods for estimating optimal treatment regimes, as further elucidation of the relative performance of competing methods is sorely needed. We hope that the evidence suggesting that these methods can perform well and yield comparable results under conditions likely to hold in practice will encourage more widespread interest in estimation of optimal treatment regimes. TCF consider the situation where the class of regimes of interest has elements of the form . 
They study the estimators IPWE and AIPWE for an optimal regime proposed in our 2012 paper (Zhang et al., 2012b), which are based on maximizing in inverse probability weighted estimators for the expected outcome, or value, under a regime in a specified class ; we have referred to estimators for an optimal regime found by maximizing an estimator for the value in as value search or policy search estimators. These are compared to two competing approaches. The first is the regression estimator RG, which is based on a posited parametric model for that induces the class of regimes when the model involves an interaction term of the form . The estimated optimal regime is found directly as . In the approach TCF call , a different estimator for the value from those in IPWE or AIPWE, given in their Equation (6), is maximized in . This method is proposed by us in Zhang et al. (2012a), in which we expressed (6) equivalently in terms an estimator for the contrast function . In particular, we suggested estimating an optimal regime of a specified form by maximizing in the estimator for the value given by , where is formed by representing in by a nonparametric estimator , such as support vector regression (Vapnik, Golowich, and Smola, 1997) or boosting (Freund and Schapire, 1997). TCF use random forests, noting that any flexible nonparametric estimator could be used. We thus disagree with TCF's characterization of this as a “regression” method and find reference to it as “RG with random forests” to be a bit misleading. This method, like IPWE and AIPWE, is a value search approach. These three methods are thus different in spirit and construction from RG, which bases the estimated optimal regime directly on a fitted regression model. We did not evaluate the performance of the contrast-based value search approach empirically in Zhang et al. 
(2012a), so the studies by TCF in which it is implemented using the particular choice of random forests, , fill an important gap and demonstrate its feasibility and robustness. For RG, TCF rightly consider misspecified parametric models that are closer to the true than the linear model we adopted to represent model misspecification in Zhang et al. (2012b), as would likely be formulated by a careful data analyst. We fully agree with their contention that RG can perform well under these conditions, as the simulations they present demonstrate. TCF also confirm our finding that IPWE is inferior to the other methods. The evidence from their studies along with that in our paper demonstrates that all of RG, AIPWE, and , and especially the latter two value search approaches, lead to high quality estimated regimes, providing the data analyst with a range of options. In fact, the analyst can employ competing approaches and compare the results to gain an understanding of sensitivity to modeling choices. A limitation of RG is that, as noted above, the class of regimes considered and the resulting estimated regimes are dictated by the form of the posited parametric regression model. On the other hand, if one were to use flexible nonparametric estimators like random forests to represent directly in the regression-based method, which seems to us an approach for which “” is a more appropriate acronym than its use by TCF, the result would be estimated regimes of a “black box” nature, which may elicit skepticism from clinicians. In contrast, the value search approaches, IPWE, AIPWE, and TCF's , search within a user-defined class of regimes whose specification need not be connected with the form of models for . This is advantageous if interest is in regimes having a specific form on the basis of interpretability, cost, or feasibility in practice that may not be induced straightforwardly from models for . For instance, as we demonstrated in Zhang et al. 
(2012b), the class of regimes can be restricted to have elements defined by rectangular regions; for example, , , which clinicians may find more interpretable than regimes involving linear combinations of covariates. TCF consider primarily the case of randomized studies, where the propensity scores are known and generally constant in . Here, IPWE and AIPWE are based on estimators for the value of a regime in that are guaranteed by construction to be consistent, which, intuitively, would be expected to lead to well-performing estimated optimal regimes. Moreover, these methods require no additional modeling, as the propensity score is estimated by the sample randomization proportion. The estimator for the value in (6) of TCF that forms the basis for is, in contrast, not consistent unless the model for is correctly specified, and the RG method depends critically on a correct model. As TCF demonstrate, this may be of little consequence with and a sufficiently flexible representation for or with RG and a “nearly correct” parametric model, although the evidence in TCF is less compelling for the latter estimator. Overall, we agree with TCF that the value search estimators AIPWE, incorporating a flexible model for in the “augmentation term,” and are the most promising in this setting. From a theoretical point of view, an advantage of AIPWE is that in this setting it yields the locally efficient estimator for the value; see Robins and Ritov (1997). In an observational study, AIPWE is based on a value estimator that is doubly robust; that is, guaranteed to be consistent as long as at least one of the propensity score model or model for is correctly specified, whereas, as TCF note, is not doubly robust. We agree with TCF that, if one has considerable confidence in the nonparametric random forest representation for the contrast function, including its incorporated adjustment for confounding, the additional protection afforded by the AIPWE may be unnecessary. 
However, AIPWE implemented with careful modeling of the propensity score in the same spirit as TCF propose in RG could provide the analyst with additional trust in the robustness of results. A challenge with all of the value search methods IPWE, AIPWE, and is that the maximization of the value estimator in is a nonsmooth optimization problem that cannot be addressed using standard optimization methods. In problems where the restricted class of regimes involves rich covariate information, so that is high-dimensional, implementation becomes computationally prohibitive and the quality of estimation will be degraded. One practical approach to circumventing this difficulty is described in Zhang et al. (2012a), where we demonstrated how the problem of maximizing value search estimators in can be recast as minimizing a weighted classification error; see also Zhao et al. (2012). Thus, estimation of an optimal treatment regime can be likened to a classification problem, viewing as a classifier, with the class of regimes determined by the choice of classifier; for example, classification and regression trees (Breiman et al., 1984) or support vector machines (Cortes and Vapnik, 1995). In this formulation, the “class label” and “weight” are functions of the estimated contrast function. Existing software for carrying out the minimization for a given choice of classifier can then be used to estimate an optimal regime in this class. Although this is also a nonstandard optimization problem, an advantage in practice is that computational techniques to approximate it efficiently and to carry out the variable selection involved are embedded in off-the-shelf software. A possible advantage of AIPWE over RG is the extension to more than one treatment decision point. The extension of RG, Q-learning, requires positing a sequence of regression models at each decision point that ideally must be compatible with one another. 
In practice, such a specification is almost impossible (but see Laber, Linn, and Stefanski, in press), so that the models at decision points other than the last one are almost certainly misspecified, even if flexible methods are used, which will compromise the quality of estimated regimes. The extension of AIPWE we present in Zhang et al. (2013) ideally requires specification of compatible such models, but only for the purpose of gaining efficiency and ensuring approximate double robustness. Extension of TCF's and related contrast-based value search estimators to this setting should be investigated. More generally, further research is needed to clarify the performance of approaches in the multiple decision setting. Given the well-performing options available for estimating optimal regimes, we believe that the most pressing challenge is that the methodological advances have far outpaced current practice. We must encourage our clinician collaborators and practicing biostatisticians to consider estimation of dynamic treatment regimes as a meaningful, primary data-analytic objective. Although this perspective has been embraced by some researchers in the behavioral sciences, it is not as prevalent in chronic disease research, where interest focuses primarily on identifying subgroups of patients to whom treatment may be targeted; that is, identifying the “right patient for the treatment.” Thinking in terms of optimal treatment regimes, so identifying “the right treatment for the patient,” offers a valuable complementary perspective. The critical next step for the treatment regime research community is a proactive effort to communicate the concepts and methods and their scientific relevance to health sciences researchers more broadly. That said, an outstanding methodological challenge is inference for the estimated regime. 
The value of an estimated regime is equivalent to the weighted test error of an estimated classifier and is thus a data-dependent, nonsmooth functional of the underlying generative distribution (Laber and Murphy, 2011; Chakraborty, Laber, and Zhao, 2014). Standard asymptotic methods for inference, including the bootstrap and series approximations, do not apply without modification, and the small sample performance of these methods can be quite poor under some generative models. Inference for the parameters indexing the optimal regime has been another focus for inference (Robins, 2004; Chakraborty, Murphy, and Strecher, 2010; Laber et al., 2014). However, it is not clear that this is an appropriate target for inference for value search estimators where the objective is to estimate a high-quality regime within a prespecified class, which need not be assumed to contain the true optimal regime. We thank TCF again for a thoughtful and important demonstration of the relative merits of estimators for optimal dynamic treatment regimes. Their findings, in conjunction with other work cited herein, make a strong case for the use of value search estimators in practice. Acknowledgements This work was supported by NIH grants P01 CA142538, R01 HL118336, and R01 CA085848. References Breiman, L, Freidman, J. L., Olshen, R. A., and Stone, C. J. (1984). Classification and Regression Trees. Belmont, California: Wadsworth. Chakraborty, B., Laber, E. B., and Zhao, Y. Q. (2014). Inference about the expected performance of a data-driven dynamic treatment regime. Clinical Trials 11, 408– 417. Chakraborty, B., Murphy, S., and Strecher, V. (2010). Inference for non-regular parameters in optimal dynamic treatment regimes. Statistical Methods in Medical Research 19, 317– 343. Cortes, C. and Vapnik, V. (1995). Support-vector networks. Machine Learning 20, 273– 297. Freund, B. and Schapire, R. E. (1997). A decision-theoretic generalization of on-line learning and an application to boosting. 
Journal of Computer and System Sciences 55, 119– 139. Laber, E. B., Linn, K. A., and Stefanski, L. A. (in press). Interactive model building for Q-learning. Biometrika. Laber, E. B., Lizotte, D. J., Qian, M., Pelham, W. E., and Murphy, S. A. (2014). Dynamic treatment regimes: Technical challenges and applications. Electronic Journal of Statistics 8, 1225– 1272. Laber, E. B. and Murphy, S. A. (2011). Adaptive confidence intervals for the test error in classification. Journal of the American Statistical Association 106, 904– 913. Robins, J. M. (2004). Optimal structural nested models for optimal sequential decisions. In Proceedings of the Second Seattle Symposium in Biostatistics, 189– 326. New York: Springer. Robins, J. M. and Ritov, Y. (1997). Toward a curse of dimensionality appropriate (CODA) asymptotic theory for semi-parametric models. Statistics in Medicine 16, 285– 319. Vapnik, V., Golowich, S., and Smola, A. (1997). Support vector regression for function approximation, regression estimation, and signal processing. Advances in Neural Information Processing Systems 9, 281– 287. Zhang, B., Tsiatis, A. A., Davidian, M., Zhang, M., and Laber, E. B. (2012a). Estimating optimal treatment regimes from a classification perspective. Stat 1, 103– 114. Zhang, B., Tsiatis, A. A., Laber, E. B., and Davidian, M. (2012b). A robust method for estimating optimal treatment regimes. Biometrics 68, 1010– 1018. Zhang, B., Tsiatis, A. A., Laber, E. B., and Davidian, M. (2013). Robust estimation of optimal dynamic treatment regimes for sequential treatment decisions. Biometrika 100, 681– 694. Zhao, Y., Zeng, D., Rush, A. J., and Kosorok, M. R. (2012). Estimating individualized treatment rules using outcome weighted learning. Journal of the American Statistical Association 107, 1106– 1118. Baqun Zhang School of Statistics Renmin University of China Beijing 100872, China email: zhangbaqun@ruc.edu.cn Anastasios A. 
Tsiatis Department of Statistics, Box 8203, North Carolina State University, Raleigh, North Carolina 27695, U.S.A. email: tsiatis@ncsu.edu Eric B. Laber Department of Statistics, Box 8203, North Carolina State University, Raleigh, North Carolina 27695, U.S.A. email: eblaber@ncsu.edu and Marie Davidian Department of Statistics, Box 8203, North Carolina State University, Raleigh, North Carolina 27695, U.S.A. email: davidian@ncsu.edu Volume71, Issue1March 2015Pages 267-273 ReferencesRelatedInformation}, journal={Biometrics}, author={Zhang, B. and Tsiatis, A.A. and Laber, E.B. and Davidian, M.}, year={2015}, pages={271–273} } @inbook{davidian_tsiatis_laber_2015, title={Chapter 9: Value search estimators for optimal dynamic treatment regimes}, ISBN={9781611974171 9781611974188}, url={http://dx.doi.org/10.1137/1.9781611974188.ch9}, DOI={10.1137/1.9781611974188.ch9}, booktitle={Adaptive Treatment Strategies in Practice}, publisher={Society for Industrial and Applied Mathematics}, author={Davidian, Marie and Tsiatis, Anastasios A. and Laber, Eric B.}, year={2015}, month={Dec}, pages={135–155} } @article{milanzi_molenberghs_alonso_kenward_tsiatis_davidian_verbeke_2015, title={Estimation After a Group Sequential Trial}, volume={7}, ISSN={["1867-1772"]}, DOI={10.1007/s12561-014-9112-6}, abstractNote={Group sequential trials are one important instance of studies for which the sample size is not fixed a priori but rather takes one of a finite set of pre-specified values, dependent on the observed data. Much work has been devoted to the inferential consequences of this design feature. Molenberghs et al. (Statistical Methods in Medical Research, 2012) and Milanzi et al. 
(Properties of estimators in exponential family settings with observation-based stopping rules, 2012) reviewed and extended the existing literature, focusing on a collection of seemingly disparate, but related, settings, namely completely random sample sizes, group sequential studies with deterministic and random stopping rules, incomplete data, and random cluster sizes. They showed that the ordinary sample average is a viable option for estimation following a group sequential trial, for a wide class of stopping rules and for random outcomes with a distribution in the exponential family. Their results are somewhat surprising in the sense that the sample average is not optimal, and further, there does not exist an optimal, or even, unbiased linear estimator. However, the sample average is asymptotically unbiased, both conditionally upon the observed sample size as well as marginalized over it. By exploiting ignorability they showed that the sample average is the conventional maximum likelihood estimator. They also showed that a conditional maximum likelihood estimator is finite sample unbiased, but is less efficient than the sample average and has the larger mean squared error. Asymptotically, the sample average and the conditional maximum likelihood estimator are equivalent. This previous work is restricted, however, to the situation in which the random sample size can take only two values, $N=n$ or $N=2n$. In this paper, we consider the more practically useful setting of sample sizes in the finite set $\{n_1,n_2,\dots,n_L\}$. It is shown that the sample average is then a justifiable estimator, in the sense that it follows from joint likelihood estimation, and it is consistent and asymptotically unbiased. We also show why simulations can give the false impression of bias in the sample average when considered conditional upon the sample size. The consequence is that no corrections need to be made to estimators following sequential trials. 
When small-sample bias is of concern, the conditional likelihood estimator (CLE) provides a relatively straightforward modification to the sample average. Finally, it is shown that classical likelihood-based standard errors and confidence intervals can be applied, obviating the need for technical corrections.}, number={2}, journal={STATISTICS IN BIOSCIENCES}, author={Milanzi, Elasma and Molenberghs, Geert and Alonso, Ariel and Kenward, Michael G. and Tsiatis, Anastasios A. and Davidian, Marie and Verbeke, Geert}, year={2015}, month={Oct}, pages={187–205} } @article{milanzi_molenberghs_2015, title={Properties of Estimators in Exponential Family Settings with Observation-Based Stopping Rules}, volume={7}, ISSN={2155-6180}, url={http://dx.doi.org/10.4172/2155-6180.1000272}, DOI={10.4172/2155-6180.1000272}, abstractNote={Often, sample size is not fixed by design. A key example is a sequential trial with a stopping rule, where stopping is based on what has been observed at an interim look. While such designs are used for time and cost efficiency, and hypothesis testing theory has been well developed, estimation following a sequential trial is a challenging, still controversial problem. Progress has been made in the literature, predominantly for normal outcomes and/or for a deterministic stopping rule. Here, we place these settings in a broader context of outcomes following an exponential family distribution and with a stochastic stopping rule that includes a deterministic rule and completely random sample size as special cases. It is shown that the estimation problem is usually simpler than often thought. In particular, it is established that the ordinary sample average is a very sensible choice, contrary to commonly encountered statements. We study (1) The so-called incompleteness property of the sufficient statistics, (2) A general class of linear estimators, and (3) Joint and conditional likelihood estimation. 
Apart from the general exponential family setting, normal and binary outcomes are considered as key examples. While our results hold for a general number of looks, for ease of exposition, we focus on the simple yet generic setting of two possible sample sizes, N=n or N=2n.}, number={1}, journal={Journal of Biometrics \& Biostatistics}, publisher={OMICS Publishing Group}, author={Milanzi, Elasma and Molenberghs, Geert}, year={2015} } @article{zhang_laber_tsiatis_davidian_2015, title={Using Decision Lists to Construct Interpretable and Parsimonious Treatment Regimes}, volume={71}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12354}, abstractNote={A treatment regime formalizes personalized medicine as a function from individual patient characteristics to a recommended treatment. A high-quality treatment regime can improve patient outcomes while reducing cost, resource consumption, and treatment burden. Thus, there is tremendous interest in estimating treatment regimes from observational and randomized studies. However, the development of treatment regimes for application in clinical practice requires the long-term, joint effort of statisticians and clinical scientists. In this collaborative process, the statistician must integrate clinical science into the statistical models underlying a treatment regime and the clinician must scrutinize the estimated treatment regime for scientific validity. To facilitate meaningful information exchange, it is important that estimated treatment regimes be interpretable in a subject-matter context. We propose a simple, yet flexible class of treatment regimes whose members are representable as a short list of if-then statements. Regimes in this class are immediately interpretable and are therefore an appealing choice for broad application in practice. We derive a robust estimator of the optimal regime within this class and demonstrate its finite sample performance using simulation experiments. 
The proposed method is illustrated with data from two clinical trials.}, number={4}, journal={BIOMETRICS}, author={Zhang, Yichi and Laber, Eric B. and Tsiatis, Anastasios and Davidian, Marie}, year={2015}, month={Dec}, pages={895–904} } @article{laber_zhao_regh_davidian_tsiatis_stanford_zeng_song_kosorok_2015, title={Using pilot data to size a two-arm randomized trial to find a nearly optimal personalized treatment strategy}, volume={35}, ISSN={0277-6715}, url={http://dx.doi.org/10.1002/SIM.6783}, DOI={10.1002/SIM.6783}, abstractNote={A personalized treatment strategy formalizes evidence-based treatment selection by mapping patient information to a recommended treatment. Personalized treatment strategies can produce better patient outcomes while reducing cost and treatment burden. Thus, among clinical and intervention scientists, there is a growing interest in conducting randomized clinical trials when one of the primary aims is estimation of a personalized treatment strategy. However, at present, there are no appropriate sample size formulae to assist in the design of such a trial. Furthermore, because the sampling distribution of the estimated outcome under an estimated optimal treatment strategy can be highly sensitive to small perturbations in the underlying generative model, sample size calculations based on standard (uncorrected) asymptotic approximations or computer simulations may not be reliable. We offer a simple and robust method for powering a single stage, two-armed randomized clinical trial when the primary aim is estimating the optimal single stage personalized treatment strategy. The proposed method is based on inverting a plugin projection confidence interval and is thereby regular and robust to small perturbations of the underlying generative model. 
The proposed method requires elicitation of two clinically meaningful parameters from clinical scientists and uses data from a small pilot study to estimate nuisance parameters, which are not easily elicited. The method performs well in simulated experiments and is illustrated using data from a pilot study of time to conception and fertility awareness.}, number={8}, journal={Statistics in Medicine}, publisher={Wiley}, author={Laber, Eric B. and Zhao, Ying-Qi and Regh, Todd and Davidian, Marie and Tsiatis, Anastasios and Stanford, Joseph B. and Zeng, Donglin and Song, Rui and Kosorok, Michael R.}, year={2015}, month={Oct}, pages={1245–1256} } @misc{davidian_kutal_2014, title={Collaboration To Meet the Statistical Needs in the Chemistry Curriculum}, volume={91}, ISSN={["1938-1328"]}, DOI={10.1021/ed400516y}, abstractNote={Recent articles have recommended adding statistical topics to the chemistry curriculum. This letter to the editor recommends: (i) collaboration between statisticians and chemists; and (ii) emulating existing successful models to meet the statistical needs in the chemistry curriculum.}, number={1}, journal={JOURNAL OF CHEMICAL EDUCATION}, author={Davidian, Marie and Kutal, Charles}, year={2014}, month={Jan}, pages={12–12} } @article{laber_tsiatis_davidian_holloway_2014, title={Combining Biomarkers to Optimize Patient Treatment Recommendations Discussions}, volume={70}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12187}, abstractNote={BiometricsVolume 70, Issue 3 p. 707-710 BIOMETRIC PRACTICE Discussion of “Combining biomarkers to optimize patient treatment recommendation” Eric B. Laber, Corresponding Author Eric B. Laber Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.email: [email protected]Search for more papers by this authorAnastasios A. Tsiatis, Anastasios A. 
Tsiatis Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.Search for more papers by this authorMarie Davidian, Marie Davidian Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.Search for more papers by this authorShannon T. Holloway, Shannon T. Holloway Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.Search for more papers by this author Eric B. Laber, Corresponding Author Eric B. Laber Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.email: [email protected]Search for more papers by this authorAnastasios A. Tsiatis, Anastasios A. Tsiatis Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.Search for more papers by this authorMarie Davidian, Marie Davidian Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.Search for more papers by this authorShannon T. Holloway, Shannon T. Holloway Department of Statistics, North Carolina State University, Raleigh, North Carolina 27695–8203, U.S.A.Search for more papers by this author First published: 02 June 2014 https://doi.org/10.1111/biom.12187Citations: 4Read the full textAboutPDF ToolsRequest permissionExport citationAdd to favoritesTrack citation ShareShare Give accessShare full text accessShare full-text accessPlease review our Terms and Conditions of Use and check box below to share full-text version of article.I have read and accept the Wiley Online Library Terms and Conditions of UseShareable LinkUse the link below to share a full-text version of this article with your friends and colleagues. Learn more.Copy URL Share a linkShare onEmailFacebookTwitterLinkedInRedditWechat References Barto, A. and Dieterich, T. (2004). Reinforcement learning and its relation to supervised learning. 
In Handbook of Learning and Approximate Dynamic Programming, J. Si, A. G. Barto, W. B. Powell, and D. Wunsch (eds), 45–63. New York: Wiley. Breiman, L. and Shang, N. (1996). Born again trees. Available at ftp://ftp.stat.berkeley. edu/pub/users/breiman/BAtrees.ps. Chakraborty, B. and Moodie, E. E. M. (2013). Statistical Methods for Dynamic Treatment Regimes: Reinforcement Learning, Causal Inference, and Personalized Medicine. New York: Springer. Fan, W., Stolfo, S. J., Zhang, J., and Chan, P. K. (1999). Adacost: misclassification cost-sensitive boosting. In Proceedings of the Sixteenth International Conference on Machine Learning (ICML'99), 97–105, Bled, Slovenia, June 1999. Laber, E., Linn, K., and Stefanski, L. (in press). Interactive model-building for Q-learning. Biometrika. Moodie, E. E. M., Dean, N., and Sun, Y. R. (2013). Q-learning: Flexible learning about useful utilities. Statistics in Biosciences, 1–21. Orellana, L., Rotnitzky, A., and Robins, J. M. (2010). Dynamic regime marginal structural mean models for estimation of optimal treatment regimes, part I: Main content. International Journal of Biostatistics 6, Article 8. Robins, J. M. (2004). Optimal structured nested models for optimal sequential decisions. In Proceedings of the Second Seattle Symposium on Biostatistics, D. Y. Lin and P. J. Heagerty (eds), 189–326. New York: Springer. Scharfstein, D. O., Rotnitzky, A., and Robins, J. M. (1999). Adjusting for nonignorable drop-out using semiparametric nonresponse models. Journal of the American Statistical Association 94, 1096–1120. Schulte, P., Tsiatis, A., Laber, E., and Davidian, M. (in press). Q-and A-learning methods for estimating optimal dynamic treatment regimes. Statistical Science. Zhang, B., Tsiatis, A. A., Davidian, M., Zhang, M., and Laber, E. (2012a). Estimating optimal treatment regimes from a classification perspective. Stat 1, 103–114. Zhang, B., Tsiatis, A. A., Laber, E. B., and Davidian, M. (2012b). 
A robust method for estimating optimal treatment regimes. Biometrics 68, 1010–1018. Zhang, B., Tsiatis, A. A., Laber, E. B., and Davidian, M. (2013). Robust estimation of optimal dynamic treatment regimes for sequential treatment decisions. Biometrika 100, 681–694. Zhao, Y., Zeng, D., Rush, A. J., and Kosorok, M. R. (2012). Estimating individualized treatment rules using outcome weighted learning. Journal of the American Statistical Association 107, 1106–1118. Zhao, Y., Zeng, D., Socinski, M. A., and Kosorok, M. R. (2011). Reinforcement learning strategies for clinical trials in nonsmall cell lung cancer. Biometrics 67, 1422–1433. Citing Literature Volume70, Issue3September 2014Pages 707-710 ReferencesRelatedInformation}, number={3}, journal={BIOMETRICS}, author={Laber, Eric B. and Tsiatis, Anastasios A. and Davidian, Marie and Holloway, Shannon T.}, year={2014}, month={Sep}, pages={707–710} } @inbook{tsiatis_davidian_2014, place={Boca Raton}, title={Missing data methods: A semiparametric perspective}, booktitle={Handbook of Missing Data}, publisher={Chapman \& Hall/CRC Press}, author={Tsiatis, A. A. and Davidian, M.}, editor={Fitzmaurice, G. and Kenward, M. and Molenberghs, G. and Tsiatis, A. A. and Verbeke, G.}, year={2014} } @article{molenberghs_kenward_aerts_verbeke_tsiatis_davidian_rizopoulos_2014, title={On random sample size, ignorability, ancillarity, completeness, separability, and degeneracy: Sequential trials, random sample sizes, and missing data}, volume={23}, ISSN={["1477-0334"]}, DOI={10.1177/0962280212445801}, abstractNote={The vast majority of settings for which frequentist statistical properties are derived assume a fixed, a priori known sample size. Familiar properties then follow, such as, for example, the consistency, asymptotic normality, and efficiency of the sample average for the mean parameter, under a wide range of conditions. 
We are concerned here with the alternative situation in which the sample size is itself a random variable which may depend on the data being collected. Further, the rule governing this may be deterministic or probabilistic. There are many important practical examples of such settings, including missing data, sequential trials, and informative cluster size. It is well known that special issues can arise when evaluating the properties of statistical procedures under such sampling schemes, and much has been written about specific areas (Grambsch P. Sequential sampling based on the observed Fisher information to guarantee the accuracy of the maximum likelihood estimator. Ann Stat 1983; 11: 68–77; Barndorff-Nielsen O and Cox DR. The effect of sampling rules on likelihood statistics. Int Stat Rev 1984; 52: 309–326). Our aim is to place these various related examples into a single framework derived from the joint modeling of the outcomes and sampling process and so derive generic results that in turn provide insight, and in some cases practical consequences, for different settings. It is shown that, even in the simplest case of estimating a mean, some of the results appear counterintuitive. In many examples, the sample average may exhibit small sample bias and, even when it is unbiased, may not be optimal. Indeed, there may be no minimum variance unbiased estimator for the mean. Such results follow directly from key attributes such as non-ancillarity of the sample size and incompleteness of the minimal sufficient statistic of the sample size and sample sum. Although our results have direct and obvious implications for estimation following group sequential trials, there are also ramifications for a range of other settings, such as random cluster sizes, censored time-to-event data, and the joint modeling of longitudinal and time-to-event data. Here, we use the simplest group sequential setting to develop and explicate the main results. 
Some implications for random sample sizes and missing data are also considered. Consequences for other related settings will be considered elsewhere.}, number={1}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Molenberghs, Geert and Kenward, Michael G. and Aerts, Marc and Verbeke, Geert and Tsiatis, Anastasios A. and Davidian, Marie and Rizopoulos, Dimitris}, year={2014}, month={Feb}, pages={11–41} } @incollection{davidian_2014, title={Publishing without perishing and other career advice}, booktitle={Past, Present, and Future of Statistical Science}, author={Davidian, M.}, year={2014}, pages={581–591} } @article{schulte_tsiatis_laber_davidian_2014, title={Q- and A-Learning Methods for Estimating Optimal Dynamic Treatment Regimes}, volume={29}, ISSN={["0883-4237"]}, DOI={10.1214/13-sts450}, abstractNote={In clinical practice, physicians make a series of treatment decisions over the course of a patient's disease based on his/her baseline and evolving characteristics. A dynamic treatment regime is a set of sequential decision rules that operationalizes this process. Each rule corresponds to a decision point and dictates the next treatment action based on the accrued information. Using existing data, a key goal is estimating the optimal regime, that, if followed by the patient population, would yield the most favorable outcome on average. Q- and A-learning are two main approaches for this purpose. We provide a detailed account of these methods, study their performance, and illustrate them using data from a depression study.}, number={4}, journal={STATISTICAL SCIENCE}, author={Schulte, Phillip J. and Tsiatis, Anastasios A. and Laber, Eric B. 
and Davidian, Marie}, year={2014}, month={Nov}, pages={640–661} } @inbook{ren_davidian_george_goldberg_wright_tsiatis_kosorok_2014, title={Research Methods for Clinical Trials in Personalized Medicine: A Systematic Review}, ISBN={9789814489065 9789814489072}, url={http://dx.doi.org/10.1142/9789814489072_0025}, DOI={10.1142/9789814489072_0025}, abstractNote={Background: Personalized medicine, the notion that an individual’s genetic and other characteristics can be used to individualize the diagnosis, treatment and prevention of disease, is an active and exciting area of research, with tremendous potential to improve the health of society. Methods: Seventy-six studies using personalized medicine analysis techniques published from 2006 to 2010 in six high-impact journals Journal of the American Medical Association, Journal of the National Cancer Institute, Lancet, Nature, Nature Medicine, and the New England Journal of Medicine were reviewed. Selected articles were manually selected based on reporting of the use of genetic information to stratify subjects and on analyses of the association between biomarkers and patient clinical outcomes. Results: We found considerable variability and limited consensus in approaches. Approaches could largely be classified as data-driven, seeking discovery through statistical analysis of data, or knowledge-driven, relying heavily on prior biological information. Some studies took a hybrid approach. Eliminating two articles that were retracted after publication, 56 of the remaining 74 (76%) were cancerrelated. Conclusions: Much work is needed to standardize and improve statistical methods for finding biomarkers, validating results, and efficiently optimizing better individual treatment strategies. 
Several promising new analytic approaches are available and should be considered in future studies of personalized medicine.}, booktitle={Lost in Translation}, publisher={WORLD SCIENTIFIC}, author={Ren, Zheng and Davidian, Marie and George, Stephen L. and Goldberg, Richard M. and Wright, Fred A. and Tsiatis, Anastasios A. and Kosorok, Michael R.}, year={2014}, month={Mar}, pages={659–684} } @article{vock_davidian_tsiatis_2014, title={{SNP\_NLMM}: A {SAS} Macro to Implement a Flexible Random Effects Density for Generalized Linear and Nonlinear Mixed Models}, volume={56}, ISSN={1548-7660}, url={http://dx.doi.org/10.18637/jss.v056.c02}, DOI={10.18637/jss.v056.c02}, abstractNote={Generalized linear and nonlinear mixed models (GLMMs and NLMMs) are commonly used to represent non-Gaussian or nonlinear longitudinal or clustered data. A common assumption is that the random effects are Gaussian. However, this assumption may be unrealistic in some applications, and misspecification of the random effects density may lead to maximum likelihood parameter estimators that are inconsistent, biased, and inefficient. Because testing if the random effects are Gaussian is difficult, previous research has recommended using a flexible random effects density. However, computational limitations have precluded widespread use of flexible random effects densities for GLMMs and NLMMs. We develop a SAS macro, SNP_NLMM, that overcomes the computational challenges to fit GLMMs and NLMMs where the random effects are assumed to follow a smooth density that can be represented by the seminonparametric formulation proposed by Gallant and Nychka (1987). The macro is flexible enough to allow for any density of the response conditional on the random effects and any nonlinear mean trajectory. 
We demonstrate the SNP_NLMM macro on a GLMM of the disease progression of toenail infection and on a NLMM of intravenous drug concentration over time.}, number={Code Snippet 2}, journal={Journal of Statistical Software}, publisher={Foundation for Open Access Statistic}, author={Vock, David M. and Davidian, Marie and Tsiatis, Anastasios A.}, year={2014} } @book{davidian_lin_morris_stefanski_2014, title={The Work of Raymond J. Carroll}, ISBN={9783319058009 9783319058016}, url={http://dx.doi.org/10.1007/978-3-319-05801-6}, DOI={10.1007/978-3-319-05801-6}, abstractNote={Measurement Error.- Transformation and Weighting.- Epidemiology.- Nonparametric and Semiparametric Regression for Independent Data.- Nonparametric and Semiparametric Regression for Dependent Data.- Robustness.- Other Work Article list for each of these areas is in attachment.}, publisher={Springer International Publishing}, year={2014} } @article{verbeke_fieuws_molenberghs_davidian_2014, title={The analysis of multivariate longitudinal data: A review}, volume={23}, ISSN={["1477-0334"]}, DOI={10.1177/0962280212445834}, abstractNote={Longitudinal experiments often involve multiple outcomes measured repeatedly within a set of study participants. While many questions can be answered by modeling the various outcomes separately, some questions can only be answered in a joint analysis of all of them. In this article, we will present a review of the many approaches proposed in the statistical literature. Four main model families will be presented, discussed and compared. 
Focus will be on presenting advantages and disadvantages of the different models rather than on the mathematical or computational details.}, number={1}, journal={STATISTICAL METHODS IN MEDICAL RESEARCH}, author={Verbeke, Geert and Fieuws, Steffen and Molenberghs, Geert and Davidian, Marie}, year={2014}, month={Feb}, pages={42–59} } @article{vock_tsiatis_davidian_laber_tsuang_copeland_palmer_2013, title={Assessing the Causal Effect of Organ Transplantation on the Distribution of Residual Lifetime}, volume={69}, ISSN={["1541-0420"]}, DOI={10.1111/biom.12084}, abstractNote={Because the number of patients waiting for organ transplants exceeds the number of organs available, a better understanding of how transplantation affects the distribution of residual lifetime is needed to improve organ allocation. However, there has been little work to assess the survival benefit of transplantation from a causal perspective. Previous methods developed to estimate the causal effects of treatment in the presence of time-varying confounders have assumed that treatment assignment was independent across patients, which is not true for organ transplantation. We develop a version of G-estimation that accounts for the fact that treatment assignment is not independent across individuals to estimate the parameters of a structural nested failure time model. We derive the asymptotic properties of our estimator and confirm through simulation studies that our method leads to valid inference of the effect of transplantation on the distribution of residual lifetime. We demonstrate our method on the survival benefit of lung transplantation using data from the United Network for Organ Sharing.}, number={4}, journal={BIOMETRICS}, author={Vock, David M. and Tsiatis, Anastasios A. and Davidian, Marie and Laber, Eric B. and Tsuang, Wayne M. and Copeland, C. 
Ashley Finlen and Palmer, Scott M.}, year={2013}, month={Dec}, pages={820–829} } @inbook{thomas_stefanski_davidian_2013, title={Bias Reduction in Logistic Regression with Estimated Variance Predictors}, ISBN={9781461468707 9781461468714}, ISSN={0930-0325}, url={http://dx.doi.org/10.1007/978-1-4614-6871-4_2}, DOI={10.1007/978-1-4614-6871-4_2}, abstractNote={We study the problem of modeling a response as a function of baseline covariates and a primary predictor of interest that is a noisy measurement of a subject-specific variance. The problem arises naturally in biostatistical joint models wherein the subjects’ primary endpoints are related to the features of subject-specific longitudinal risk processes or profiles. Often the longitudinal process features of interest are parameters of a longitudinal mean function. However, there is a relatively recent and growing interest in relating primary endpoints to longitudinal process variances. In the application motivating our work longitudinal processes consist of 30-day blood pressure trajectories measured between 91 and 120 days post dialysis therapy, with the primary endpoints being short-term mortality. Often the longitudinal risk processes are adequately characterized in terms of trends such as the slopes and intercepts identified with the subject-specific biomarker processes. Modeling of the trend lines results in subject-specific estimated intercepts and slopes, thus inducing a heteroscedastic measurement-error model structure where the estimated trend parameters play the role of measurements of the “true” subject-specific trend parameters that appear as predictors in the primary endpoint model. Our interest lies in models in which the residual variances of the longitudinal processes feed into the model for the primary endpoint. 
These subject-specific variance parameters are estimated in the course of trend-line fitting creating a measurement error model scenario where variances are predictors and mean squared errors are their noisy measurements. Background literature is reviewed and several methodological approaches for addressing the resulting errors-in-variances problem are studied.}, booktitle={ISS-2012 Proceedings Volume On Longitudinal Data Analysis Subject to Measurement Errors, Missing Values, and/or Outliers}, publisher={Springer New York}, author={Thomas, Laine and Stefanski, Leonard A. and Davidian, Marie}, year={2013}, pages={33–51} } @article{sullivan_davidian_destefano_stone_2013, title={Building the Biostatistics Pipeline: Summer Institutes for Training in Biostatistics (SIBS)}, volume={26}, ISSN={0933-2480 1867-2280}, url={http://dx.doi.org/10.1080/09332480.2013.772386}, DOI={10.1080/09332480.2013.772386}, abstractNote={Almost every day, another finding related to human health is reported in the news. A new drug shows promise for treating Alzheimer's disease. A study in a leading medical journal suggests an associ...}, number={1}, journal={CHANCE}, publisher={Informa UK Limited}, author={Sullivan, Lisa M. and Davidian, Marie and DeStefano, Anita L. and Stone, Roslyn A.}, year={2013}, month={Feb}, pages={4–9} } @article{thomas_stefanski_davidian_2013, title={Moment adjusted imputation for multivariate measurement error data with applications to logistic regression}, volume={67}, ISSN={["1872-7352"]}, DOI={10.1016/j.csda.2013.04.017}, abstractNote={In clinical studies, covariates are often measured with error due to biological fluctuations, device error and other sources. Summary statistics and regression models that are based on mismeasured data will differ from the corresponding analysis based on the "true" covariate. Statistical analysis can be adjusted for measurement error, however various methods exhibit a tradeoff between convenience and performance. 
Moment Adjusted Imputation (MAI) is a method for measurement error in a scalar latent variable that is easy to implement and performs well in a variety of settings. In practice, multiple covariates may be similarly influenced by biological fluctuations, inducing correlated multivariate measurement error. The extension of MAI to the setting of multivariate latent variables involves unique challenges. Alternative strategies are described, including a computationally feasible option that is shown to perform well.}, journal={COMPUTATIONAL STATISTICS \& DATA ANALYSIS}, author={Thomas, Laine and Stefanski, Leonard A. and Davidian, Marie}, year={2013}, month={Nov}, pages={15–24} } @article{zhang_tsiatis_laber_davidian_2013, title={Robust estimation of optimal dynamic treatment regimes for sequential treatment decisions}, volume={100}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/ast014}, abstractNote={Journal Article Robust estimation of optimal dynamic treatment regimes for sequential treatment decisions Get access Baqun Zhang, Baqun Zhang School of Statistics, Renmin University of China, Beijing 100872, China, zhangbaqun@ruc.edu.cn Search for other works by this author on: Oxford Academic Google Scholar Anastasios A. Tsiatis, Anastasios A. Tsiatis Department of Statistics, North Carolina State University, Raleigh, North Carolina, 27695-8203, U.S.A., tsiatis@ncsu.edu Search for other works by this author on: Oxford Academic Google Scholar Eric B. Laber, Eric B. 
Laber Department of Statistics, North Carolina State University, Raleigh, North Carolina, 27695-8203, U.S.A., eblaber@ncsu.edu Search for other works by this author on: Oxford Academic Google Scholar Marie Davidian Marie Davidian Department of Statistics, North Carolina State University, Raleigh, North Carolina, 27695-8203, U.S.A., davidian@ncsu.edu Search for other works by this author on: Oxford Academic Google Scholar Biometrika, Volume 100, Issue 3, September 2013, Pages 681–694, https://doi.org/10.1093/biomet/ast014 Published: 30 May 2013 Article history Received: 01 July 2012 Revision received: 01 March 2013 Published: 30 May 2013}, number={3}, journal={BIOMETRIKA}, author={Zhang, Baqun and Tsiatis, Anastasios A. and Laber, Eric B. and Davidian, Marie}, year={2013}, month={Sep}, pages={681–694} } @article{davidian_2013, title={The International Year of Statistics: A Celebration and A Call to Action}, volume={108}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2013.844019}, abstractNote={We are almost two-thirds of the way through 2013, the International Year of Statistics. Statistics2013, as it is also known, is a year-long recognition of the impact of our discipline on science and society. Almost daily, events are taking place across the globe highlighting the contributions of statistics to just about everything. Improving human health. Understanding our environment. Informing decision-making in business and in government. All of us here in this room know the importance of our field. Statistics2013 is devoted to sharing that knowledge – with fellow scientists, industry leaders, government policymakers, students, the media, and the public. Raising awareness of how statistics and statistical thinking affect every one of us will expand opportunities for us to make a difference. And will attract students to the study of statistics, which is essential to meeting the demands posed by this new, data-driven age. 
And, ultimately, will increase the ability of the public to make sense of what those data mean to them.}, number={504}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Davidian, Marie}, year={2013}, month={Dec}, pages={1141–1146} } @article{zhang_tsiatis_laber_davidian_2012, title={A Robust Method for Estimating Optimal Treatment Regimes}, volume={68}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2012.01763.x}, abstractNote={A treatment regime is a rule that assigns a treatment, among a set of possible treatments, to a patient as a function of his/her observed characteristics, hence "personalizing" treatment to the patient. The goal is to identify the optimal treatment regime that, if followed by the entire population of patients, would lead to the best outcome on average. Given data from a clinical trial or observational study, for a single treatment decision, the optimal regime can be found by assuming a regression model for the expected outcome conditional on treatment and covariates, where, for a given set of covariates, the optimal treatment is the one that yields the most favorable expected outcome. However, treatment assignment via such a regime is suspect if the regression model is incorrectly specified. Recognizing that, even if misspecified, such a regression model defines a class of regimes, we instead consider finding the optimal regime within such a class by finding the regime that optimizes an estimator of overall population mean outcome. To take into account possible confounding in an observational study and to increase precision, we use a doubly robust augmented inverse probability weighted estimator for this purpose. Simulations and application to data from a breast cancer clinical trial demonstrate the performance of the method.}, number={4}, journal={BIOMETRICS}, author={Zhang, Baqun and Tsiatis, Anastasios A. and Laber, Eric B. 
and Davidian, Marie}, year={2012}, month={Dec}, pages={1010–1018} } @article{zhang_tsiatis_davidian_zhang_laber_2012, title={Estimating optimal treatment regimes from a classification perspective}, volume={1}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/sta.411}, DOI={10.1002/sta.411}, abstractNote={A treatment regime maps observed patient characteristics to a recommended treatment. Recent technological advances have increased the quality, accessibility, and volume of patient-level data; consequently, there is a growing need for powerful and flexible estimators of an optimal treatment regime that can be used with either observational or randomized clinical trial data. We propose a novel and general framework that transforms the problem of estimating an optimal treatment regime into a classification problem wherein the optimal classifier corresponds to the optimal treatment regime. We show that commonly employed parametric and semi-parametric regression estimators, as well as recently proposed robust estimators of an optimal treatment regime can be represented as special cases within our framework. Furthermore, our approach allows any classification procedure that can accommodate case weights to be used without modification to estimate an optimal treatment regime. This introduces a wealth of new and powerful learning algorithms for use in estimating treatment regimes. We illustrate our approach using data from a breast cancer clinical trial. Copyright © 2012 John Wiley & Sons, Ltd.}, number={1}, journal={Stat}, publisher={Wiley}, author={Zhang, Baqun and Tsiatis, Anastasios A. 
and Davidian, Marie and Zhang, Min and Laber, Eric}, year={2012}, month={Oct}, pages={103–114} } @article{vock_davidian_tsiatis_muir_2012, title={Mixed model analysis of censored longitudinal data with flexible random-effects density}, volume={13}, ISSN={["1468-4357"]}, DOI={10.1093/biostatistics/kxr026}, abstractNote={Mixed models are commonly used to represent longitudinal or repeated measures data. An additional complication arises when the response is censored, for example, due to limits of quantification of the assay used. While Gaussian random effects are routinely assumed, little work has characterized the consequences of misspecifying the random-effects distribution nor has a more flexible distribution been studied for censored longitudinal data. We show that, in general, maximum likelihood estimators will not be consistent when the random-effects density is misspecified, and the effect of misspecification is likely to be greatest when the true random-effects density deviates substantially from normality and the number of noncensored observations on each subject is small. We develop a mixed model framework for censored longitudinal data in which the random effects are represented by the flexible seminonparametric density and show how to obtain estimates in SAS procedure NLMIXED. Simulations show that this approach can lead to reduction in bias and increase in efficiency relative to assuming Gaussian random effects. The methods are demonstrated on data from a study of hepatitis C virus.}, number={1}, journal={BIOSTATISTICS}, author={Vock, David M. and Davidian, Marie and Tsiatis, Anastasios A. 
and Muir, Andrew J.}, year={2012}, month={Jan}, pages={61–73} } @article{yuan_zhang_davidian_2012, title={Variable selection for covariate-adjusted semiparametric inference in randomized clinical trials}, volume={31}, ISSN={["1097-0258"]}, DOI={10.1002/sim.5433}, abstractNote={Extensive baseline covariate information is routinely collected on participants in randomized clinical trials, and it is well recognized that a proper covariate-adjusted analysis can improve the efficiency of inference on the treatment effect. However, such covariate adjustment has engendered considerable controversy, as post hoc selection of covariates may involve subjectivity and may lead to biased inference, whereas prior specification of the adjustment may exclude important variables from consideration. Accordingly, how to select covariates objectively to gain maximal efficiency is of broad interest. We propose and study the use of modern variable selection methods for this purpose in the context of a semiparametric framework, under which variable selection in modeling the relationship between outcome and covariates is separated from estimation of the treatment effect, circumventing the potential for selection bias associated with standard analysis of covariance methods. We demonstrate that such objective variable selection techniques combined with this framework can identify key variables and lead to unbiased and efficient inference on the treatment effect. A critical issue in finite samples is validity of estimators of uncertainty, such as standard errors and confidence intervals for the treatment effect. We propose an approach to estimation of sampling variation of estimated treatment effect and show its superior performance relative to that of existing methods. 
Copyright © 2012 John Wiley & Sons, Ltd.}, number={29}, journal={STATISTICS IN MEDICINE}, author={Yuan, Shuai and Zhang, Hao Helen and Davidian, Marie}, year={2012}, month={Dec}, pages={3789–3804} } @article{davidian_louis_2012, title={Why Statistics?}, volume={336}, ISSN={["0036-8075"]}, DOI={10.1126/science.1218685}, abstractNote={Popular media and science publications sound the drum: “Big Data” will drive our future, from translating genomic information into new therapies, to harnessing the Web to untangle complex social interactions, to detecting infectious disease outbreaks. Statistics is the science of learning from data, and of measuring, controlling, and communicating uncertainty; and it thereby provides the navigation essential for controlling the course of scientific and societal advances. This field will become ever more critical as academia, businesses, and governments rely increasingly on data-driven decisions, expanding the demand for statistics expertise.}, number={6077}, journal={SCIENCE}, author={Davidian, Marie and Louis, Thomas A.}, year={2012}, month={Apr}, pages={12–12} } @article{thomas_stefanski_davidian_2011, title={A Moment-Adjusted Imputation Method for Measurement Error Models}, volume={67}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2011.01569.x}, abstractNote={Studies of clinical characteristics frequently measure covariates with a single observation. This may be a mismeasured version of the "true" phenomenon due to sources of variability like biological fluctuations and device error. Descriptive analyses and outcome models that are based on mismeasured data generally will not reflect the corresponding analyses based on the "true" covariate. Many statistical methods are available to adjust for measurement error. Imputation methods like regression calibration and moment reconstruction are easily implemented but are not always adequate. 
Sophisticated methods have been proposed for specific applications like density estimation, logistic regression, and survival analysis. However, it is frequently infeasible for an analyst to adjust each analysis separately, especially in preliminary studies where resources are limited. We propose an imputation approach called moment-adjusted imputation that is flexible and relatively automatic. Like other imputation methods, it can be used to adjust a variety of analyses quickly, and it performs well under a broad range of circumstances. We illustrate the method via simulation and apply it to a study of systolic blood pressure and health outcomes in patients hospitalized with acute heart failure.}, number={4}, journal={BIOMETRICS}, author={Thomas, Laine and Stefanski, Leonard and Davidian, Marie}, year={2011}, month={Dec}, pages={1461–1470} } @article{tsiatis_davidian_2011, title={Connections between survey calibration estimators and semiparametric models for incomplete data discussion}, volume={79}, number={2}, journal={International Statistical Review}, author={Tsiatis, A. A. and Davidian, M.}, year={2011}, pages={221–223} } @article{funk_westreich_wiesen_stuermer_brookhart_davidian_2011, title={Doubly Robust Estimation of Causal Effects}, volume={173}, ISSN={["1476-6256"]}, DOI={10.1093/aje/kwq439}, abstractNote={Doubly robust estimation combines a form of outcome regression with a model for the exposure (i.e., the propensity score) to estimate the causal effect of an exposure on an outcome. When used individually to estimate a causal effect, both outcome regression and propensity score methods are unbiased only if the statistical model is correctly specified. The doubly robust estimator combines these 2 approaches such that only 1 of the 2 models need be correctly specified to obtain an unbiased effect estimator. 
In this introduction to doubly robust estimators, the authors present a conceptual overview of doubly robust estimation, a simple worked example, results from a simulation study examining performance of estimated and bootstrapped standard errors, and a discussion of the potential advantages and limitations of this method. The supplementary material for this paper, which is posted on the Journal's Web site (http://aje.oupjournals.org/), includes a demonstration of the doubly robust property (Web Appendix 1) and a description of a SAS macro (SAS Institute, Inc., Cary, North Carolina) for doubly robust estimation, available for download at http://www.unc.edu/~mfunk/dr/.}, number={7}, journal={AMERICAN JOURNAL OF EPIDEMIOLOGY}, author={Funk, Michele Jonsson and Westreich, Daniel and Wiesen, Chris and Stuermer, Til and Brookhart, M. Alan and Davidian, Marie}, year={2011}, month={Apr}, pages={761–767} } @article{tsiatis_davidian_cao_2011, title={Improved Doubly Robust Estimation When Data Are Monotonely Coarsened, with Application to Longitudinal Studies with Dropout}, volume={67}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2010.01476.x}, abstractNote={Summary A routine challenge is that of making inference on parameters in a statistical model of interest from longitudinal data subject to dropout, which are a special case of the more general setting of monotonely coarsened data. Considerable recent attention has focused on doubly robust (DR) estimators, which in this context involve positing models for both the missingness (more generally, coarsening) mechanism and aspects of the distribution of the full data, that have the appealing property of yielding consistent inferences if only one of these models is correctly specified. DR estimators have been criticized for potentially disastrous performance when both of these models are even only mildly misspecified. 
We propose a DR estimator applicable in general monotone coarsening problems that achieves comparable or improved performance relative to existing DR methods, which we demonstrate via simulation studies and by application to data from an AIDS clinical trial.}, number={2}, journal={BIOMETRICS}, author={Tsiatis, Anastasios A. and Davidian, Marie and Cao, Weihua}, year={2011}, month={Jun}, pages={536–545} } @article{zhang_tsiatis_davidian_pieper_mahaffey_2011, title={Inference on treatment effects from a randomized clinical trial in the presence of premature treatment discontinuation: the SYNERGY trial}, volume={12}, ISSN={["1465-4644"]}, DOI={10.1093/biostatistics/kxq054}, abstractNote={The Superior Yield of the New Strategy of Enoxaparin, Revascularization, and GlYcoprotein IIb/IIIa inhibitors (SYNERGY) was a randomized, open-label, multicenter clinical trial comparing 2 anticoagulant drugs on the basis of time-to-event endpoints. In contrast to other studies of these agents, the primary, intent-to-treat analysis did not find evidence of a difference, leading to speculation that premature discontinuation of the study agents by some subjects may have attenuated the apparent treatment effect and thus to interest in inference on the difference in survival distributions were all subjects in the population to follow the assigned regimens, with no discontinuation. Such inference is often attempted via ad hoc analyses that are not based on a formal definition of this treatment effect. We use SYNERGY as a context in which to describe how this effect may be conceptualized and to present a statistical framework in which it may be precisely identified, which leads naturally to inferential methods based on inverse probability weighting.}, number={2}, journal={BIOSTATISTICS}, author={Zhang, Min and Tsiatis, Anastasios A. and Davidian, Marie and Pieper, Karen S. 
and Mahaffey, Kenneth W.}, year={2011}, month={Apr}, pages={258–269} } @article{funk_fusco_cole_thomas_porter_kaufman_davidian_white_hartmann_eron_2011, title={Timing of HAART initiation and clinical outcomes in human immunodeficiency virus type 1 seroconverters}, volume={171}, number={17}, journal={Archives of Internal Medicine}, author={Funk, M. J. and Fusco, J. S. and Cole, S. R. and Thomas, J. C. and Porter, K. and Kaufman, J. S. and Davidian, M. and White, A. D. and Hartmann, K. E. and Eron, J. J.}, year={2011}, pages={1560–1569} } @article{hawkins_clay_bradley_davidian_2010, title={Demographic and Historical Findings, Including Exposure to Environmental Tobacco Smoke, in Dogs with Chronic Cough}, volume={24}, ISSN={["1939-1676"]}, DOI={10.1111/j.1939-1676.2010.0530.x}, abstractNote={Background: Controlled studies investigating risk factors for the common presenting problem of chronic cough in dogs are lacking. Hypothesis/Objectives: To identify demographic and historical factors associated with chronic cough in dogs, and associations between the characteristics of cough and diagnosis. Animals: Dogs were patients of an academic internal medicine referral service. Coughing dogs had a duration of cough ≥2 months (n = 115). Control dogs had presenting problems other than cough (n = 104). Methods: Owners completed written questionnaires. Demographic information and diagnoses were obtained from medical records. Demographic and historical data were compared between coughing and control dogs. Demographic data and exposure to environmental tobacco smoke (ETS) also were compared with hospital accessions and adult smoking rates, respectively. Characteristics of cough were compared among diagnoses. Results: Most coughing dogs had a diagnosis of large airway disease (n = 88; 77%). Tracheobronchomalacia (TBM) was diagnosed in 59 dogs (51%), including 79% of toy breed dogs. Demographic risk factors included older age, smaller body weight, and being toy breed (P < .001). 
No association was found between coughing and month (P= .239) or season (P= .414) of presentation. Exposure to ETS was not confirmed to be a risk factor (P= .243). No historical description of cough was unique to a particular diagnosis. Conclusions and Clinical Importance: Associations with age, size, and toy breeds were strong. TBM is frequent in dogs with chronic cough, but descriptions of cough should be used cautiously in prioritizing differential diagnoses. The association between exposure to ETS and chronic cough deserves additional study.}, number={4}, journal={JOURNAL OF VETERINARY INTERNAL MEDICINE}, author={Hawkins, E. C. and Clay, L. D. and Bradley, J. M. and Davidian, M.}, year={2010}, pages={825–831} } @inbook{jonsson_funk_westreich_weisen_davidian_2010, place={Cary, NC}, title={Doubly robust estimation of treatment effects}, booktitle={Analysis of Observational Health-Care Data Using SAS}, publisher={SAS Press}, author={Jonsson Funk, M. and Westreich, D. and Weisen, C. and Davidian, M.}, editor={Faries, D. and Leon, A. and Haro, J.M. and Obenchain, R.}, year={2010} } @inbook{bang_davidian_2010, place={New York}, title={Experimental Statistics for biological sciences}, booktitle={Statistical Methods in Molecular Biology}, publisher={Springer (Humana Press)}, author={Bang, H. and Davidian, M.}, editor={Bang, H. and Zhou, X.K. and Van Epps, H.L. and Mazumdar, M.}, year={2010}, pages={3–104} } @article{kepler_banksa_davidian_rosenberg_2009, title={A model for HCMV infection in immunosuppressed patients}, volume={49}, ISSN={["1872-9479"]}, DOI={10.1016/j.mcm.2008.06.003}, abstractNote={We propose a model for HCMV infection in healthy and immunosuppressed patients. First, we present the biological model and formulate a system of ordinary differential equations to describe the pathogenesis of primary HCMV infection in immunocompetent and immunosuppressed individuals. We then investigate how clinical data can be applied to this model. 
Approximate parameter values for the model are derived from data available in the literature and from mathematical and physiological considerations. Simulations with the approximated parameter values demonstrate that the model is capable of describing primary, latent, and secondary (reactivated) HCMV infection. Reactivation simulations with this model provide a window into the dynamics of HCMV infection in (D-R+) transplant situations, where latently-infected recipients (R+) receive transplant tissue from HCMV-naive donors (D-).}, number={7-8}, journal={MATHEMATICAL AND COMPUTER MODELLING}, author={Kepler, G. M. and Banks, H. T. and Davidian, M. and Rosenberg, E. S.}, year={2009}, month={Apr}, pages={1653–1663} } @inbook{banks_davidian_samuels_sutton_2009, title={An Inverse Problem Statistical Methodology Summary}, ISBN={9789048123124 9789048123131}, url={http://dx.doi.org/10.1007/978-90-481-2313-1_11}, DOI={10.1007/978-90-481-2313-1_11}, abstractNote={We discuss statistical and computational aspects of inverse or parameter estimation problems for deterministic dynamical systems based on Ordinary Least Squares and Generalized Least Squares with appropriate corresponding data noise assumptions of constant variance and nonconstant variance (relative error), respectively. Among the topics included here are mathematical model, statistical model and data assumptions, and some techniques (residual plots, sensitivity analysis, model comparison tests) for verifying these. The ideas are illustrated throughout with the popular logistic growth model of Verhulst and Pearl as well as with a recently developed population level model of pneumococcal disease spread.}, booktitle={Mathematical and Statistical Estimation Approaches in Epidemiology}, publisher={Springer Netherlands}, author={Banks, H. Thomas and Davidian, Marie and Samuels, John R. 
and Sutton, Karyn L.}, year={2009}, pages={249–302} } @article{tzeng_zhang_chang_thomas_davidian_2009, title={Gene-Trait Similarity Regression for Multimarker-Based Association Analysis}, volume={65}, ISSN={0006-341X}, url={http://dx.doi.org/10.1111/j.1541-0420.2008.01176.x}, DOI={10.1111/j.1541-0420.2008.01176.x}, abstractNote={Summary We propose a similarity-based regression method to detect associations between traits and multimarker genotypes. The model regresses similarity in traits for pairs of “unrelated” individuals on their haplotype similarities, and detects the significance by a score test for which the limiting distribution is derived. The proposed method allows for covariates, uses phase-independent similarity measures to bypass the needs to impute phase information, and is applicable to traits of general types (e.g., quantitative and qualitative traits). We also show that the gene-trait similarity regression is closely connected with random effects haplotype analysis, although commonly they are considered as separate modeling tools. This connection unites the classic haplotype sharing methods with the variance-component approaches, which enables direct derivation of analytical properties of the sharing statistics even when the similarity regression model becomes analytically challenging.}, number={3}, journal={Biometrics}, publisher={Wiley}, author={Tzeng, Jung-Ying and Zhang, Daowen and Chang, Sheng-Mao and Thomas, Duncan C. and Davidian, Marie}, year={2009}, month={Feb}, pages={822–832} } @article{cao_tsiatis_davidian_2009, title={Improving efficiency and robustness of the doubly robust estimator for a population mean with incomplete data}, volume={96}, ISSN={["0006-3444"]}, DOI={10.1093/biomet/asp033}, abstractNote={Considerable recent interest has focused on doubly robust estimators for a population mean response in the presence of incomplete data, which involve models for both the propensity score and the regression of outcome on covariates. 
The usual doubly robust estimator may yield severely biased inferences if neither of these models is correctly specified and can exhibit nonnegligible bias if the estimated propensity score is close to zero for some observations. We propose alternative doubly robust estimators that achieve comparable or improved performance relative to existing methods, even with some estimated propensity scores close to zero.}, number={3}, journal={BIOMETRIKA}, author={Cao, Weihua and Tsiatis, Anastasios A. and Davidian, Marie}, year={2009}, month={Sep}, pages={723–734} } @article{huang_stefanski_davidian_2009, title={Latent-Model Robustness in Joint Models for a Primary Endpoint and a Longitudinal Process}, volume={65}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2008.01171.x}, abstractNote={Summary Joint modeling of a primary response and a longitudinal process via shared random effects is widely used in many areas of application. Likelihood-based inference on joint models requires model specification of the random effects. Inappropriate model specification of random effects can compromise inference. We present methods to diagnose random effect model misspecification of the type that leads to biased inference on joint models. The methods are illustrated via application to simulated data, and by application to data from a study of bone mineral density in perimenopausal women and data from an HIV clinical trial.}, number={3}, journal={BIOMETRICS}, author={Huang, Xianzheng and Stefanski, Leonard A. 
and Davidian, Marie}, year={2009}, month={Sep}, pages={719–727} } @article{serroyen_molenberghs_verbeke_davidian_2009, title={Nonlinear Models for Longitudinal Data}, volume={63}, ISSN={["1537-2731"]}, DOI={10.1198/tast.2009.07256}, abstractNote={Whereas marginal models, random-effects models, and conditional models are routinely considered to be the three main modeling families for continuous and discrete repeated measures with linear and generalized linear mean structures, respectively, it is less common to consider nonlinear models, let alone frame them within the above taxonomy. In the latter situation, indeed, when considered at all, the focus is often exclusively on random-effects models. In this article, we consider all three families, exemplify their great flexibility and relative ease of use, and apply them to a simple but illustrative set of data on tree circumference growth of orange trees. This article has supplementary material online.}, number={4}, journal={AMERICAN STATISTICIAN}, author={Serroyen, Jan and Molenberghs, Geert and Verbeke, Geert and Davidian, Marie}, year={2009}, month={Nov}, pages={378–388} } @article{zhang_davidian_2008, title={"Smooth" semiparametric regression analysis for arbitrarily censored time-to-event data}, volume={64}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2007.00928.x}, abstractNote={Summary A general framework for regression analysis of time-to-event data subject to arbitrary patterns of censoring is proposed. The approach is relevant when the analyst is willing to assume that distributions governing model components that are ordinarily left unspecified in popular semiparametric regression models, such as the baseline hazard function in the proportional hazards model, have densities satisfying mild “smoothness” conditions. 
Densities are approximated by a truncated series expansion that, for fixed degree of truncation, results in a “parametric” representation, which makes likelihood-based inference coupled with adaptive choice of the degree of truncation, and hence flexibility of the model, computationally and conceptually straightforward with data subject to any pattern of censoring. The formulation allows popular models, such as the proportional hazards, proportional odds, and accelerated failure time models, to be placed in a common framework; provides a principled basis for choosing among them; and renders useful extensions of the models straightforward. The utility and performance of the methods are demonstrated via simulations and by application to data from time-to-event studies.}, number={2}, journal={BIOMETRICS}, author={Zhang, Min and Davidian, Marie}, year={2008}, month={Jun}, pages={567–576} } @article{doehler_davidian_2008, title={'Smooth' inference for survival functions with arbitrarily censored data}, volume={27}, DOI={10.1002/sim.3368}, abstractNote={We propose a procedure for estimating the survival function of a time-to-event random variable under arbitrary patterns of censoring. The method is predicated on the mild assumption that the distribution of the random variable, and hence the survival function, has a density that lies in a class of ‘smooth’ densities whose elements can be represented by an infinite Hermite series. Truncation of the series yields a ‘parametric’ expression that can well approximate any plausible survival density, and hence survival function, provided the degree of truncation is suitably chosen. The representation admits a convenient expression for the likelihood for the ‘parameters’ in the approximation under arbitrary censoring/truncation that is straightforward to compute and maximize. 
A test statistic for comparing two survival functions, which is based on an integrated weighted difference of estimates of each under this representation, is proposed. Via simulation studies and application to a number of data sets, we demonstrate that the approach yields reliable inferences and can result in gains in efficiency over traditional nonparametric methods. Copyright © 2008 John Wiley & Sons, Ltd.}, number={26}, journal={Statistics in Medicine}, author={Doehler, K. and Davidian, Marie}, year={2008}, pages={5421–5439} } @article{tsiatis_davidian_zhang_lu_2008, title={Covariate adjustment for two-sample treatment comparisons in randomized clinical trials: A principled yet flexible approach}, volume={27}, ISSN={["1097-0258"]}, DOI={10.1002/sim.3113}, abstractNote={There is considerable debate regarding whether and how covariate-adjusted analyses should be used in the comparison of treatments in randomized clinical trials. Substantial baseline covariate information is routinely collected in such trials, and one goal of adjustment is to exploit covariates associated with outcome to increase precision of estimation of the treatment effect. However, concerns are routinely raised over the potential for bias when the covariates used are selected post hoc and the potential for adjustment based on a model of the relationship between outcome, covariates, and treatment to invite a ‘fishing expedition’ for that leading to the most dramatic effect estimate. By appealing to the theory of semiparametrics, we are led naturally to a characterization of all treatment effect estimators and to principled, practically feasible methods for covariate adjustment that yield the desired gains in efficiency and that allow covariate relationships to be identified and exploited while circumventing the usual concerns. The methods and strategies for their implementation in practice are presented. 
Simulation studies and an application to data from an HIV clinical trial demonstrate the performance of the techniques relative to the existing methods. Copyright © 2007 John Wiley & Sons, Ltd.}, number={23}, journal={STATISTICS IN MEDICINE}, author={Tsiatis, Anastasios A. and Davidian, Marie and Zhang, Min and Lu, Xiaomin}, year={2008}, month={Oct}, pages={4658–4677} } @article{zhang_tsiatis_davidian_2008, title={Improving efficiency of inferences in randomized clinical trials using auxiliary covariates}, volume={64}, ISSN={["1541-0420"]}, DOI={10.1111/j.1541-0420.2007.00976.x}, abstractNote={The primary goal of a randomized clinical trial is to make comparisons among two or more treatments. For example, in a two-arm trial with continuous response, the focus may be on the difference in treatment means; with more than two treatments, the comparison may be based on pairwise differences. With binary outcomes, pairwise odds ratios or log odds ratios may be used. In general, comparisons may be based on meaningful parameters in a relevant statistical model. Standard analyses for estimation and testing in this context typically are based on the data collected on response and treatment assignment only. In many trials, auxiliary baseline covariate information may also be available, and it is of interest to exploit these data to improve the efficiency of inferences. Taking a semiparametric theory perspective, we propose a broadly applicable approach to adjustment for auxiliary covariates to achieve more efficient estimators and tests for treatment parameters in the analysis of randomized clinical trials. Simulations and applications demonstrate the performance of the methods.}, number={3}, journal={BIOMETRICS}, author={Zhang, Min and Tsiatis, Anastasios A. 
and Davidian, Marie}, year={2008}, month={Sep}, pages={707–715} } @inbook{verbeke_davidian_2008, title={Joint models for longitudinal data}, ISBN={9781584886587 9781420011579}, ISSN={2154-5944}, url={http://dx.doi.org/10.1201/9781420011579.pt4}, DOI={10.1201/9781420011579.pt4}, booktitle={Chapman & Hall/CRC Handbooks of Modern Statistical Methods}, publisher={Chapman and Hall/CRC}, author={Verbeke, Geert and Davidian, Marie}, year={2008}, month={Aug}, pages={319–326} } @book{fitzmaurice_davidian_verbeke_molenberghs_2008, title={Longitudinal Data Analysis}, ISBN={9780429142673}, url={http://dx.doi.org/10.1201/9781420011579}, DOI={10.1201/9781420011579}, abstractNote={Introduction and Historical Overview Advances in Longitudinal Data Analysis: A Historical Perspective Garrett Fitzmaurice and Geert Molenberghs Parametric Modeling of Longitudinal Data Parametric Modeling of Longitudinal Data: Introduction and Overview Garrett Fitzmaurice and Geert Verbeke Generalized Estimating Equations for Longitudinal Data Analysis Stuart Lipsitz and Garrett Fitzmaurice Generalized Linear Mixed-Effects Models Sophia Rabe-Hesketh and Anders Skrondal Nonlinear Mixed-Effects Models Marie Davidian Growth Mixture Modeling: Analysis with Non-Gaussian Random Effects Bengt Muthen and Tihomir Asparouhov Targets of Inference in Hierarchical Models for Longitudinal Data Stephen W. Raudenbush Nonparametric and Semiparametric Methods for Longitudinal Data Nonparametric and Semiparametric Regression Methods: Introduction and Overview Xihong Lin and Raymond J. Carroll Nonparametric and Semiparametric Regression Methods for Longitudinal Data Xihong Lin and Raymond J. Carroll Functional Modeling of Longitudinal Data Hans-Georg Muller Smoothing Spline Models for Longitudinal Data S.J. Welham Penalized Spline Models for Longitudinal Data Babette A. Brumback, Lyndia C. Brumback, and Mary J. 
Lindstrom Joint Models for Longitudinal Data Joint Models for Longitudinal Data: Introduction and Overview Geert Verbeke and Marie Davidian Joint Models for Continuous and Discrete Longitudinal Data Christel Faes, Helena Geys, and Paul Catalano Random-Effects Models for Joint Analysis of Repeated-Measurement and Time-to-Event Outcomes Peter Diggle, Robin Henderson, and Peter Philipson Joint Models for High-Dimensional Longitudinal Data Steffen Fieuws and Geert Verbeke Incomplete Data Incomplete Data: Introduction and Overview Geert Molenberghs and Garrett Fitzmaurice Selection and Pattern-Mixture Models Roderick Little Shared-Parameter Models Paul S. Albert and Dean A. Follmann Inverse Probability Weighted Methods Andrea Rotnitzky Multiple Imputation Michael G. Kenward and James R. Carpenter Sensitivity Analysis for Incomplete Data Geert Molenberghs, Geert Verbeke, and Michael G. Kenward Estimation of the Causal Effects of Time-Varying Exposures James M. Robins and Miguel A. Hernan Index About the Editors Garrett Fitzmaurice is Associate Professor of Psychiatry at the Harvard Medical School, Associate Professor of Biostatistics at the Harvard School of Public Health, and Foreign Adjunct Professor of Biostatistics at the Karolinska Institute in Sweden. He is a fellow of the American Statistical Association, a member of the International Statistical Institute, and a recipient of the American Statistical Association's Excellence in Continuing Education Award. Marie Davidian is William Neal Reynolds Distinguished Professor of Statistics at North Carolina State University and Adjunct Professor of Biostatistics and Bioinformatics at Duke University. She is a fellow of the American Statistical Association, the Institute of Mathematical Statistics, and the American Association for the Advancement of Science. Dr. Davidian is also a member of the International Statistical Institute and executive editor of Biometrics. 
Geert Verbeke is Professor of Biostatistics in the Biostatistical Centre at the Catholic University of Leuven in Belgium. He is a past president of the Belgian Region of the International Biometric Society, joint editor of the Journal of the Royal Statistical Society, Series A, and an international representative on the board of directors and a fellow of the American Statistical Association. Jointly with Geert Molenberghs, Dr. Verbeke twice received the American Statistical Association's Excellence in Continuing Education Award. Geert Molenberghs is Professor of Biostatistics in the Center for Statistics at Hasselt University and in the Biostatistical Centre at the Catholic University of Leuven in Belgium. He is a fellow of the American Statistical Association, a member of the International Statistical Institute, a recipient of the Guy Medal in Bronze from the Royal Statistical Society, and coeditor of Biometrics. Together with Geert Verbeke, Dr. Molenberghs twice received the American Statistical Association's Excellence in Continuing Education Award.}, publisher={Chapman and Hall/CRC}, year={2008}, month={Aug} } @book{fitzmaurice_davidian_verbeke_molenberghs_2008a, place={Boca Raton, FL}, title={Longitudinal Data Analysis}, note={Chapman \& Hall/CRC, Taylor \& Francis Group}, publisher={Chapman \& Hall/CRC}, author={Fitzmaurice, G. and Davidian, M. and Verbeke, G. and Molenberghs, G.}, year={2008}, month={Aug} } @article{banks_davidian_hu_kepler_rosenberg_2008, title={Modelling HIV immune response and validation with clinical data}, volume={2}, ISSN={1751-3758 1751-3766}, url={http://dx.doi.org/10.1080/17513750701813184}, DOI={10.1080/17513750701813184}, abstractNote={A system of ordinary differential equations is formulated to describe the pathogenesis of HIV infection, wherein certain features that have been shown to be important by recent experimental research are incorporated in the model. 
These include the role of CD4+ memory cells that serve as a major reservoir of latently infected cells, a critical role for T-helper cells in the generation of CD8 memory cells capable of efficient recall response, and stimulation by antigens other than HIV. A stability analysis illustrates the capability of this model in admitting multiple locally asymptotically stable (locally a.s.) off-treatment equilibria.We show that this more biologically detailed model can exhibit the phenomenon of transient viremia experienced by some patients on therapy with viral load levels suppressed below the detection limit. We also show that the loss of CD4+ T-cell help in the generation of CD8+ memory cells leads to larger peak values for the viral load during transient viremia. Censored clinical data is used to obtain parameter estimates. We demonstrate that using a reduced set of 16 free parameters, obtained by fixing some parameters at their population averages, the model provides reasonable fits to the patient data and, moreover, that it exhibits good predictive capability. We further show that parameter values obtained for most clinical patients do not admit multiple locally a.s off-treatment equilibria. This suggests that treatment to move from a high viral load equilibrium state to an equilibrium state with a lower (or zero) viral load is not possible for these patients.}, number={4}, journal={Journal of Biological Dynamics}, publisher={Informa UK Limited}, author={Banks, H. T. and Davidian, M. and Hu, Shuhua and Kepler, Grace M. and Rosenberg, E. 
S.}, year={2008}, month={Oct}, pages={357–385} } @inbook{davidian_2008, title={Non-linear mixed-effects models}, ISBN={9781584886587 9781420011579}, ISSN={2154-5944}, url={http://dx.doi.org/10.1201/9781420011579.ch5}, DOI={10.1201/9781420011579.ch5}, booktitle={Chapman & Hall/CRC Handbooks of Modern Statistical Methods}, publisher={Chapman and Hall/CRC}, author={Davidian, Marie}, year={2008}, month={Aug}, pages={107–141} } @article{tsiatis_davidian_2007, title={Comment: Demystifying Double Robustness: A Comparison of Alternative Strategies for Estimating a Population Mean from Incomplete Data}, volume={22}, ISSN={0883-4237}, url={http://dx.doi.org/10.1214/07-sts227b}, DOI={10.1214/07-sts227b}, abstractNote={Comment on ``Demystifying Double Robustness: A Comparison of Alternative Strategies for Estimating a Population Mean from Incomplete Data'' [arXiv:0804.2958]}, number={4}, journal={Statistical Science}, publisher={Institute of Mathematical Statistics}, author={Tsiatis, Anastasios A. and Davidian, Marie}, year={2007}, month={Nov}, pages={569–573} } @article{adams_banks_davidian_rosenberg_2007, title={Estimation and prediction with HIV-treatment interruption data}, volume={69}, ISSN={["0092-8240"]}, DOI={10.1007/s11538-006-9140-6}, number={2}, journal={BULLETIN OF MATHEMATICAL BIOLOGY}, author={Adams, B. M. and Banks, H. T. and Davidian, M. and Rosenberg, E. S.}, year={2007}, month={Feb}, pages={563–584} } @article{li_zhang_davidian_2007, title={Likelihood and pseudo-likelihood methods for semiparametric joint models for a primary endpoint and longitudinal data}, volume={51}, ISSN={0167-9473}, url={http://dx.doi.org/10.1016/j.csda.2006.10.008}, DOI={10.1016/j.csda.2006.10.008}, abstractNote={Inference on the association between a primary endpoint and features of longitudinal profiles of a continuous response is of central interest in medical and public health research. 
Joint models that represent the association through shared dependence of the primary and longitudinal data on random effects are increasingly popular; however, existing inferential methods may be inefficient or sensitive to assumptions on the random effects distribution. We consider a semiparametric joint model that makes only mild assumptions on this distribution and develop likelihood-based inference on the association and distribution, which offers improved performance relative to existing methods that is insensitive to the true random effects distribution. Moreover, the estimated distribution can reveal interesting population features, as we demonstrate for a study of the association between longitudinal hormone levels and bone status in peri-menopausal women.}, number={12}, journal={Computational Statistics & Data Analysis}, publisher={Elsevier BV}, author={Li, Erning and Zhang, Daowen and Davidian, Marie}, year={2007}, month={Aug}, pages={5776–5790} } @article{rosenberg_davidian_banks_2007, title={Using mathematical modeling and control to develop structured treatment interruption strategies for HIV infection}, volume={88}, ISSN={["1879-0046"]}, DOI={10.1016/j.drugalcdep.2006.12.024}, abstractNote={The goal of this article is to suggest that mathematical models describing biological processes taking place within a patient over time can be used to design adaptive treatment strategies. We demonstrate using the key example of treatment strategies for human immunodeficiency virus type-1 (HIV) infection. Although there has been considerable progress in management of HIV infection using highly active antiretroviral therapies, continuous treatment with these agents involves significant cost and burden, toxicities, development of drug resistance, and problems with adherence; these latter complications are of particular concern in substance-abusing individuals. 
This has inspired interest in structured or supervised treatment interruption (STI) strategies, which involve cycles of treatment withdrawal and re-initiation. We argue that the most promising STI strategies are adaptive treatment strategies. We then describe how biological mechanisms governing the interaction over time between HIV and a patient's immune system may be represented by mathematical models and how control methods applied to these models can be used to design adaptive STI strategies seeking to maintain long-term suppression of the virus. We advocate that, when such mathematical representations of processes underlying a disease or disorder are available, they can be an important tool for suggesting adaptive treatment strategies for clinical study.}, journal={DRUG AND ALCOHOL DEPENDENCE}, author={Rosenberg, Eric S. and Davidian, Marie and Banks, H. Thomas}, year={2007}, month={May}, pages={S41–S51} } @misc{davidian_2006, title={Dose Calibration}, ISBN={0471899976 9780471899976 0470057335 9780470057339}, url={http://dx.doi.org/10.1002/9780470057339.vad038}, DOI={10.1002/9780470057339.vad038}, abstractNote={Abstract Calibration refers broadly to the determination of a variable x on the basis of measurement(s) on another variable y . In the dose calibration problem, x is the unknown level (concentration, dose) of antigen (enzyme, hormone, protein, compound) in a biological test sample to be determined from y . It may be infeasible to determine x directly from the specimen, but the specimen may be subjected to an assay procedure that yields an associated measurement y . Given an understanding of the relationship between x and y , x may be estimated. Such calibration is commonplace in environmental health studies. 
Toxicokinetic modeling for estimation of internal doses and validation of mechanistic hypotheses is a popular approach in characterizing the health effects of environmental exposures; underlying such studies is the need for precise calibration of levels of exposure agents and their metabolites from blood or other samples.}, journal={Encyclopedia of Environmetrics}, publisher={John Wiley & Sons, Ltd}, author={Davidian, Marie}, year={2006}, month={Sep} } @article{huang_stefanski_davidian_2006, title={Latent-model robustness in structural measurement error models}, volume={93}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/93.1.53}, abstractNote={SUMMARY We present methods for diagnosing the effects of model misspecification of the true predictor distribution in structural measurement error models. We first formulate latent model robustness theoretically. Then we provide practical techniques for examining the adequacy of an assumed latent predictor model. The methods are illustrated via analytical examples, application to simulated data and with data from a study of coronary heart disease.}, number={1}, journal={BIOMETRIKA}, author={Huang, XZ and Stefanski, LA and Davidian, M}, year={2006}, month={Mar}, pages={53–64} } @article{lin_zhang_davidian_2006, title={Smoothing spline-based score tests for proportional hazards models}, volume={62}, ISSN={["0006-341X"]}, DOI={10.1111/j.1541-0420.2005.00521.x}, abstractNote={We propose "score-type" tests for the proportional hazards assumption and for covariate effects in the Cox model using the natural smoothing spline representation of the corresponding nonparametric functions of time or covariate. The tests are based on the penalized partial likelihood and are derived by viewing the inverse of the smoothing parameter as a variance component and testing an equivalent null hypothesis that the variance component is zero. 
We show that the tests have a size close to the nominal level and good power against general alternatives, and we apply them to data from a cancer clinical trial.}, number={3}, journal={BIOMETRICS}, author={Lin, Jiang and Zhang, Daowen and Davidian, Marie}, year={2006}, month={Sep}, pages={803–812} } @misc{davidian_mcgilchrist_2005, title={Biometrics}, ISBN={047084907X 9780470849071 0470011815 9780470011812}, url={http://dx.doi.org/10.1002/0470011815.b2a17011}, DOI={10.1002/0470011815.b2a17011}, abstractNote={Abstract Biometrics is a quarterly journal of the International Biometric Society (IBS), which is a highly regarded outlet for reports on statistical and mathematical research relevant to the biological sciences. We review the history, structure, and content of the journal.}, journal={Encyclopedia of Biostatistics}, publisher={John Wiley & Sons, Ltd}, author={Davidian, M. and McGilchrist, C. A.}, year={2005}, month={Jul} } @article{adams_banks_davidian_kwon_tran_wynne_rosenberg_2005, title={HIV dynamics: Modeling, data analysis, and optimal treatment protocols}, volume={184}, ISSN={["1879-1778"]}, DOI={10.1016/j.cam.2005.02.004}, abstractNote={We present an overview of some concepts and methodologies we believe useful in modeling HIV pathogenesis. After a brief discussion of motivation for and previous efforts in the development of mathematical models for progression of HIV infection and treatment, we discuss mathematical and statistical ideas relevant to Structured Treatment Interruptions (STI). Among these are model development and validation procedures including parameter estimation, data reduction and representation, and optimal control relative to STI. 
Results from initial attempts in each of these areas by an interdisciplinary team of applied mathematicians, statisticians and clinicians are presented.}, number={1}, journal={JOURNAL OF COMPUTATIONAL AND APPLIED MATHEMATICS}, author={Adams, BM and Banks, HT and Davidian, M and Kwon, HD and Tran, HT and Wynne, SN and Rosenberg, ES}, year={2005}, month={Dec}, pages={10–49} } @article{davidian_tsiatis_leon_2005, title={Semiparametric estimation of treatment effect in a pretest-posttest study with missing data}, volume={20}, number={3}, journal={Statistical Science}, author={Davidian, M. and Tsiatis, A. A. and Leon, S.}, year={2005}, pages={261–282} } @article{tsiatis_davidian_2005, title={Statistical issues arising in the Women's Health Initiative - Discussion}, volume={61}, ISSN={["1541-0420"]}, DOI={10.1111/j.0006-341X.2005.454_9.x}, abstractNote={BiometricsVolume 61, Issue 4 p. 933-935 Discussion on "Statistical Issues Arising in the Women's Health Initiative" Anastasios A. Tsiatis, Anastasios A. Tsiatis Department of Statistics Box 8203, North Carolina State University Raleigh, North Carolina 27695-8203, U.S.A. Search for more papers by this authorMarie Davidian, Marie Davidian Department of Statistics Box 8203, North Carolina State University Raleigh, North Carolina 27695-8203, U.S.A. email:[email protected]Search for more papers by this author Anastasios A. Tsiatis, Anastasios A. Tsiatis Department of Statistics Box 8203, North Carolina State University Raleigh, North Carolina 27695-8203, U.S.A. Search for more papers by this authorMarie Davidian, Marie Davidian Department of Statistics Box 8203, North Carolina State University Raleigh, North Carolina 27695-8203, U.S.A. 
email:[email protected]Search for more papers by this author First published: 02 December 2005 https://doi.org/10.1111/j.0006-341X.2005.454_9.xRead the full textAboutPDF ToolsRequest permissionExport citationAdd to favoritesTrack citation ShareShare Give accessShare full text accessShare full-text accessPlease review our Terms and Conditions of Use and check box below to share full-text version of article.I have read and accept the Wiley Online Library Terms and Conditions of UseShareable LinkUse the link below to share a full-text version of this article with your friends and colleagues. Learn more.Copy URL Share a linkShare onFacebookTwitterLinkedInRedditWechat Volume61, Issue4December 2005Pages 933-935 RelatedInformation}, number={4}, journal={BIOMETRICS}, author={Tsiatis, AA and Davidian, M}, year={2005}, month={Dec}, pages={933–935} } @article{eisenstein_bethea_muhlbaier_davidian_peterson_stafford_mark_2005, title={Surgeons' Economic Profiles: Can We Get the ``Right'' Answers?}, volume={29}, ISSN={0148-5598 1573-689X}, url={http://dx.doi.org/10.1007/s10916-005-3000-z}, DOI={10.1007/s10916-005-3000-z}, number={2}, journal={Journal of Medical Systems}, publisher={Springer Science and Business Media LLC}, author={Eisenstein, Eric L. and Bethea, Charles F. and Muhlbaier, Lawrence H. and Davidian, Marie and Peterson, Eric D. and Stafford, Judith A. and Mark, Daniel B.}, year={2005}, month={Apr}, pages={111–124} } @article{li_zhang_davidian_2004, title={Conditional estimation for generalized linear models when covariates are subject-specific parameters in a mixed model for longitudinal measurements}, volume={60}, number={1}, journal={Biometrics}, author={Li, E. N. and Zhang, D. W. 
and Davidian, M.}, year={2004}, pages={1–7} } @article{pieper_tsiatis_davidian_hasselblad_kleiman_boersma_chang_griffin_armstrong_califf_et_al._2004, title={Differential Treatment Benefit of Platelet Glycoprotein IIb/IIIa Inhibition With Percutaneous Coronary Intervention Versus Medical Therapy for Acute Coronary Syndromes}, volume={109}, ISSN={0009-7322 1524-4539}, url={http://dx.doi.org/10.1161/01.cir.0000112570.97220.89}, DOI={10.1161/01.cir.0000112570.97220.89}, abstractNote={Although many believe that platelet glycoprotein IIb/IIIa inhibitors should be used only in acute coronary syndrome patients undergoing percutaneous coronary intervention, supporting data from randomized clinical trials are tenuous. The assumption that these agents are useful only in conjunction with percutaneous coronary intervention is based primarily on inappropriate subgroup analyses performed across the glycoprotein IIb/IIIa inhibitor trials.We describe the problems with these analytical techniques and demonstrate that different approaches to the question can result in opposing answers.Clinical-practice decisions and practice guidelines should be based on overall trial results and not analyses of post-randomization subgroups.}, number={5}, journal={Circulation}, publisher={Ovid Technologies (Wolters Kluwer Health)}, author={Pieper, Karen S. and Tsiatis, Anastasios A. and Davidian, Marie and Hasselblad, Vic and Kleiman, Neal S. and Boersma, Eric and Chang, Wei-Ching and Griffin, Jeffrey and Armstrong, Paul W. and Califf, Robert M. and others}, year={2004}, month={Feb}, pages={641–646} } @article{marron_muller_rice_wang_wang_wang_davidian_diggle_follmann_louis_et_al._2004, title={Discussion of nonparametric and semiparametric regression}, volume={14}, number={3}, journal={Statistica Sinica}, author={Marron, J. S. and Muller, H. G. and Rice, J. and Wang, J. L. and Wang, N. Y. and Wang, Y. D. and Davidian, M. and Diggle, P. and Follmann, D. and Louis, T. A. 
and et al.}, year={2004}, pages={615–629} } @article{davidian_lin_wang_2004, title={Emerging issues in longitudinal and functional data analysis - Introduction}, volume={14}, number={3}, journal={Statistica Sinica}, author={Davidian, M. and Lin, X. H. and Wang, J. L.}, year={2004}, pages={613–614} } @article{powell_cheshire_laban_colvocoresses_o'donnell_davidian_2004, title={Growth, mortality, and hatchdate distributions of larval and juvenile spotted seatrout (Cynoscion nebulosus) in Florida Bay, Everglades National Park}, volume={102}, number={1}, journal={Fishery Bulletin (Washington, D.C.)}, author={Powell, A. B. and Cheshire, R. T. and Laban, E. H. and Colvocoresses, J. and O'Donnell, P. and Davidian, M.}, year={2004}, pages={142–155} } @article{tsiatis_davidian_2004, title={Joint modeling of longitudinal and time-to-event data: An overview}, volume={14}, url={http://www.jstor.org/stable/24307417}, number={3}, journal={Statistica Sinica}, publisher={Institute of Statistical Science, Academia Sinica}, author={Tsiatis, Anastasios A. and Davidian, Marie}, year={2004}, pages={809–834} } @article{zhang_davidian_2004, title={Likelihood and conditional likelihood inference for generalized additive mixed models for clustered data}, volume={91}, ISSN={["0047-259X"]}, DOI={10.1016/j.jmva.2004.04.007}, abstractNote={Lin and Zhang (J. Roy. Statist. Soc. Ser. B 61 (1999) 381) proposed the generalized additive mixed model (GAMM) as a framework for analysis of correlated data, where normally distributed random effects are used to account for correlation in the data, and proposed to use double penalized quasi-likelihood (DPQL) to estimate the nonparametric functions in the model and marginal likelihood to estimate the smoothing parameters and variance components simultaneously. However, the normal distributional assumption for the random effects may not be realistic in many applications, and it is unclear how violation of this assumption affects ensuing inferences for GAMMs. 
For a particular class of GAMMs, we propose a conditional estimation procedure built on a conditional likelihood for the response given a sufficient statistic for the random effect, treating the random effect as a nuisance parameter, which thus should be robust to its distribution. In extensive simulation studies, we assess performance of this estimator under a range of conditions and use it as a basis for comparison to DPQL to evaluate the impact of violation of the normality assumption. The procedure is illustrated with application to data from the Multicenter AIDS Cohort Study (MACS).}, number={1}, journal={JOURNAL OF MULTIVARIATE ANALYSIS}, author={Zhang, DW and Davidian, M}, year={2004}, month={Oct}, pages={90–106} } @inbook{ma_genton_davidian_2004, title={Linear mixed effects models with flexible generalized skew-elliptical random effects}, ISBN={1584884312}, booktitle={Skew-elliptical distibutions and their applications: A journey beyond normality}, publisher={Boca Raton, FL: Chapman & Hall/CRC}, author={Ma, Y. Y. and Genton, M. G. and Davidian, M.}, year={2004} } @article{bodnar_davidian_siega-riz_tsiatis_2004, title={Marginal structural models for analyzing causal effects of time-dependent treatments: An application in perinatal epidemiology}, volume={159}, ISSN={["1476-6256"]}, DOI={10.1093/aje/kwh131}, abstractNote={Marginal structural models (MSMs) are causal models designed to adjust for time-dependent confounding in observational studies of time-varying treatments. MSMs are powerful tools for assessing causality with complicated, longitudinal data sets but have not been widely used by practitioners. The objective of this paper is to illustrate the fitting of an MSM for the causal effect of iron supplement use during pregnancy (time-varying treatment) on odds of anemia at delivery in the presence of time-dependent confounding. Data from pregnant women enrolled in the Iron Supplementation Study (Raleigh, North Carolina, 1997-1999) were used. 
The authors highlight complexities of MSMs and key issues epidemiologists should recognize before and while undertaking an analysis with these methods and show how such methods can be readily interpreted in existing software packages, including SAS and Stata. The authors emphasize that if a data set with rich information on confounders is available, MSMs can be used straightforwardly to make robust inferences about causal effects of time-dependent treatments/exposures in epidemiologic research.}, number={10}, journal={AMERICAN JOURNAL OF EPIDEMIOLOGY}, author={Bodnar, LM and Davidian, M and Siega-Riz, AM and Tsiatis, AA}, year={2004}, month={May}, pages={926–934} } @article{lunceford_davidian_2004, title={Stratification and weighting via the propensity score in estimation of causal treatment effects: a comparative study}, volume={23}, ISSN={["1097-0258"]}, DOI={10.1002/SIM.1903}, abstractNote={Estimation of treatment effects with causal interpretation from observational data is complicated because exposure to treatment may be confounded with subject characteristics. The propensity score, the probability of treatment exposure conditional on covariates, is the basis for two approaches to adjusting for confounding: methods based on stratification of observations by quantiles of estimated propensity scores and methods based on weighting observations by the inverse of estimated propensity scores. We review popular versions of these approaches and related methods offering improved precision, describe theoretical properties and highlight their implications for practice, and present extensive comparisons of performance that provide guidance for practical use. 
Copyright © 2004 John Wiley & Sons, Ltd.}, number={19}, journal={STATISTICS IN MEDICINE}, author={Lunceford, JK and Davidian, M}, year={2004}, month={Oct}, pages={2937–2960} } @article{davidian_giltinan_2003, title={Nonlinear models for repeated measurement data: An overview and update}, volume={8}, ISSN={["1537-2693"]}, DOI={10.1198/1085711032697}, number={4}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Davidian, M and Giltinan, DM}, year={2003}, month={Dec}, pages={387–419} } @article{yeap_catalano_ryan_davidian_2003, title={Robust two-stage approach to repeated measurements analysis of chronic ozone exposure in rats}, volume={8}, ISSN={["1085-7117"]}, DOI={10.1198/1085711032552}, number={4}, journal={JOURNAL OF AGRICULTURAL BIOLOGICAL AND ENVIRONMENTAL STATISTICS}, author={Yeap, BY and Catalano, PJ and Ryan, LM and Davidian, M}, year={2003}, month={Dec}, pages={438–454} } @article{leon_tsiatis_davidian_2003, title={Semiparametric estimation of treatment effect in a pretest-posttest study}, volume={59}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2003.00120.x}, abstractNote={Inference on treatment effects in a pretest-posttest study is a routine objective in medicine, public health, and other fields. A number of approaches have been advocated. We take a semiparametric perspective, making no assumptions about the distributions of baseline and posttest responses. By representing the situation in terms of counterfactual random variables, we exploit recent developments in the literature on missing data and causal inference, to derive the class of all consistent treatment effect estimators, identify the most efficient such estimator, and outline strategies for implementation of estimators that may improve on popular methods. 
We demonstrate the methods and their properties via simulation and by application to a data set from an HIV clinical trial.}, number={4}, journal={BIOMETRICS}, author={Leon, S and Tsiatis, AA and Davidian, M}, year={2003}, month={Dec}, pages={1046–1055} } @article{chen_zhang_davidian_2002, title={A Monte Carlo EM algorithm for generalized linear mixed models with flexible random effects distribution.}, volume={3}, url={https://doi.org/10.1093/biostatistics/3.3.347}, DOI={10.1093/biostatistics/3.3.347}, abstractNote={A popular way to represent clustered binary, count, or other data is via the generalized linear mixed model framework, which accommodates correlation through incorporation of random effects. A standard assumption is that the random effects follow a parametric family such as the normal distribution; however, this may be unrealistic or too restrictive to represent the data. We relax this assumption and require only that the distribution of random effects belong to a class of ‘smooth’ densities and approximate the density by the seminonparametric (SNP) approach of Gallant and Nychka (1987). This representation allows the density to be skewed, multi‐modal, fat‐ or thin‐tailed relative to the normal and includes the normal as a special case. Because an efficient algorithm to sample from an SNP density is available, we propose a Monte Carlo EM algorithm using a rejection sampling scheme to estimate the fixed parameters of the linear predictor, variance components and the SNP density. 
The approach is illustrated by application to a data set and via simulation.}, number={3}, journal={Biostatistics (Oxford, England)}, author={Chen, J and Zhang, D and Davidian, M}, year={2002}, month={Sep}, pages={347–360} } @article{song_davidian_tsiatis_2002, title={A semiparametric likelihood approach to joint modeling of longitudinal and time-to-event data}, volume={58}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2002.00742.x}, abstractNote={Joint models for a time-to-event (e.g., survival) and a longitudinal response have generated considerable recent interest. The longitudinal data are assumed to follow a mixed effects model, and a proportional hazards model depending on the longitudinal random effects and other covariates is assumed for the survival endpoint. Interest may focus on inference on the longitudinal data process, which is informatively censored, or on the hazard relationship. Several methods for fitting such models have been proposed, most requiring a parametric distributional assumption (normality) on the random effects. A natural concern is sensitivity to violation of this assumption; moreover, a restrictive distributional assumption may obscure key features in the data. We investigate these issues through our proposal of a likelihood-based approach that requires only the assumption that the random effects have a smooth density. Implementation via the EM algorithm is described, and performance and the benefits for uncovering noteworthy features are illustrated by application to data from an HIV clinical trial and by simulation.}, number={4}, journal={BIOMETRICS}, author={Song, X and Davidian, M and Tsiatis, AA}, year={2002}, month={Dec}, pages={742–753} } @article{song_davidian_tsiatis_2002a, title={An estimator for the proportional hazards model with multiple longitudinal covariates measured with error}, volume={3}, number={4}, journal={Biostatistics (Oxford, England)}, author={Song, X. A. and Davidian, M. and Tsiatis, A. 
A.}, year={2002}, pages={511–528} } @inbook{altan_manola_davidian_raghavarao_2002, place={Basel}, series={Developments in Biologicals}, title={Constrained four parameter logistic model}, volume={107}, booktitle={The Design and Analysis of Potency Assays for Biotechnology Products}, publisher={Karger}, author={Altan, S. and Manola, A. and Davidian, M. and Raghavarao, D.}, year={2002}, pages={71–76}, collection={Developments in Biologicals} } @article{lunceford_davidian_tsiatis_2002, title={Estimation of survival distributions of treatment policies in two-stage randomization designs in clinical trials}, volume={58}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2002.00048.x}, abstractNote={Some clinical trials follow a design where patients are randomized to a primary therapy at entry followed by another randomization to maintenance therapy contingent upon disease remission. Ideally, analysis would allow different treatment policies, i.e., combinations of primary and maintenance therapy if specified up-front, to be compared. Standard practice is to conduct separate analyses for the primary and follow-up treatments, which does not address this issue directly. We propose consistent estimators for the survival distribution and mean restricted survival time for each treatment policy in such two-stage studies and derive large-sample properties. The methods are demonstrated on a leukemia clinical trial data set and through simulation.}, number={1}, journal={BIOMETRICS}, author={Lunceford, JK and Davidian, M and Tsiatis, AA}, year={2002}, month={Mar}, pages={48–57} } @article{tsiatis_davidian_mcneney_2002, title={Multiple imputation methods for testing treatment differences in survival distributions with missing cause of failure}, volume={89}, number={1}, journal={Biometrika}, author={Tsiatis, A. A. and Davidian, M. 
and Mcneney, B.}, year={2002}, pages={238–244} } @article{compare_trial_2002, title={Randomized comparison of platelet inhibition with abciximab, tirofiban and eptifibatide during percutaneous coronary intervention in acute coronary syndromes - the {COMPARE} trial}, volume={106}, number={12}, journal={Circulation (New York, N.Y. : 1950)}, year={2002}, pages={1470–1476} } @article{tsiatis_davidian_2001, title={A semiparametric estimator for the proportional hazards model with longitudinal covariates measured with error}, volume={88}, ISSN={["0006-3444"]}, url={http://www.jstor.org/stable/2673492}, DOI={10.1093/biomet/88.2.447}, abstractNote={SUMMARY A common objective in longitudinal studies is to characterise the relationship between a failure time process and time-independent and time-dependent covariates. Timedependent covariates are generally available as longitudinal data collected periodically during the course of the study. We assume that these data follow a linear mixed effects model with normal measurement error and that the hazard of failure depends both on the underlying random effects describing the covariate process and other time-independent covariates through a proportional hazards relationship. A routine assumption is that the random effects are normally distributed; however, this need not hold in practice. Within this framework, we develop a simple method for estimating the proportional hazards model parameters that requires no assumption on the distribution of the random effects. 
Large-sample properties are discussed, and finite-sample performance is assessed and compared to competing methods via simulation.}, number={2}, journal={BIOMETRIKA}, publisher={[Oxford University Press, Biometrika Trust]}, author={Tsiatis, AA and Davidian, M}, year={2001}, month={Jun}, pages={447–458} } @article{zhang_davidian_2001, title={Linear mixed models with flexible distributions of random effects for longitudinal data}, volume={57}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2001.00795.x}, abstractNote={Normality of random effects is a routine assumption for the linear mixed model, but it may be unrealistic, obscuring important features of among-individual variation. We relax this assumption by approximating the random effects density by the seminonparameteric (SNP) representation of Gallant and Nychka (1987, Econometrics 55, 363-390), which includes normality as a special case and provides flexibility in capturing a broad range of nonnormal behavior, controlled by a user-chosen tuning parameter. An advantage is that the marginal likelihood may be expressed in closed form, so inference may be carried out using standard optimization techniques. We demonstrate that standard information criteria may be used to choose the tuning parameter and detect departures from normality, and we illustrate the approach via simulation and using longitudinal data from the Framingham study.}, number={3}, journal={BIOMETRICS}, author={Zhang, DW and Davidian, M}, year={2001}, month={Sep}, pages={795–802} } @article{yeap_davidian_2001, title={Robust two-stage estimation in hierarchical nonlinear models}, volume={57}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2001.00266.x}, abstractNote={Summary. Hierarchical models encompass two sources of variation, namely within and among individuals in the population; thus, it is important to identify outliers that may arise at each sampling level. 
A two-stage approach to analyzing nonlinear repeated measurements naturally allows parametric modeling of the respective variance structure for the intraindividual random errors and interindividual random effects. We propose a robust two-stage procedure based on Huber's (1981, Robust Statistics) theory of M-estimation to accommodate separately aberrant responses within an experimental unit and subjects deviating from the study population when the usual assumptions of normality are violated. A toxicology study of chronic ozone exposure in rats illustrates the impact of outliers on the population inference and hence the advantage of adopting the robust methodology. The robust weights generated by the two-stage M-estimation process also serve as diagnostics for gauging the relative influence of outliers at each level of the hierarchical model. A practical appeal of our proposal is the computational simplicity since the estimation algorithm may be implemented using standard statistical software with a nonlinear least squares routine and iterative capability.}, number={1}, journal={BIOMETRICS}, author={Yeap, BY and Davidian, M}, year={2001}, month={Mar}, pages={266–272} } @article{hartford_davidian_2000, title={Consequences of misspecifying assumptions in nonlinear mixed effects models}, volume={34}, ISSN={["0167-9473"]}, DOI={10.1016/S0167-9473(99)00076-6}, abstractNote={The nonlinear mixed effects model provides a framework for inference in a number of applications, most notably pharmacokinetics and pharmacodynamics, but also in HIV and other disease dynamics and in a host of other longitudinal-data settings. In these models, to characterize population variation, individual-specific parameters are modeled as functions of fixed effects and mean-zero random effects. A standard assumption is that of normality of the random effects, but this assumption may not always be realistic, and, because the random effects are not observed, it may be difficult to verify. 
An additional issue is specifying the form of the function relating individual-specific parameters to fixed and random effects. Again, because this relationship is not observed explicitly, it may be difficult to specify. Popular methods for fitting these models are predicated on the normality assumption, and past studies evaluating their performance have assumed that normality and the form of the model are correct specifications. We investigate the consequences for population inferences using these methods when the normality assumption is inappropriate and/or the model is misspecified.}, number={2}, journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS}, author={Hartford, A and Davidian, M}, year={2000}, month={Aug}, pages={139–164} } @article{ko_davidian_2000, title={Correcting for measurement error in individual-level covariates in nonlinear mixed effects models}, volume={56}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2000.00368.x}, abstractNote={Summary. The nonlinear mixed effects model is used to represent data in pharmacokinetics, viral dynamics, and other areas where an objective is to elucidate associations among individual-specific model parameters and covariates; however, covariates may be measured with error. For additive measurement error, we show substitution of mismeasured covariates for true covariates may lead to biased estimators for fixed effects and random effects covariance parameters, while regression calibration may eliminate bias in fixed effects but fail to correct that in covariance parameters. 
We develop methods to take account of measurement error that correct this bias and may be implemented with standard software, and we demonstrate their utility via simulation and application to data from a study of HIV dynamics.}, number={2}, journal={BIOMETRICS}, author={Ko, HJ and Davidian, M}, year={2000}, month={Jun}, pages={368–375} } @article{neumann_lam_dahari_davidian_wiley_mika_perelson_layden_2000, title={Differences in viral dynamics between genotypes 1 and 2 of hepatitis C virus}, volume={182}, ISSN={["1537-6613"]}, DOI={10.1086/315661}, abstractNote={Many studies have shown that patients infected with hepatitis C virus (HCV) of genotype 2 have better response to interferon (IFN)-alpha treatment than genotype 1 patients; however, the mechanisms responsible for this difference are not understood. In this study, viral dynamics during high-dose IFN induction treatment were compared between the genotypes. Patients in each group received 10 MU of IFN-alpha2b for 14 days, and HCV RNA levels were frequently determined. Nonlinear fitting, both individually for each patient and using a mixed-effects approach, of the viral kinetic data to a mathematical model of the IFN effect on HCV infection was performed. The antiviral effectiveness of IFN in blocking virus production, the free virion clearance rate, and the HCV-infected cell death rate were all significantly higher for genotype 2 patients than for genotype 1 patients. Thus, the better response rate of patients infected with HCV genotype 2 is multifactorial. 
This is the first finding of a difference in viral dynamics between subtypes of the same virus and demonstrates the importance of subtype-specific virus-host-drug interactions.}, number={1}, journal={JOURNAL OF INFECTIOUS DISEASES}, author={Neumann, AU and Lam, NP and Dahari, H and Davidian, M and Wiley, TE and Mika, BP and Perelson, AS and Layden, TJ}, year={2000}, month={Jul}, pages={28–35} } @article{oberg_davidian_2000, title={Estimating data transformations in nonlinear mixed effects models}, volume={56}, ISSN={["0006-341X"]}, DOI={10.1111/j.0006-341X.2000.00065.x}, abstractNote={A routine practice in the analysis of repeated measurement data is to represent individual responses by a mixed effects model on some transformed scale. For example, for pharmacokinetic, growth, and other data, both the response and the regression model are typically transformed to achieve approximate within-individual normality and constant variance on the new scale; however, the choice of transformation is often made subjectively or by default, with adoption of a standard choice such as the log. We propose a mixed effects framework based on the transform-both-sides model, where the transformation is represented by a monotone parametric function and is estimated from the data. For this model, we describe a practical fitting strategy based on approximation of the marginal likelihood. 
Inference is complicated by the fact that estimation of the transformation requires modification of the usual standard errors for estimators of fixed effects; however, we show that, under conditions relevant to common applications, this complication is asymptotically negligible, allowing straightforward implementation via standard software.}, number={1}, journal={BIOMETRICS}, author={Oberg, A and Davidian, M}, year={2000}, month={Mar}, pages={65–72} } @article{betts_krowka_kepler_davidian_christopherson_kwok_louie_eron_sheppard_frelinger_1999, title={Human immunodeficiency virus type 1-specific cytotoxic T lymphocyte activity is inversely correlated with HIV type 1 viral load in HIV type 1-infected long-term survivors}, volume={15}, ISSN={["1931-8405"]}, DOI={10.1089/088922299310313}, abstractNote={HIV-1-specific cytotoxic T cell (CTL) activity has been suggested to correlate with protection from progression to AIDS. We have examined the relationship between HIV-specific CTL activity and maintenance of peripheral blood CD4+ T lymphocyte counts and control of viral load in 17 long-term survivors (LTSs) of HIV-1 infection. Longitudinal analysis indicated that the LTS cohort demonstrated a decreased rate of CD4+ T cell loss (18 cells/mm3/year) compared with typical normal progressors (approximately 60 cells/mm3/year). The majority of the LTSs had detectable, variable, and in some individuals, quite high (>10(4) RNA copies/ml) plasma viral load during the study period. In a cross-sectional analysis, HIV-specific CTL activity to HIV Gag, Pol, and Env proteins was detectable in all 17 LTSs. Simultaneous analysis of HIV-1 Gag-Pol, and Env-specific CTLs and virus load in protease inhibitor-naive individuals showed a significant inverse correlation between Pol-specific CTL activity and plasma HIV-1 RNA levels (p = 0.001). 
Furthermore, using a mixed linear effects model the combined effects of HIV-1 Pol- and Env-specific CTL activity on the viral load were significantly stronger than the effects of HIV-1 Pol-specific CTL activity alone on predicted virus load. These data suggest that the presence of HIV-1-specific CTL activity in HIV-1-infected long-term survivors is an important component in the effective control of HIV-1 replication.}, number={13}, journal={AIDS RESEARCH AND HUMAN RETROVIRUSES}, author={Betts, MR and Krowka, JF and Kepler, TB and Davidian, M and Christopherson, C and Kwok, S and Louie, L and Eron, J and Sheppard, H and Frelinger, JA}, year={1999}, month={Sep}, pages={1219–1228} } @inbook{davidian_1999, place={New York}, title={Invited discussion of “The Bayesian approach to population pharmacokinetic-pharmacodynamic modeling” by Wakefield, Aarons, and Racine-Poon}, volume={IV}, booktitle={Case Studies in Bayesian Statistics}, publisher={Springer-Verlag}, author={Davidian, M.}, editor={Gatsonis, C. and Kass, R.E. and Carlin, B. and Carriquiry, A. and Gelman, A. and Verdinelli, I. and West, M.}, year={1999}, pages={257–263} } @article{hu_tsiatis_davidian_1998, title={Estimating the parameters in the Cox model when covariate variables are measured with error}, volume={54}, ISSN={["0006-341X"]}, DOI={10.2307/2533667}, abstractNote={The Cox proportional hazards model is commonly used to model survival data as a function of covariates. Because of the measuring mechanism or the nature of the environment, covariates are often measured with error and are not directly observable. A naive approach is to use the observed values of the covariates in the Cox model, which usually produces biased estimates of the true association of interest. An alternative strategy is to take into account the error in measurement, which may be carried out for the Cox model in a number of ways. 
We examine several such approaches and compare and contrast them through several simulation studies. We introduce a likelihood-based approach, which we refer to as the semiparametric method, and show that this method is an appealing alternative. The methods are applied to analyze the relationship between survival and CD4 count in patients with AIDS.}, number={4}, journal={BIOMETRICS}, author={Hu, P and Tsiatis, AA and Davidian, M}, year={1998}, month={Dec}, pages={1407–1419} } @article{smith_evans_davidian_1998, title={Statistical properties of fitted estimates of apparent in vivo metabolic constants obtained from gas uptake data. I. Lipophilic and slowly metabolized VOCs}, volume={10}, number={5}, journal={Inhalation Toxicology}, author={Smith, A. E. and Evans, M. V. and Davidian, M.}, year={1998}, pages={383–409} } @article{higgins_davidian_chew_burge_1998, title={The effect of serial dilution error on calibration inference in immunoassay}, volume={54}, ISSN={["0006-341X"]}, DOI={10.2307/2533992}, abstractNote={A common practice in immunoassay is the use of sequential dilutions of an initial stock solution of the antigen of interest to obtain standard samples in a desired concentration range. Nonlinear, heteroscedastic regression models are a common framework for analysis, and the usual methods for fitting the model assume that measured responses on the standards are independent. However, the dilution procedure introduces a propagation of random measurement error that may invalidate this assumption. We demonstrate that failure to account for serial dilution error in calibration inference on unknown samples leads to serious inaccuracy of assessments of assay precision such as confidence intervals and precision profiles. 
Techniques for taking serial dilution error into account based on data from multiple assay runs are discussed and are shown to yield valid calibration inferences.}, number={1}, journal={BIOMETRICS}, author={Higgins, KM and Davidian, M and Chew, G and Burge, H}, year={1998}, month={Mar}, pages={19–32} } @article{higgins_davidian_giltinan_1997, title={A two-step approach to measurement error in time-dependent covariates in nonlinear mixed-effects models, with application to IGF-I pharmacokinetics}, volume={92}, DOI={10.1080/01621459.1997.10473995}, abstractNote={Abstract The usual approach to the analysis of population pharmacokinetic studies is to represent the concentration-time data by a nonlinear mixed-effects model. Primary objectives are to characterize the pattern of drug disposition in the population and to identify individual-specific covariates associated with pharmacokinetic behavior. We consider data from a study of insulin-like growth factor I (IGF-I) administered by intravenous infusion to patients with severe head trauma. Failure to maintain steady-state levels of IGF-I was thought to be related to the temporal pattern of several covariates measured in the study, and an analysis investigating this issue was of interest. Observations on these potentially relevant covariates for each subject were made at time points different from those at which IGF-I concentrations were determined; moreover, the covariates themselves were likely subject to measurement error. The usual approach to time-dependent covariates in population analysis is to invoke a simple interpolation scheme, such as carrying forward the most recent covariate value, ignoring measurement error; however, for these data, the complicated observed covariate pattern makes this approach suspect. 
A nonlinear mixed-effects model incorporating a model for time-dependent covariates measured with error is used to describe the IGF-I data, and fitting is accomplished by a two-step strategy implemented using standard software. The performance of the method is evaluated via simulation.}, number={438}, journal={Journal of the American Statistical Association}, author={Higgins, K. M. and Davidian, Marie and Giltinan, D. M.}, year={1997}, pages={436–448} } @article{zeng_davidian_1997, title={Bootstrap-Adjusted Calibration Confidence Intervals for Immunoassay}, volume={92}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.1997.10473625}, DOI={10.1080/01621459.1997.10473625}, abstractNote={Abstract In immunoassay, a nonlinear heteroscedastic regression model is used to characterize assay concentration-response, and the model fitted to data from standard samples is used to calibrate unknown test samples. Usual large-sample methods to construct individual confidence intervals for calibrated concentrations have been observed in empirical studies to be seriously inaccurate in terms of achieving the nominal level of coverage. We show theoretically that this inaccuracy is due largely to estimation of parameters characterizing assay response variance. By exploiting the theory, we propose a bootstrap procedure to adjust the usual intervals to achieve a higher degree of accuracy. We provide both theoretical results and simulation evidence to show that the proposed method attains the nominal level. 
A practical advantage of the procedure is that it may be implemented reliably using far fewer bootstrap samples than are needed in other resampling schemes.}, number={437}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Zeng, Qi and Davidian, Marie}, year={1997}, month={Mar}, pages={278–290} } @article{zeng_davidian_1997a, title={Calibration inference based on multiple runs of an immunoassay}, volume={53}, ISSN={["0006-341X"]}, DOI={10.2307/2533499}, abstractNote={Several authors have documented the poor performance of usual large-sample, individual calibration confidence intervals based on a single run of an immunoassay. Inaccuracy of these intervals may be attributed to the paucity of information on model parameters available in a single run. Methods for combining information from multiple runs to estimate assay response variance parameters and to refine characterization of the standard curve for the current run via empirical Bayes techniques have been proposed. We investigate formally the utility of these techniques for improving the quality of routine individual calibration inference.}, number={4}, journal={BIOMETRICS}, author={Zeng, Q and Davidian, M}, year={1997}, month={Dec}, pages={1304–1317} } @article{zeng_davidian_1997b, title={Testing homogeneity of intra-run variance parameters in immunoassay}, volume={16}, ISSN={["0277-6715"]}, DOI={10.1002/(SICI)1097-0258(19970815)16:15<1765::AID-SIM603>3.0.CO;2-P}, abstractNote={A common assumption in the analysis of immunoassay data is a similar pattern of within-run variation across runs of the assay. One makes this assumption without formal investigation of its validity, despite the widely acknowledged fact that accurate understanding of intra-run variation is critical to reliable calibration inference. 
We propose a simple procedure for a formal test of the assumption of the homogeneity of parameters that characterize intra-run variation based on representation of standard curve data from multiple assay runs by a non-linear mixed effects model. We examine the performance of the procedure and investigate the robustness of calibration inference to incorrect assumptions about the pattern of intra-run variation. ©1997 by John Wiley & Sons, Ltd.}, number={15}, journal={STATISTICS IN MEDICINE}, author={Zeng, Q and Davidian, M}, year={1997}, month={Aug}, pages={1765–1776} } @article{wang_davidian_1996, title={A note on covariate measurement error in nonlinear mixed effects models}, volume={83}, ISSN={0006-3444 1464-3510}, url={http://dx.doi.org/10.1093/biomet/83.4.801}, DOI={10.1093/biomet/83.4.801}, abstractNote={Little is known about the effects of measurement error in intra-individual covariates on inference for the nonlinear mixed effects model. We investigate this issue for a controlled variable measurement error model, and find that a major consequence may be substantial bias in estimates of parameters characterising intra-individual variation. Estimation of population parameters may also be affected. The dramatic effect of measurement error on estimation of intra-individual variance parameters has implications not only in the repeated measurement context, but also in individual nonlinear regression models.}, number={4}, journal={Biometrika}, publisher={Oxford University Press (OUP)}, author={Wang, N. 
and Davidian, M.}, year={1996}, month={Dec}, pages={801–812} } @article{liu_foegeding_wang_smith_davidian_1996, title={Denaturation and Aggregation of Chicken Myosin Isoforms}, volume={44}, ISSN={0021-8561 1520-5118}, url={http://dx.doi.org/10.1021/jf9503422}, DOI={10.1021/jf9503422}, abstractNote={Heat-induced denaturation and aggregation of chicken myosins isolated from one white muscle (pectoralis) and two red muscles (iliotibialis and gastrocnemius) were investigated using differential sc...}, number={6}, journal={Journal of Agricultural and Food Chemistry}, publisher={American Chemical Society (ACS)}, author={Liu, Martha N. and Foegeding, E. Allen and Wang, Shue-Fung and Smith, Denise M. and Davidian, Marie}, year={1996}, month={Jan}, pages={1435–1440} } @article{trettin_davidian_jurgensen_lea_1996, title={Organic Matter Decomposition following Harvesting and Site Preparation of a Forested Wetland}, volume={60}, ISSN={0361-5995}, url={http://dx.doi.org/10.2136/sssaj1996.03615995006000060053x}, DOI={10.2136/sssaj1996.03615995006000060053x}, abstractNote={Organic matter accumulation is an important process that affects ecosystem function in many northern wetlands. The cotton strip assay (CSA) was used to measure the effect of harvesting and two different site preparation treatments, bedding and trenching, on organic matter decomposition in a forested wetland. A Latin square experimental design was used to determine the effect of harvesting, site preparation, and relative position within the wetland on organic matter decomposition at soil depths of 5, 10, and 20 cm. Repeated measures analysis of variance was used to test for treatment effects on organic matter decomposition, soil temperature, and soil oxidation depth. Cellulose decomposition increased at each soil depth as site disturbance increased, with bedding > trenching > whole-tree harvest > reference. 
The cellulose decomposition response was correlated with changes in soil temperature; the temperature coefficient Q10 equaled 6.0, which is greater than previously reported values. Position within the wetland relative to an adjoining river affected the decomposition and soil oxidation depth. Because the rate of decomposition is strongly controlled by temperature, higher rates of organic matter decay are expected to continue on harvested and regenerated sites until canopy closure reduces soil temperature.}, number={6}, journal={Soil Science Society of America Journal}, publisher={Wiley}, author={Trettin, C. C. and Davidian, M. and Jurgensen, M. F. and Lea, R.}, year={1996}, month={Nov}, pages={1994–2003} } @article{jacobson_davidian_rainey_hafner_raasch_luft_1996, title={Pyrimethamine pharmacokinetics in human immunodeficiency virus-positive patients seropositive for Toxoplasma gondii.}, volume={40}, ISSN={0066-4804 1098-6596}, url={http://dx.doi.org/10.1128/aac.40.6.1360}, DOI={10.1128/aac.40.6.1360}, abstractNote={Pyrimethamine pharmacokinetics were studied in 11 human immunodeficiency virus (HIV)-positive patients who were seropositive for exposure to Toxoplasma gondii and were taking zidovudine (AIDS Clinical Trials Group Protocol 102). Pyrimethamine was administered at 50 mg daily for 3 weeks to achieve steady state, and pharmacokinetic profiles were determined after administration of the last dose. Noncompartmental and compartmental analyses were performed. Population pharmacokinetic analysis assuming a one-compartment model yielded the following estimates: area under the 24-h concentration-time curve, 42.7 +/- 12.3 micrograms.h/ml; halflife, 139 +/- 34 h; clearance, 1.28 +/- 0.41 liters/h; volume of distribution, 246 +/- 641; and absorption rate constant, 1.5 +/- 1.3 liters/h. These values are similar to those seen in subjects without HIV infection. Pyrimethamine pharmacokinetics did not differ significantly in those subjects who were intravenous drug users. 
Adverse effects were noted in 73% of those initially enrolled in this study, leading to discontinuation for 38%. No association was noted between pyrimethamine levels and the incidence of adverse events. No significant differences were seen in zidovudine pharmacokinetic parameters obtained from studies performed before and during treatment with pyrimethamine. In summary, pyrimethamine exhibited pharmacokinetics in HIV-infected patients that were similar to those in non-HIV-infected subjects and it did not alter the pharmacokinetics of zidovudine in these patients.}, number={6}, journal={Antimicrobial Agents and Chemotherapy}, publisher={American Society for Microbiology}, author={Jacobson, J M and Davidian, M and Rainey, P M and Hafner, R and Raasch, R H and Luft, B J}, year={1996}, month={Jun}, pages={1360–1365} } @article{belanger_davidian_giltinan_1996, title={The Effect of Variance Function Estimation on Nonlinear Calibration Inference in Immunoassay Data}, volume={52}, ISSN={0006-341X}, url={http://dx.doi.org/10.2307/2533153}, DOI={10.2307/2533153}, abstractNote={Often with data from immunoassays, the concentration-response relationship is nonlinear and intra-assay response variance is heterogeneous. Estimation of the standard curve is usually based on a nonlinear heteroscedastic regression model for concentration-response, where variance is modeled as a function of mean response and additional variance parameters. This paper discusses calibration inference for immunoassay data which exhibit this nonlinear heteroscedastic mean-variance relationship. An assessment of the effect of variance function estimation in three types of approximate large-sample confidence intervals for unknown concentrations is given by theoretical and empirical investigation and application to two examples. 
A major finding is that the accuracy of such calibration intervals depends critically on the nature of response variance and the quality with which variance parameters are estimated.}, number={1}, journal={Biometrics}, publisher={JSTOR}, author={Belanger, Bruce A. and Davidian, Marie and Giltinan, David M.}, year={1996}, month={Mar}, pages={158} } @book{davidian_giltinan_1995, place={Boca Raton, FL}, series={Monographs on statistics and applied probability}, title={Nonlinear models for repeated measurement data}, ISBN={9780412983412}, publisher={Chapman \& Hall}, author={Davidian, M. and Giltinan, D. M.}, year={1995}, collection={Monographs on statistics and applied probability} } @article{nelson_sellon_novotney_devera_davidian_english_tompkins_tompkins_1995, title={Therapeutic effects of diethylcarbamazine and 3′-azido-3′-deoxythymidine on feline leukemia virus lymphoma formation}, volume={46}, ISSN={0165-2427}, url={http://dx.doi.org/10.1016/0165-2427(94)07017-2}, DOI={10.1016/0165-2427(94)07017-2}, abstractNote={Twenty-four specific pathogen-free kittens were infected with the Rickard strain of feline leukemia virus (FeLVR). The kittens were divided into four equal groups and were orally administered either a high dose of diethylcarbamazine (DECH, 12 mg kg-1), a low dose of diethylcarbamazine (DECL, 3 mg kg-1), 3'-azido-3'-deoxythymidine (AZT, 15 mg kg-1, b.i.d.), or a placebo (250 mg granular dextrose) daily for 10 weeks. Blood was collected at 2-week intervals for complete blood counts (CBC) and flow cytometric analysis (FACS) of peripheral blood lymphocytes (PBL). Plasma was assayed for antibodies to FeLV gp70 and for FeLV p27 antigen using ELISA assays. For FACS analysis, lymphocytes were incubated with monoclonal antibodies to feline Pan T, CD8+, CD4+, and B cell (Anti-Ig) antigens. In the placebo treated cats, FeLVR infection caused an early (2 weeks p.i.) 
and persistent decrease in leukocyte numbers attributable primarily to a decrease in neutrophil numbers and a secondary lesser decrease in B and CD4+ lymphocyte numbers. The DEC-treated groups showed a delayed but similar leukopenia by 4 weeks p.i. The lymphopenia in the DEC groups (primarily B cells and CD4+ cells) was reversed by 10 weeks p.i., but the neutropenia persisted. AZT treatment inhibited FeLVR-induced lymphopenia but did not prevent a reduction in neutrophil numbers. A marked p27 antigenemia that peaked at 4 weeks p.i. was noted in the placebo treated cats and in most cats (11/12) treated with either dose of DEC. However, AZT significantly inhibited the p27 antigenemia and all cats were negative for p27 antigen between 6 and 10 weeks of treatment. In general, placebo treated cats as well as DECH and DECL cats had low levels of antibody to gp70 throughout the study, suggesting FeLVR-induced immunosuppression. In contrast, significantly higher titers of anti-gp70 antibodies were seen in AZT-treated cats at 6 weeks p.i., and were maintained throughout treatment. Eighteen month survival rates provide efficacy data for AZT as well as both DEC treatment groups. While all placebo treated cats were euthanized by 52 weeks p.i. due to FeLV associated lymphomas with a mean survival time of 35.5 weeks p.i., median survival time of the AZT treated group was > or = 102 weeks p.i., while that of the DECH and DECL groups was 69.7 and 72 weeks p.i., respectively. 
Thus, DEC as well as AZT therapy delays the development of lymphomas associated with FeLV infection and significantly improves survival.}, number={1-2}, journal={Veterinary Immunology and Immunopathology}, publisher={Elsevier BV}, author={Nelson, Phillip and Sellon, Rance and Novotney, Carol and Devera, Cristina and Davidian, Marie and English, Robert and Tompkins, Mary and Tompkins, Wayne}, year={1995}, month={May}, pages={181–194} } @article{giltinan_davidian_1994, title={Assays for recombinant proteins: A problem in non-linear calibration}, volume={13}, ISSN={0277-6715 1097-0258}, url={http://dx.doi.org/10.1002/sim.4780131107}, DOI={10.1002/sim.4780131107}, abstractNote={Quantification of protein levels in biological matrices such as serum or plasma frequently relies on the techniques of immunoassay or bioassay. The relevant statistical problem is that of non-linear calibration, where one estimates analyte concentration in an unknown sample from a calibration curve fit to known standard concentrations. This paper discusses a general framework for calibration inference, that of the non-linear mixed effects model. Within this framework, we consider two issues in depth: accurate characterization of intra-assay variation, and the use of empirical Bayes methods in calibration. We show that proper characterization of intra-assay variability requires pooling of information across several assay runs. Simulation work indicates that use of empirical Bayes methods may afford considerable gain in efficiency; one must weigh this gain against practical considerations in the implementation of Bayesian techniques. We illustrate the methods discussed using a cell-based bioassay for the recombinant hormone relaxin.}, number={11}, journal={Statistics in Medicine}, publisher={Wiley}, author={Giltinan, David M. 
and Davidian, Marie}, year={1994}, month={Jun}, pages={1165–1179} } @article{noga_engel_arroll_mckenna_davidian_1994, title={Low serum antibacterial activity coincides with increased prevalence of shell disease in blue crabs Callinectes sapidus}, volume={19}, ISSN={0177-5103 1616-1580}, url={http://dx.doi.org/10.3354/dao019121}, DOI={10.3354/dao019121}, number={2}, journal={Diseases of Aquatic Organisms}, publisher={Inter-Research Science Center}, author={Noga, EJ and Engel, DP and Arroll, TW and McKenna, S and Davidian, M}, year={1994}, pages={121–128} } @article{yuh_beal_davidian_harrison_hester_kowalski_vonesh_wolfinger_1994, title={Population Pharmacokinetic/Pharmacodynamic Methodology and Applications: A Bibliography}, volume={50}, ISSN={0006-341X}, url={http://dx.doi.org/10.2307/2533402}, DOI={10.2307/2533402}, abstractNote={This bibliography lists all published methodological works (statistical methodology, implementation methodology, review papers, descriptions of software) in the area of population pharmacokinetics/pharmacodynamics up to 1993 and all published works describing applications of population pharmacokinetics/pharmacodynamics up to 1992.}, number={2}, journal={Biometrics}, publisher={JSTOR}, author={Yuh, Lianng and Beal, Stuart and Davidian, Marie and Harrison, Ferrin and Hester, Allen and Kowalski, Kenneth and Vonesh, Edward and Wolfinger, Russell}, year={1994}, month={Jun}, pages={566} } @article{davidian_giltinan_1993, title={Analysis of repeated measurement data using the nonlinear mixed effects model}, volume={20}, ISSN={0169-7439}, url={http://dx.doi.org/10.1016/0169-7439(93)80017-c}, DOI={10.1016/0169-7439(93)80017-c}, abstractNote={Abstract Davidian, M. and Giltinan, D.M., 1993. Analysis of repeated measurement data using the nonlinear mixed effects model. Chemometrics and Intelligent Laboratory Systems, 20: 1–24. Situations in which repeated measurements are taken on each of several individual items arise in many areas. 
These include assay development, where concentration—response data are available for each assay run in a series of assay experiments; pharmacokinetic analysis, where repeated blood concentration measurements are obtained from each of several subjects; and growth or decay studies, where growth or decay are measured over time for each plant, animal, or some other experimental unit. In these situations the model describing the response is often nonlinear in the parameters to be estimated, as is the case for the four-parameter logistic model, which is frequently used to characterize concentration—response relationships for radioimmunoassay enzyme-linked immunosorbent assay. Furthermore, response variability typically increases with level of response. The objectives of an analysis vary according to the application: for assay analysis, calibration of unknowns for the most recent run may be of interest; in pharmacokinetics, characterization of drug disposition for a patient population may be the focus. The nonlinear mixed effects (NME) model has been used to describe repeated measurement data for which the mean response function is nonlinear. In this tutorial, the NME model is motivated and described, and several methods are given for estimation and inference in the context of the model. 
The methods are illustrated by application to examples from the fields of water transport kinetics, assay development, and pharmacokinetics.}, number={1}, journal={Chemometrics and Intelligent Laboratory Systems}, publisher={Elsevier BV}, author={Davidian, Marie and Giltinan, David M.}, year={1993}, month={Aug}, pages={1–24} } @article{davidian_giltinan_1993, title={Some Simple Methods for Estimating Intraindividual Variability in Nonlinear Mixed Effects Models}, volume={49}, ISSN={0006-341X}, url={http://dx.doi.org/10.2307/2532602}, DOI={10.2307/2532602}, abstractNote={Methods are proposed to improve both population and individual inference within the nonlinear random coefficient regression framework by incorporating possible heterogeneity in the intraindividual variance structure. These methods extend existing variance function estimation techniques by pooling information across individuals. The methods are appropriate when it is reasonable to assume that there exists a common intraindividual variance structure.}, number={1}, journal={Biometrics}, publisher={JSTOR}, author={Davidian, Marie and Giltinan, David M.}, year={1993}, month={Mar}, pages={59} } @article{davidian_giltinan_1993, title={Some general estimation methods for nonlinear mixed-effects model}, volume={3}, ISSN={1054-3406 1520-5711}, url={http://dx.doi.org/10.1080/10543409308835047}, DOI={10.1080/10543409308835047}, abstractNote={A nonlinear mixed-effects model suitable for characterizing repeated measurement data is described. The model allows dependence of random coefficients on covariate information and accommodates general specifications of a common intraindividual covariance structure, such as models for variance within individuals that depend on individual mean response and autocorrelation. Two classes of procedures for estimation in this model are described, which incorporate estimation of unknown parameters in the assumed intraindividual covariance structure. 
The procedures are straightforward to implement using standard statistical software. The techniques are illustrated by examples in growth analysis and assay development.}, number={1}, journal={Journal of Biopharmaceutical Statistics}, publisher={Informa UK Limited}, author={Davidian, Marie and Giltinan, David M.}, year={1993}, month={Jan}, pages={23–55} } @article{davidian_gallant_1993, title={The nonlinear mixed effects model with a smooth random effects density}, volume={80}, ISSN={0006-3444 1464-3510}, url={http://dx.doi.org/10.1093/biomet/80.3.475}, DOI={10.1093/biomet/80.3.475}, abstractNote={Journal Article The nonlinear mixed effects model with a smooth random effects density Get access MARIE DAVIDIAN, MARIE DAVIDIAN Department of Statistics, North Carolina State UniversityCampus Box 8203, Raleigh, North Carolina 27695-8203, U. S.A. Search for other works by this author on: Oxford Academic Google Scholar A. RONALD GALLANT A. RONALD GALLANT Department of Statistics, North Carolina State UniversityCampus Box 8203, Raleigh, North Carolina 27695-8203, U. S.A. Search for other works by this author on: Oxford Academic Google Scholar Biometrika, Volume 80, Issue 3, September 1993, Pages 475–488, https://doi.org/10.1093/biomet/80.3.475 Published: 01 September 1993}, number={3}, journal={Biometrika}, publisher={Oxford University Press (OUP)}, author={Davidian, Marie and Gallant, A. Ronald}, year={1993}, pages={475–488} } @article{davidian_gallant_1992, title={Smooth nonparametric maximum likelihood estimation for population pharmacokinetics, with application to quinidine}, volume={20}, ISSN={0090-466X}, url={http://dx.doi.org/10.1007/bf01061470}, DOI={10.1007/bf01061470}, abstractNote={The seminonparametric (SNP) method, popular in the econometrics literature, is proposed for use in population pharmacokinetic analysis. 
For data that can be described by the nonlinear mixed effects model, the method produces smooth nonparametric estimates of the entire random effects density and simultaneous estimates of fixed effects by maximum likelihood. A graphical model-building strategy based on the SNP method is described. The methods are illustrated by a population analysis of plasma levels in 136 patients undergoing oral quinidine therapy.}, number={5}, journal={Journal of Pharmacokinetics and Biopharmaceutics}, publisher={Springer Science and Business Media LLC}, author={Davidian, Marie and Gallant, A. Ronald}, year={1992}, month={Oct}, pages={529–556} } @book{rives_davidian_ley_1991, title={Infectious bursal disease virus titers may be misleading}, volume={15}, number={2}, journal={Breakthrough, North Carolina Cooperative Extension Service}, author={Rives, D.V. and Davidian, M. and Ley, D.H.}, year={1991} } @inproceedings{davidian_gupta_1991, place={Atlanta, Georgia}, title={The use of regression analysis in nonwovens research}, booktitle={Proceedings of the TAPPI 1991 Nonwovens Conference}, publisher={TAPPI Press}, author={Davidian, M. and Gupta, B.S.}, year={1991}, pages={27–33} } @article{davidian_1990, title={Estimation of variance functions in assays with possibly unequal replication and nonnormal data}, volume={77}, ISSN={0006-3444 1464-3510}, url={http://dx.doi.org/10.1093/biomet/77.1.43}, DOI={10.1093/biomet/77.1.43}, abstractNote={Estimation of parametric variance functions using transformations of standard deviations based on replication at each design point is common in, but not limited to, assay analysis. It is shown that ignoring unequal replication can lead to bias and inefficiency in estimation. Efficiency comparisons for different transformations for nonnormal distributions are given. A method to account for bias is described that can offer robustness to nonnormality and leads to a comparison of Gini's mean difference to sample standard deviation. 
A method for computing all of these estimators using standard software is described.}, number={1}, journal={Biometrika}, publisher={Oxford University Press (OUP)}, author={Davidian, M.}, year={1990}, month={Mar}, pages={43–54} } @article{davidian_haaland_1990, title={Regression and calibration with nonconstant error variance}, volume={9}, ISSN={0169-7439}, url={http://dx.doi.org/10.1016/0169-7439(90)80074-g}, DOI={10.1016/0169-7439(90)80074-g}, abstractNote={Davidian, M. and Haaland, P., 1990. Regression and calibration with nonconstant error variance. Chemometrics and Intelligent Laboratory Systems, 9: 231–248. Ordinary least squares regression analysis is generally inappropriate for calibration and regression problems when the usual assumption of constant variance across all observations does not hold. Estimators of regression parameters are of relatively poor quality and the resulting inference can be misleading. The use of standard data transformations is a common alternative but may not provide enough flexibility for some cases. The use of weighted regression with weights estimated from replicates is generally unreliable for reasonable sample sizes. However, when the error variance changes systematically with the mean response or other variables, generalized least squares (GLS) and variance function estimation (VFE) methods can be used. The GLS-VFE approach allows the experimenter to specify a model for the systematic change in variance, estimate unknown parameters, and to use this information to provide more efficient estimates of the regression parameters. In this tutorial, GLS-VFE methods are introduced and described in the context of regression and calibration. 
An example of calibration for a chemical assay is used to motivate discussion and illustrate the implementation of these methods using standard software packages.}, number={3}, journal={Chemometrics and Intelligent Laboratory Systems}, publisher={Elsevier BV}, author={Davidian, Marie and Haaland, Perry D.}, year={1990}, month={Dec}, pages={231–248} } @article{davidian_carroll_1988, title={A Note on Extended Quasi-Likelihood}, volume={50}, ISSN={0035-9246}, url={http://dx.doi.org/10.1111/j.2517-6161.1988.tb01712.x}, DOI={10.1111/j.2517-6161.1988.tb01712.x}, abstractNote={Abstract : We study the method of extended quasi-likelihood estimation and inference of a variance function recently proposed by Nelder & Pregibon (1987). The estimates are inconsistent in general, and the test levels can be biased, but in many cases such as the exponential family the inconsistency and bias will not be a major concern. Extended quasi-likelihood is compared with Carroll & Ruppert's (1982) pseudo-likelihood method, which gives consistent estimates and, when slightly modified, asymptotically unbiased tests. We quantify the showing in this instance that the two estimates are closely related and may be asymptotically equivalent in many important cases. However, in some cases outside the exponential family, an asymptotic bias can persist. Heteroscedastic regression model.}, number={1}, journal={Journal of the Royal Statistical Society: Series B (Methodological)}, publisher={Wiley}, author={Davidian, M. and Carroll, R. J.}, year={1988}, month={Sep}, pages={74–82} } @article{davidian_carroll_smith_1988, title={Variance functions and the minimum detectable concentration in assays}, volume={75}, ISSN={0006-3444 1464-3510}, url={http://dx.doi.org/10.1093/biomet/75.3.549}, DOI={10.1093/biomet/75.3.549}, abstractNote={Journal Article Variance functions and the minimum detectable concentration in assays Get access M. DAVIDIAN, M. 
DAVIDIAN Department of Statistics, North Carolina State UniversityRaleigh, North Carolina 27695-8203, U.S.A. Search for other works by this author on: Oxford Academic Google Scholar R. J. CARROLL, R. J. CARROLL Department of Statistics, Texas A&M University, College StationTexas 77843, U.S.A. Search for other works by this author on: Oxford Academic Google Scholar W. SMITH W. SMITH Statistical and Mathematical Services, Eli Lilly & Company307 East McCarty Street, Indianapolis, Indiana 46285, U.S.A. Search for other works by this author on: Oxford Academic Google Scholar Biometrika, Volume 75, Issue 3, September 1988, Pages 549–556, https://doi.org/10.1093/biomet/75.3.549 Published: 01 September 1988 Article history Received: 01 August 1986 Revision received: 01 February 1988 Published: 01 September 1988}, number={3}, journal={Biometrika}, publisher={Oxford University Press (OUP)}, author={Davidian, M. and Carroll, R. J. and Smith, W.}, year={1988}, pages={549–556} } @article{davidian_carroll_1987, title={Variance Function Estimation}, volume={82}, ISSN={0162-1459}, DOI={10.2307/2289384}, abstractNote={Abstract Heteroscedastic regression models are used in fields including economics, engineering, and the biological and physical sciences. Often, the heteroscedasticity is modeled as a function of the covariates or the regression and other structural parameters. Standard asymptotic theory implies that how one estimates the variance function, in particular the structural parameters, has no effect on the first-order properties of the regression parameter estimates; there is evidence, however, both in practice and higher-order theory to suggest that how one estimates the variance function does matter. Further, in some settings, estimation of the variance function is of independent interest or plays an important role in estimation of other quantities. 
In this article, we study variance function estimation in a unified way, focusing on common methods proposed in the statistical and other literature, to make both general observations and compare different estimation schemes. We show that there are significant differences in both efficiency and robustness for many common methods. We develop a general theory for variance function estimation, focusing on estimation of the structural parameters and including most methods in common use in our development. The general qualitative conclusions are these. First, most variance function estimation procedures can be looked upon as regressions with “responses” being transformations of absolute residuals from a preliminary fit or sample standard deviations from replicates at a design point. Our conclusion is that the former is typically more efficient, but not uniformly so. Second, for variance function estimates based on transformations of absolute residuals, we show that efficiency is a monotone function of the efficiency of the fit from which the residuals are formed, at least for symmetric errors. Our conclusion is that one should iterate so that residuals are based on generalized least squares. Finally, robustness issues are of even more importance here than in estimation of a regression function for the mean. The loss of efficiency of the standard method away from the normal distribution is much more rapid than in the regression problem. As an example of the type of model and estimation methods we consider, for observation-covariate pairs (Yi, xi ), one may model the variance as proportional to a power of the mean response, for example, Where f(xi , β) is the possibly nonlinear mean function and θ is the structural parameter of interest. 
“Regression methods” for estimation of θ and σ based on residuals for some regression fit involve minimizing a sum of squares where some function T of the ‖ri ‖ plays the role of the “responses” and an appropriate function of the variance plays the role of the “regression function.” For example, if T(x) = x 2, the responses would be ri 2, and the form of the regression function would be suggested by the approximate fact . One could weight the sum of squares appropriately by considering the approximate variance of ri 2. For the case of replication at each xi , some methods suggest replacing the ri , in the function T by sample standard deviations at each xi . Other functions T, such as T(x) = x or log x have also been proposed.}, number={400}, journal={Journal of the American Statistical Association}, author={Davidian, M. and Carroll, R. J.}, year={1987}, month={Dec}, pages={1079–1091} }