% Publication database: eight journal articles, one field per line.
% abstractNote is not a standard BibTeX field; standard styles ignore it, so the
% non-ASCII characters and the unescaped ampersand / percent signs inside the
% abstracts are left verbatim.
% NOTE(review): in the six entries that have no pages field, volume equals the
% month number (e.g. volume 7 with month jul). This looks like an export
% artefact of online-first records -- confirm the real volume/number/pages
% against each DOI before relying on them.

@article{wu_gao_yang_reich_rappold_2024,
  author       = {Wu, Lili and Gao, Chenyin and Yang, Shu and Reich, Brian J. and Rappold, Ana G.},
  title        = {Estimating spatially varying health effects of wildland fire smoke using mobile health data},
  journal      = {Journal of the Royal Statistical Society Series C: Applied Statistics},
  year         = {2024},
  month        = jul,
  volume       = {7},
  issn         = {1467-9876},
  doi          = {10.1093/jrsssc/qlae034},
  url          = {https://doi.org/10.1093/jrsssc/qlae034},
  abstractNote = {Wildland fire smoke exposures are an increasing threat to public health, highlighting the need for studying the effects of protective behaviours on reducing health outcomes. Emerging smartphone applications provide unprecedented opportunities to deliver health risk communication messages to a large number of individuals in real-time and subsequently study the effectiveness, but also pose methodological challenges. Smoke Sense, a citizen science project, provides an interactive smartphone app platform for participants to engage with information about air quality, and ways to record their own health symptoms and actions taken to reduce smoke exposure. We propose a doubly robust estimator of the structural nested mean model that accounts for spatially and time-varying effects via a local estimating equation approach with geographical kernel weighting. Moreover, our analytical framework also handles informative missingness by inverse probability weighting of estimating functions. We evaluate the method using extensive simulation studies and apply it to Smoke Sense data to increase the knowledge base about the relationship between health preventive measures and health-related outcomes. Our results show that the protective behaviours’ effects vary over space and time and find that protective behaviours have more significant effects on reducing health symptoms in the Southwest than the Northwest region of the U.S.},
}

@article{lee_gao_ghosh_yang_2024,
  author       = {Lee, Dasom and Gao, Chenyin and Ghosh, Sujit and Yang, Shu},
  title        = {Transporting survival of an {HIV} clinical trial to the external target populations},
  journal      = {Journal of Biopharmaceutical Statistics},
  year         = {2024},
  month        = mar,
  volume       = {3},
  issn         = {1520-5711},
  doi          = {10.1080/10543406.2024.2330216},
  abstractNote = {Due to the heterogeneity of the randomized controlled trial (RCT) and external target populations, the estimated treatment effect from the RCT is not directly applicable to the target population. For example, the patient characteristics of the ACTG 175 HIV trial are significantly different from that of the three external target populations of interest: US early-stage HIV patients, Thailand HIV patients, and southern Ethiopia HIV patients. This paper considers several methods to transport the treatment effect from the ACTG 175 HIV trial to the target populations beyond the trial population. Most transport methods focus on continuous and binary outcomes; on the contrary, we derive and discuss several transport methods for survival outcomes: an outcome regression method based on a Cox proportional hazard (PH) model, an inverse probability weighting method based on the models for treatment assignment, sampling score, and censoring, and a doubly robust method that combines both methods, called the augmented calibration weighting (ACW) method. However, as the PH assumption was found to be incorrect for the ACTG 175 trial, the methods that depend on the PH assumption may lead to the biased quantification of the treatment effect. To account for the violation of the PH assumption, we extend the ACW method with the linear spline-based hazard regression model that does not require the PH assumption. Applying the aforementioned methods for transportability, we explore the effect of PH assumption, or the violation thereof, on transporting the survival results from the ACTG 175 trial to various external populations.},
}

@article{yang_gao_zeng_wang_2023,
  author       = {Yang, Shu and Gao, Chenyin and Zeng, Donglin and Wang, Xiaofei},
  title        = {Elastic integrative analysis of randomised trial and real-world data for treatment heterogeneity estimation},
  journal      = {Journal of the Royal Statistical Society Series B: Statistical Methodology},
  year         = {2023},
  month        = apr,
  volume       = {4},
  issn         = {1467-9868},
  doi          = {10.1093/jrsssb/qkad017},
  url          = {https://doi.org/10.1093/jrsssb/qkad017},
  abstractNote = {We propose a test-based elastic integrative analysis of the randomised trial and real-world data to estimate treatment effect heterogeneity with a vector of known effect modifiers. When the real-world data are not subject to bias, our approach combines the trial and real-world data for efficient estimation. Utilising the trial design, we construct a test to decide whether or not to use real-world data. We characterise the asymptotic distribution of the test-based estimator under local alternatives. We provide a data-adaptive procedure to select the test threshold that promises the smallest mean square error and an elastic confidence interval with a good finite-sample coverage property.},
}

@article{gao_yang_2023,
  author       = {Gao, Chenyin and Yang, Shu},
  title        = {Pretest estimation in combining probability and non-probability samples},
  journal      = {Electronic Journal of Statistics},
  year         = {2023},
  volume       = {17},
  number       = {1},
  pages        = {1492--1546},
  issn         = {1935-7524},
  doi          = {10.1214/23-EJS2137},
  abstractNote = {Multiple heterogeneous data sources are becoming increasingly available for statistical analyses in the era of big data. As an important example in finite-population inference, we develop a unified framework of the test-and-pool approach to general parameter estimation by combining gold-standard probability and non-probability samples. We focus on the case when the study variable is observed in both datasets for estimating the target parameters, and each contains other auxiliary variables. Utilizing the probability design, we conduct a pretest procedure to determine the comparability of the non-probability data with the probability data and decide whether or not to leverage the non-probability data in a pooled analysis. When the probability and non-probability data are comparable, our approach combines both data for efficient estimation. Otherwise, we retain only the probability data for estimation. We also characterize the asymptotic distribution of the proposed test-and-pool estimator under a local alternative and provide a data-adaptive procedure to select the critical tuning parameters that target the smallest mean square error of the test-and-pool estimator. Lastly, to deal with the non-regularity of the test-and-pool estimator, we construct a robust confidence interval that has a good finite-sample coverage property.},
}

@article{gao_yang_kim_2023,
  author       = {Gao, Chenyin and Yang, Shu and Kim, Jae Kwang},
  title        = {Soft calibration for selection bias problems under mixed-effects models},
  journal      = {Biometrika},
  year         = {2023},
  month        = mar,
  volume       = {3},
  issn         = {1464-3510},
  doi          = {10.1093/biomet/asad016},
  url          = {https://doi.org/10.1093/biomet/asad016},
  abstractNote = {Calibration weighting has been widely used to correct selection biases in nonprobability sampling, missing data and causal inference. The main idea is to calibrate the biased sample to the benchmark by adjusting the subject weights. However, hard calibration can produce enormous weights when an exact calibration is enforced on a large set of extraneous covariates. This article proposes a soft calibration scheme, where the outcome and the selection indicator follow mixed-effect models. The scheme imposes an exact calibration on the fixed effects and an approximate calibration on the random effects. On the one hand, our soft calibration has an intrinsic connection with best linear unbiased prediction, which results in a more efficient estimation compared to hard calibration. On the other hand, soft calibration weighting estimation can be envisioned as penalized propensity score weight estimation, with the penalty term motivated by the mixed-effect structure. The asymptotic distribution and a valid variance estimator are derived for soft calibration. We demonstrate the superiority of the proposed estimator over other competitors in simulation studies and using a real-world data application on the effect of BMI screening on childhood obesity.},
}

@article{deng_gao_2023,
  author       = {Deng, Yang and Gao, Chenyin},
  title        = {Where does the risk lie? {Systemic} risk and tail risk networks in the {Chinese} financial market},
  journal      = {Pacific Economic Review},
  year         = {2023},
  month        = feb,
  volume       = {2},
  issn         = {1468-0106},
  doi          = {10.1111/1468-0106.12417},
  abstractNote = {This paper studies tail risk connectedness and systemic risk in the Chinese financial market in the post‐crisis period of 2009–2017. We adopt the conditional value at risk (CoVaR) and complex theory to construct the tail risk connectedness network and identify the systemically important financial institutions during the Chinese financial turbulence. We precisely characterize the dynamic evolution of the tail risk connectedness at the institutional, sector and market levels. We find that, during normal times, the banking sector contributes the most tail risk to the market and that the real estate sector contributes the least. However, during the crisis period, the real estate sector played its role and became the most significant tail risk emitter. In addition, we identify the significant important financial institutions in the Chinese financial market, highlighting the fact that the four state‐owned commercial banks and two largest insurance companies dominate. Our results are helpful to both regulators for developing macroprudential supervision policies and investors interested in the Chinese financial market for making risk management strategies.},
}

@article{gao_thompson_kim_yang_2022,
  author       = {Gao, Chenyin and Thompson, Katherine Jenny and Kim, Jae Kwang and Yang, Shu},
  title        = {Nearest neighbour ratio imputation with incomplete multinomial outcome in survey sampling},
  journal      = {Journal of the Royal Statistical Society Series A: Statistics in Society},
  year         = {2022},
  month        = may,
  volume       = {5},
  issn         = {1467-985X},
  doi          = {10.1111/rssa.12841},
  url          = {https://doi.org/10.1111/rssa.12841},
  abstractNote = {Nonresponse is a common problem in survey sampling. Appropriate treatment can be challenging, especially when dealing with detailed breakdowns of totals. Often, the nearest neighbour imputation method is used to handle such incomplete multinomial data. In this article, we investigate the nearest neighbour ratio imputation (NNRI) estimator, in which auxiliary variables are used to identify the closest donor and the vector of proportions from the donor is applied to the total of the recipient to implement ratio imputation. To estimate the asymptotic variance, we first treat the NNRI as a special case of predictive matching imputation and build on earlier work to linearize the imputed estimate. To account for the non-negligible sampling fractions, parametric and generalized additive models are employed to incorporate the smoothness of the imputation estimator, which results in a valid variance estimator. We apply the proposed method to estimate expenditures detail items based on empirical data from the 2018 collection of the Service Annual Survey, conducted by the United States Census Bureau. Our simulation results demonstrate the validity of our proposed estimators and also confirm that the derived variance estimators have good performance even when the sampling fraction is non-negligible.},
}

@article{xie_du_zhao_gao_lyu_suo_kuang_2021,
  author       = {Xie, Qin and Du, Tong and Zhao, Ming and Gao, Chenyin and Lyu, Qifeng and Suo, Lun and Kuang, Yanping},
  title        = {Advanced trophectoderm quality increases the risk of a large for gestational age baby in single frozen-thawed blastocyst transfer cycles},
  journal      = {Human Reproduction},
  year         = {2021},
  month        = aug,
  volume       = {36},
  number       = {8},
  pages        = {2111--2120},
  issn         = {1460-2350},
  doi          = {10.1093/humrep/deab088},
  abstractNote = {STUDY QUESTION Does trophectoderm (TE) quality affect birthweight after single frozen-thawed blastocyst transfer? SUMMARY ANSWER Transfer of single blastocyst with advanced TE quality was associated with higher birthweight and increased risk of a large for gestational age (LGA) baby. WHAT IS KNOWN ALREADY Transfer of blastocysts with advanced TE quality results in higher ongoing pregnancy rates and a lower miscarriage risk. However, data on the relationship between TE quality and birthweight are still lacking. STUDY DESIGN, SIZE, DURATION This retrospective cohort study at a tertiary-care academic medical center included 1548 singleton babies born from single frozen-thawed blastocyst transfer from January 2011 to June 2019. PARTICIPANTS/MATERIALS, SETTING, METHODS Babies were grouped into four groups according to embryo expansion (Stages 3, 4, 5 and 6), three groups according to inner cell mass (ICM) quality (A, B and C), and three groups according to TE quality (A, B and C). Main outcomes included absolute birthweight, Z-scores adjusted for gestational age and gender, and adverse neonatal outcomes. Multivariable linear and logistic regression analyses were performed to investigate the association of neonatal outcomes with expansion stage, ICM quality and TE quality. MAIN RESULTS AND THE ROLE OF CHANCE As TE quality decreased, birthweight (3468.10 ± 471.52, 3357.69 ± 522.06, and 3288.79 ± 501.90 for A, B and C, respectively, P = 0.002), Z-scores (0.59 ± 1.07, 0.42 ± 1.04, and 0.27 ± 1.06 for A, B and C, respectively, P = 0.002) and incidence of LGA (28.9%, 19.7% and 17.4% for A, B and C, respectively, P = 0.027) decreased correspondingly. After adjusting for confounders, compared with the Grade A group, blastocysts with TE Grade B (standardized coefficients (β): −127.97 g, 95% CI: −234.46 to −21.47, P = 0.019) and blastocysts with TE grade C (β: −200.27 g, 95% CI: −320.69 to −79.86, P = 0.001) resulted in offspring with lower birthweight. Blastocysts with TE grade C brought babies with lower Z-scores than TE Grade A (β: −0.35, 95% CI: −0.59 to −0.10, P = 0.005). Also, embryos with TE Grade B (adjusted odds ratio (aOR):0.91, 95% CI: 0.84 to 0.99, P = 0.033) and embryos with TE Grade C (aOR : 0.89, 95% CI: 0.81 to 0.98, P = 0.016) had lower chance of leading to a LGA baby than those with TE Grade A. No association between neonatal outcomes with embryo expansion stage and ICM was observed (all P > 0.05). LIMITATIONS, REASONS FOR CAUTION The retrospective design, lack of controlling for several unknown confounders, and inter-observer variation limited this study. WIDER IMPLICATIONS OF THE FINDINGS The study extends our knowledge of the down-stream effect of TE quality on newborn birthweight and the risk of LGA. STUDY FUNDING/COMPETING INTEREST(S) This study was funded by National Key R&D Program of China (2018YFC1003000), National Natural Science Foundation of China (81771533 to Y.P.K. and 31200825 to L.S.) and Innovative Research Team of High-level Local Universities in Shanghai (SSMU-ZLCX20180401), Shanghai Sailing Program(21YF1423200) and the Fundamental research program funding of Ninth People's Hospital affiliated to Shanghai Jiao Tong university School of Medicine (JYZZ117). The authors declare no conflict of interest in this present study. TRIAL REGISTRATION NUMBER N/A},
}