@comment{
  Journal-article records. Citation keys follow the pattern
  lowercase-surnames_year and must stay stable because documents cite them.
  Conventions used below: one field per line, bare month macros, "--" for
  page ranges, bare DOIs, plain ISSNs, and brace protection for proper
  nouns and acronyms in titles. "abstractNote" is a non-standard field that
  bibliography styles ignore; it is kept under that name for whatever tool
  produced or consumes this export.
}

@article{abba_williams_reich_2023,
  author       = {Abba, Mohamed A. and Williams, Jonathan P. and Reich, Brian J.},
  title        = {A Penalized Complexity Prior for Deep {Bayesian} Transfer Learning with Application to Materials Informatics},
  journal      = {Annals of Applied Statistics},
  volume       = {17},
  number       = {4},
  pages        = {3241--3256},
  year         = {2023},
  month        = dec,
  issn         = {1941-7330},
  doi          = {10.1214/23-AOAS1759},
  abstractNote = {A key task in the emerging field of materials informatics is to use machine learning to predict a material's properties and functions. A fast and accurate predictive model allows researchers to more efficiently identify or construct a material with desirable properties. As in many fields, deep learning is one of the state-of-the art approaches, but fully training a deep learning model is not always feasible in materials informatics due to limitations on data availability, computational resources, and time. Accordingly, there is a critical need in the application of deep learning to materials informatics problems to develop efficient transfer learning algorithms. The Bayesian framework is natural for transfer learning because the model trained from the source data can be encoded in the prior distribution for the target task of interest. However, the Bayesian perspective on transfer learning is relatively unaccounted for in the literature, and is complicated for deep learning because the parameter space is large and the interpretations of individual parameters are unclear. Therefore, rather than subjective prior distributions for individual parameters, we propose a new Bayesian transfer learning approach based on the penalized complexity prior on the Kullback-Leibler divergence between the predictive models of the source and target tasks. We show via simulations that the proposed method outperforms other transfer learning methods across a variety of settings. The new method is then applied to a predictive materials science problem where we show improved precision for estimating the band gap of a material based on its structural properties.},
}

@article{williams_ommen_hannig_2023,
  author       = {Williams, Jonathan P. and Ommen, Danica M. and Hannig, Jan},
  title        = {Generalized Fiducial Factor: An Alternative to the {Bayes} Factor for Forensic Identification of Source Problems},
  journal      = {Annals of Applied Statistics},
  volume       = {17},
  number       = {1},
  pages        = {378--402},
  year         = {2023},
  month        = mar,
  issn         = {1941-7330},
  doi          = {10.1214/22-AOAS1632},
  abstractNote = {One formulation of forensic identification of source problems is to determine the source of trace evidence, for instance, glass fragments found on a suspect for a crime. The current state of the science is to compute a Bayes factor (BF) comparing the marginal distribution of measurements of trace evidence under two competing propositions for whether or not the unknown source evidence originated from a specific source. The obvious problem with such an approach is the ability to tailor the prior distributions (placed on the features/parameters of the statistical model for the measurements of trace evidence) in favor of the defense or prosecution, which is further complicated by the fact that the typical number of measurements of trace evidence is typically sufficiently small that prior choice/specification has a strong influence on the value of the BF. To remedy this problem of prior specification and choice, we develop an alternative to the BF, within the framework of generalized fiducial inference (GFI), that we term a \emph{generalized fiducial factor} (GFF). Furthermore, we demonstrate empirically, on the synthetic and real Netherlands Forensic Institute (NFI) casework data, deficiencies in the BF and classical/frequentist likelihood ratio (LR) approaches.},
}

@article{koner_williams_2023,
  author       = {Koner, Salil and Williams, Jonathan P.},
  title        = {The {EAS} approach to variable selection for multivariate response data in high-dimensional settings},
  journal      = {Electronic Journal of Statistics},
  volume       = {17},
  number       = {2},
  pages        = {1947--1995},
  year         = {2023},
  issn         = {1935-7524},
  doi          = {10.1214/23-EJS2141},
  abstractNote = {In this paper, we extend the epsilon admissible subsets (EAS) model selection approach, from its original construction in the high-dimensional linear regression setting, to an EAS framework for performing group variable selection in the high-dimensional multivariate regression setting. Assuming a matrix-Normal linear model we show that the EAS strategy is asymptotically consistent if there exists a sparse, true data generating set of predictors. Nonetheless, our EAS strategy is designed to estimate a posterior-like, generalized fiducial distribution over a parsimonious class of models in the setting of correlated predictors and/or in the absence of a sparsity assumption. The effectiveness of our approach, to this end, is demonstrated empirically in simulation studies, and is compared to other state-of-the-art model/variable selection procedures.},
}

@article{williams_xie_hannig_2022,
  author       = {Williams, Jonathan P. and Xie, Yuying and Hannig, Jan},
  title        = {The {EAS} approach for graphical selection consistency in vector autoregression models},
  journal      = {Canadian Journal of Statistics-Revue Canadienne de Statistique},
  year         = {2022},
  month        = sep,
  issn         = {1708-945X},
  doi          = {10.1002/cjs.11726},
  abstractNote = {As evidenced by various recent and significant papers within the frequentist literature, along with numerous applications in macroeconomics, genomics, and neuroscience, there continues to be substantial interest in understanding the theoretical estimation properties of high-dimensional vector autoregression (VAR) models. To date, however, while Bayesian VAR (BVAR) models have been developed and studied empirically (primarily in the econometrics literature), there exist very few theoretical investigations of the repeated-sampling properties for BVAR models in the literature, and there exist no generalized fiducial investigations of VAR models. In this direction, we construct methodology via the $\epsilon$-admissible subsets (EAS) approach for inference based on a generalized fiducial distribution of relative model probabilities over all sets of active/inactive components (graphs) of the VAR transition matrix. We provide a mathematical proof of pairwise and strong graphical selection consistency for the EAS approach for stable VAR(1) models, and demonstrate empirically that it is an effective strategy in high-dimensional settings.},
}

@article{nghiem_williams_afoakwah_huynh_ng_byrnes_2021,
  author       = {Nghiem, Son and Williams, Jonathan and Afoakwah, Clifford and Huynh, Quan and Ng, Shu-kay and Byrnes, Joshua},
  title        = {Can Administrative Health Data Improve the Gold Standard? Evidence from a Model of the Progression of Myocardial Infarction},
  journal      = {International Journal of Environmental Research and Public Health},
  volume       = {18},
  number       = {14},
  year         = {2021},
  month        = jul,
  issn         = {1660-4601},
  doi          = {10.3390/ijerph18147385},
  abstractNote = {Background: Myocardial infarction (MI), remains one of the leading causes of death and disability globally but publications on the progression of MI using data from the real world are limited. Multistate models have been widely used to estimate transition rates between disease states to evaluate the cost-effectiveness of healthcare interventions. We apply a Bayesian multistate hidden Markov model to investigate the progression of MI using a longitudinal dataset from Queensland, Australia. Objective: To apply a new model to investigate the progression of myocardial infarction (MI) and to show the potential to use administrative data for economic evaluation and modeling disease progression. Methods: The cohort includes 135,399 patients admitted to public hospitals in Queensland, Australia, in 2010 treatment of cardiovascular diseases. Any subsequent hospitalizations of these patients were followed until 2015. This study focused on the sub-cohort of 8705 patients hospitalized for MI. We apply a Bayesian multistate hidden Markov model to estimate transition rates between health states of MI patients and adjust for delayed enrolment biases and misclassification errors. We also estimate the association between age, sex, and ethnicity with the progression of MI. Results: On average, the risk of developing Non-ST segment elevation myocardial infarction (NSTEMI) was 8.7\%, and ST-segment elevation myocardial infarction (STEMI) was 4.3\%. The risk varied with age, sex, and ethnicity. The progression rates to STEMI or NSTEMI were higher among males, Indigenous, or elderly patients. For example, the risk of STEMI among males was 4.35\%, while the corresponding figure for females was 3.71\%. After adjustment for misclassification, the probability of STEMI increased by 1.2\%, while NSTEMI increased by 1.4\%. Conclusions: This study shows that administrative health data were useful to estimate factors determining the risk of MI and the progression of this health condition. It also shows that misclassification may cause the incidence of MI to be under-estimated.},
}

@article{williams_2021,
  author       = {Williams, Jonathan P.},
  title        = {Discussion of ``A {Gibbs} Sampler for a Class of Random Convex Polytopes''},
  journal      = {Journal of the American Statistical Association},
  volume       = {116},
  number       = {535},
  pages        = {1198--1200},
  year         = {2021},
  month        = jul,
  issn         = {1537-274X},
  doi          = {10.1080/01621459.2021.1946405},
  abstractNote = {An exciting new algorithmic breakthrough has been advanced for how to carry out inferences in a Dempster-Shafer (DS) formulation of a categorical data generating model. The developed sampling mechanism, which draws on theory for directed graphs, is a clever and remarkable achievement, as this has been an open problem for many decades. In this discussion, I comment on important contributions, central questions, and prevailing matters of the article.},
}

@article{oh_foster_williams_zheng_ru_lunn_mowat_2019,
  author       = {Oh, Annie and Foster, Melanie L. and Williams, Jonathan G. and Zheng, Chaowen and Ru, Hongyu and Lunn, Katharine F. and Mowat, Freya M.},
  title        = {Diagnostic utility of clinical and laboratory test parameters for differentiating between sudden acquired retinal degeneration syndrome and pituitary-dependent hyperadrenocorticism in dogs},
  journal      = {Veterinary Ophthalmology},
  volume       = {22},
  number       = {6},
  pages        = {842--858},
  year         = {2019},
  month        = mar,
  publisher    = {Wiley},
  issn         = {1463-5216 1463-5224},
  doi          = {10.1111/vop.12661},
}