@article{tang_martin_2024, title={Empirical Bayes inference in sparse high-dimensional generalized linear models}, volume={18}, ISSN={["1935-7524"]}, DOI={10.1214/24-EJS2274}, abstractNote={High-dimensional linear models have been widely studied, but the developments in high-dimensional generalized linear models, or GLMs, have been slower. In this paper, we propose an empirical or data-driven prior leading to an empirical Bayes posterior distribution which can be used for estimation of and inference on the coefficient vector in a high-dimensional GLM, as well as for variable selection. We prove that our proposed posterior concentrates around the true/sparse coefficient vector at the optimal rate, provide conditions under which the posterior can achieve variable selection consistency, and prove a Bernstein–von Mises theorem that implies asymptotically valid uncertainty quantification. Computation of the proposed empirical Bayes posterior is simple and efficient, and is shown to perform well in simulations compared to existing Bayesian and non-Bayesian methods in terms of estimation and variable selection.}, number={2}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Tang, Yiqi and Martin, Ryan}, year={2024}, pages={3212–3246} } @article{martin_williams_2024, title={Large-Sample Theory for Inferential Models: A Possibilistic Bernstein-von Mises Theorem}, volume={14909}, ISBN={["978-3-031-67976-6"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-031-67977-3_12}, journal={BELIEF FUNCTIONS: THEORY AND APPLICATIONS, BELIEF 2024}, author={Martin, Ryan and Williams, Jonathan P.}, year={2024}, pages={111–120} } @article{hector_martin_2024, title={Turning the information-sharing dial: Efficient inference from different data sources}, volume={18}, ISSN={["1935-7524"]}, DOI={10.1214/24-EJS2265}, number={2}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Hector, Emily C. and Martin, Ryan}, year={2024}, pages={2974–3020} } @article{cella_martin_2024, title={Variational Approximations of Possibilistic Inferential Models}, volume={14909}, ISBN={["978-3-031-67976-6"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-031-67977-3_13}, journal={BELIEF FUNCTIONS: THEORY AND APPLICATIONS, BELIEF 2024}, author={Cella, Leonardo and Martin, Ryan}, year={2024}, pages={121–130} } @article{martin_2024, title={Which Statistical Hypotheses are Afflicted with False Confidence?}, volume={14909}, ISBN={["978-3-031-67976-6"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-031-67977-3_15}, journal={BELIEF FUNCTIONS: THEORY AND APPLICATIONS, BELIEF 2024}, author={Martin, Ryan}, year={2024}, pages={140–149} } @article{dixit_martin_2023, title={A PRticle filter algorithm for nonparametric estimation of multivariate mixing distributions}, volume={33}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-023-10242-2}, abstractNote={Predictive recursion (PR) is a fast, recursive algorithm that gives a smooth estimate of the mixing distribution under the general mixture model. However, the PR algorithm requires evaluation of a normalizing constant at each iteration. When the support of the mixing distribution is of relatively low dimension, this is not a problem since quadrature methods can be used and are very efficient. But when the support is of higher dimension, quadrature methods are inefficient and there is no obvious Monte Carlo-based alternative.
In this paper, we propose a new strategy, which we refer to as PRticle filter, wherein we augment the basic PR algorithm with a filtering mechanism that adaptively reweights an initial set of particles along the updating sequence which are used to obtain Monte Carlo approximations of the normalizing constants. Convergence properties of the PRticle filter approximation are established and its empirical accuracy is demonstrated with simulation studies and a marked spatial point process data analysis.}, number={4}, journal={STATISTICS AND COMPUTING}, author={Dixit, Vaidehi and Martin, Ryan}, year={2023}, month={Aug} } @article{syring_martin_2023, title={Gibbs posterior concentration rates under sub-exponential type losses}, volume={29}, ISSN={["1573-9759"]}, DOI={10.3150/22-BEJ1491}, abstractNote={Bayesian posterior distributions are widely used for inference, but their dependence on a statistical model creates some challenges. In particular, there may be lots of nuisance parameters that require prior distributions and posterior computations, plus a potentially serious risk of model misspecification bias. Gibbs posterior distributions, on the other hand, offer direct, principled, probabilistic inference on quantities of interest through a loss function, not a model-based likelihood. Here we provide simple sufficient conditions for establishing Gibbs posterior concentration rates when the loss function is of a sub-exponential type. We apply these general results in a range of practically relevant examples, including mean regression, quantile regression, and sparse high-dimensional classification. We also apply these techniques in an important problem in medical statistics, namely, estimation of a personalized minimum clinically important difference.}, number={2}, journal={BERNOULLI}, author={Syring, Nicholas and Martin, Ryan}, year={2023}, month={May}, pages={1080–1108} } @article{cella_martin_2023, title={Possibility-theoretic statistical inference offers performance and probativeness assurances}, volume={163}, ISSN={["1873-4731"]}, DOI={10.1016/j.ijar.2023.109060}, abstractNote={Statisticians are largely focused on developing methods that perform well in a frequentist sense—even the Bayesians. But the widely-publicized replication crisis suggests that these performance guarantees alone are not enough to instill confidence in scientific discoveries. In addition to reliably detecting hypotheses that are (in)compatible with data, investigators require methods that can probe for hypotheses that are actually supported by the data. In this paper, we demonstrate that valid inferential models (IMs) achieve both performance and probativeness properties and we offer a powerful new result that ensures the IM's probing is reliable. We also compare and contrast the IM's dual performance and probativeness abilities with that of Deborah Mayo's severe testing framework.}, journal={INTERNATIONAL JOURNAL OF APPROXIMATE REASONING}, author={Cella, Leonardo and Martin, Ryan}, year={2023}, month={Dec} } @article{dixit_martin_2023, title={Revisiting consistency of a recursive estimator of mixing distributions}, volume={17}, ISSN={["1935-7524"]}, DOI={10.1214/23-EJS2121}, abstractNote={Estimation of the mixing distribution under a general mixture model is a very difficult problem, especially when the mixing distribution is assumed to have a density. Predictive recursion (PR) is a fast, recursive algorithm for nonparametric estimation of a mixing distribution/density in general mixture models. 
However, the existing PR consistency results make rather strong assumptions, some of which fail for a class of mixture models relevant for monotone density estimation, namely, scale mixtures of uniform kernels. In this paper, we develop new consistency results for PR under weaker conditions. Armed with this new theory, we prove that PR is consistent for the scale mixture of uniforms problem, and we show that the corresponding PR mixture density estimator has very good practical performance compared to several existing methods for monotone density estimation.}, number={1}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Dixit, Vaidehi and Martin, Ryan}, year={2023}, pages={1007–1042} } @article{martin_2024, title={Ryan Martin's contribution to the Discussion of 'Estimating means of bounded random variables by betting' by Waudby-Smith and Ramdas}, volume={86}, ISSN={["1467-9868"]}, DOI={10.1093/jrsssb/qkad112}, abstractNote={Congratulations to Waudby-Smith and Ramdas (WSR) for their excellent contribution to the rapidly-growing literature on anytime-valid inference. Since some of my work involves imprecise probability, Professor Ramdas asked me privately what, if any, connections there are between e-values, etc. and imprecise probability. The answer to his question might be of general interest, so I'll share it here. Following WSR, let $(X_t : t \geq 1)$ be a $[0,1]$-valued process with distribution $P \in \mathcal{P}_\mu$ having unknown mean $\mu$. Write $X^t = (X_1, \ldots, X_t)$ and $x^t$ for a generic realization. Take $M(X^t, m)$ to be an e-value for testing $H_0^m: \mu = m$.}, number={1}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES B-STATISTICAL METHODOLOGY}, author={Martin, Ryan}, year={2024}, month={Feb} } @article{wu_martin_2023, title={A Comparison of Learning Rate Selection Methods in Generalized Bayesian Inference}, volume={18}, ISSN={["1936-0975"]}, DOI={10.1214/21-BA1302}, abstractNote={Generalized Bayes posterior distributions are formed by putting a fractional power on the likelihood before combining with the prior via Bayes's formula. This fractional power, which is often viewed as a remedy for potential model misspecification bias, is called the learning rate, and a number of data-driven learning rate selection methods have been proposed in the recent literature. Each of these proposals has a different focus, a different target they aim to achieve, which makes them difficult to compare. In this paper, we provide a direct head-to-head comparison of these learning rate selection methods in various misspecified model scenarios, in terms of several relevant metrics, in particular, coverage probability of the generalized Bayes credible regions. In some examples all the methods perform well, while in others the misspecification is too severe to be overcome, but we find that the so-called generalized posterior calibration algorithm tends to outperform the others in terms of credible region coverage probability.}, number={1}, journal={BAYESIAN ANALYSIS}, author={Wu, Pei-Shien and Martin, Ryan}, year={2023}, month={Mar}, pages={105–132} } @article{hose_hanss_martin_2022, title={A Practical Strategy for Valid Partial Prior-Dependent Possibilistic Inference}, volume={13506}, ISBN={["978-3-031-17800-9"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-031-17801-6_19}, abstractNote={This paper considers statistical inference in contexts where only incomplete prior information is available.
We develop a practical construction of a suitably valid inferential model (IM) that (a) takes the form of a possibility measure, and (b) depends mainly on the likelihood and partial prior. We also propose a general computational algorithm through which the proposed IM can be evaluated in applications.}, journal={BELIEF FUNCTIONS: THEORY AND APPLICATIONS (BELIEF 2022)}, author={Hose, Dominik and Hanss, Michael and Martin, Ryan}, year={2022}, pages={197–206} } @article{martin_syring_2022, title={Direct Gibbs posterior inference on risk minimizers: Construction, concentration, and calibration}, volume={47}, ISSN={["0169-7161"]}, DOI={10.1016/bs.host.2022.06.004}, abstractNote={Real-world problems, often couched as machine learning applications, involve quantities of interest that have real-world meaning, independent of any statistical model. To avoid potential model misspecification bias or over-complicating the problem formulation, a direct, model-free approach is desired. The traditional Bayesian framework relies on a model for the data-generating process so, apparently, the desired direct, model-free, posterior-probabilistic inference is out of reach. Fortunately, likelihood functions are not the only means of linking data and quantities of interest. Loss functions provide an alternative link, where the quantity of interest is defined, or at least could be defined, as a minimizer of the corresponding risk, or expected loss. In this case, one can obtain what is commonly referred to as a Gibbs posterior distribution by using the empirical risk function directly. This manuscript explores the Gibbs posterior construction, its asymptotic concentration properties, and the frequentist calibration of its credible regions. By being free from the constraints of model specification, Gibbs posteriors create new opportunities for probabilistic inference in modern statistical learning problems.}, journal={ADVANCEMENTS IN BAYESIAN METHODS AND IMPLEMENTATION}, author={Martin, Ryan and Syring, Nicholas}, year={2022}, pages={1–41} } @article{cella_martin_2022, title={Direct and approximately valid probabilistic inference on a class of statistical functionals}, volume={151}, ISSN={["1873-4731"]}, DOI={10.1016/j.ijar.2022.09.011}, abstractNote={Existing frameworks for probabilistic inference assume the quantity of interest is the parameter of a posited statistical model. In machine learning applications, however, often there is no statistical model/parameter; the quantity of interest is a statistical functional, a feature of the underlying distribution. Model-based methods can only handle such problems indirectly, via marginalization from a model parameter to the real quantity of interest. Here we develop a generalized inferential model (IM) framework for direct probabilistic uncertainty quantification on the quantity of interest. In particular, we construct a data-dependent, bootstrap-based possibility measure for uncertainty quantification and inference. We then prove that this new approach provides approximately valid inference in the sense that the plausibility values assigned to hypotheses about the unknowns are asymptotically well-calibrated in a frequentist sense. Among other things, this implies that confidence regions for the underlying functional derived from our proposed IM are approximately valid. 
The method is shown to perform well in key examples, including quantile regression, and in a personalized medicine application.}, journal={INTERNATIONAL JOURNAL OF APPROXIMATE REASONING}, author={Cella, Leonardo and Martin, Ryan}, year={2022}, month={Dec}, pages={205–224} } @article{dixit_martin_2022, title={Estimating a Mixing Distribution on the Sphere Using Predictive Recursion}, ISSN={["0976-8394"]}, DOI={10.1007/s13571-021-00275-w}, abstractNote={Mixture models are commonly used when data show signs of heterogeneity and, often, it is important to estimate the distribution of the latent variable responsible for that heterogeneity. This is a common problem for data taking values in a Euclidean space, but the work on mixing distribution estimation based on directional data taking values on the unit sphere is limited. In this paper, we propose using the predictive recursion (PR) algorithm to solve for a mixture on a sphere. One key feature of PR is its computational efficiency. Moreover, compared to likelihood-based methods that only support finite mixing distribution estimates, PR is able to estimate a smooth mixing density. PR’s asymptotic consistency in spherical mixture models is established, and simulation results showcase its benefits compared to existing likelihood-based methods. Using PR we propose a method for goodness-of-fit testing and a clustering mechanism in the context of directional data with two real-data illustrations.}, journal={SANKHYA-SERIES B-APPLIED AND INTERDISCIPLINARY STATISTICS}, author={Dixit, Vaidehi and Martin, Ryan}, year={2022}, month={Feb} } @article{cella_martin_2022, title={Valid Inferential Models Offer Performance and Probativeness Assurances}, volume={13506}, ISBN={["978-3-031-17800-9"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-031-17801-6_21}, abstractNote={Bayesians and frequentists are now largely focused on developing methods that perform well in a frequentist sense. But the widely-publicized replication crisis suggests that performance guarantees are not enough for good science. In addition to reliably detecting hypotheses that are incompatible with data, users require methods that can probe for hypotheses that are actually supported by the data. In this paper, we demonstrate that valid inferential models achieve both performance and probativeness properties. We also draw important connections between inferential models and Deborah Mayo's severe testing.}, journal={BELIEF FUNCTIONS: THEORY AND APPLICATIONS (BELIEF 2022)}, author={Cella, Leonardo and Martin, Ryan}, year={2022}, pages={219–228} } @article{mao_martin_reich_2022, title={Valid Model-Free Spatial Prediction}, volume={12}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2022.2147531}, abstractNote={Predicting the response at an unobserved location is a fundamental problem in spatial statistics. Given the difficulty in modeling spatial dependence, especially in non-stationary cases, model-based prediction intervals are at risk of misspecification bias that can negatively affect their validity. Here we present a new approach for model-free spatial prediction based on the {\em conformal prediction} machinery. Our key observation is that spatial data can be treated as exactly or approximately exchangeable in a wide range of settings. 
For example, when the spatial locations are deterministic, we prove that the response values are, in a certain sense, locally approximately exchangeable for a broad class of spatial processes, and we develop a local spatial conformal prediction algorithm that yields valid prediction intervals without model assumptions. Numerical examples with both real and simulated data confirm that the proposed conformal prediction intervals are valid and generally more efficient than existing model-based procedures across a range of non-stationary and non-Gaussian settings.}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Mao, Huiying and Martin, Ryan and Reich, Brian J.}, year={2022}, month={Dec} } @article{cella_martin_2022, title={Valid inferential models for prediction in supervised learning problems}, volume={150}, ISSN={["1873-4731"]}, DOI={10.1016/j.ijar.2022.08.001}, abstractNote={Prediction, where observed data is used to quantify uncertainty about a future observation, is a fundamental problem in statistics. Prediction sets with coverage probability guarantees are a common solution, but these do not provide probabilistic uncertainty quantification in the sense of assigning beliefs to relevant assertions about the future observable. Alternatively, we recommend the use of a {\em probabilistic predictor}, a data-dependent (imprecise) probability distribution for the to-be-predicted observation given the observed data. It is essential that the probabilistic predictor be reliable or valid, and here we offer a notion of validity and explore its behavioral and statistical implications. In particular, we show that valid probabilistic predictors must be imprecise, that they avoid sure loss, and that they lead to prediction procedures with desirable frequentist error rate control properties. We provide a general construction of a provably valid probabilistic predictor, which has close connections to the powerful conformal prediction machinery, and we illustrate this construction in regression and classification applications.}, journal={INTERNATIONAL JOURNAL OF APPROXIMATE REASONING}, author={Cella, Leonardo and Martin, Ryan}, year={2022}, month={Nov}, pages={1–18} } @article{cella_martin_2021, title={Approximately Valid and Model-Free Possibilistic Inference}, volume={12915}, ISBN={["978-3-030-88600-4"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-030-88601-1_13}, abstractNote={Existing frameworks for probabilistic inference assume the inferential target is a feature of the posited statistical model's parameters. In this paper, we develop a new version of the so-called generalized inferential model framework for possibilistic inference on unknowns that are well-defined independent of a statistical model. We provide a bootstrap-based implementation and establish approximate validity.}, journal={BELIEF FUNCTIONS: THEORY AND APPLICATIONS (BELIEF 2021)}, author={Cella, Leonardo and Martin, Ryan}, year={2021}, pages={127–136} } @article{shi_ghosal_martin_2021, title={Bayesian estimation of sparse precision matrices in the presence of Gaussian measurement error}, volume={15}, ISSN={["1935-7524"]}, DOI={10.1214/21-EJS1904}, abstractNote={Estimation of sparse, high-dimensional precision matrices is an important and challenging problem. Existing methods all assume that observations can be made precisely but, in practice, this often is not the case; for example, the instruments used to measure the response may have limited precision.
The present paper incorporates measurement error in the context of estimating a sparse, high-dimensional precision matrix. In particular, for a Gaussian graphical model with data corrupted by Gaussian measurement error with unknown variance, we establish a general result which gives sufficient conditions under which the posterior contraction rates that hold in the no-measurement-error case carry over to the measurement-error case. Interestingly, this result does not require that the measurement error variance be small. We apply our general result to several cases with well-known prior distributions for sparse precision matrices and also to a case with a newly-constructed prior for precision matrices with a sparse factor-loading form. Two different simulation studies highlight the empirical benefits of accounting for the measurement error as opposed to ignoring it, even when that measurement error is relatively small.}, number={2}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Shi, Wenli and Ghosal, Subhashis and Martin, Ryan}, year={2021}, pages={4545–4579} } @article{liu_martin_2021, title={Comment: Settle the Unsettling: An Inferential Models Perspective}, volume={36}, ISSN={["2168-8745"]}, DOI={10.1214/21-STS765B}, abstractNote={Here, we demonstrate that the inferential model (IM) framework, unlike the updating rules that Gong and Meng show to be unreliable, provides valid and efficient inferences/prediction while not being susceptible to sure loss. In this sense, the IM framework settles what Gong and Meng characterized as “unsettling.”}, number={2}, journal={STATISTICAL SCIENCE}, author={Liu, Chuanhai and Martin, Ryan}, year={2021}, month={May}, pages={196–200} } @article{cahoon_martin_2021, title={Generalized inferential models for censored data}, volume={137}, ISSN={["1873-4731"]}, DOI={10.1016/j.ijar.2021.06.015}, abstractNote={Inferential challenges that arise when data are censored have been extensively studied under the classical frameworks. In this paper, we provide an alternative generalized inferential model approach whose output is a data-dependent plausibility function. This construction is driven by an association between the distribution of the relative likelihood function at the interest parameter and an unobserved auxiliary variable. The plausibility function emerges from the distribution of a suitably calibrated random set designed to predict that unobserved auxiliary variable. The evaluation of this plausibility function requires a novel use of the classical Kaplan–Meier estimator to estimate the censoring rather than the event distribution. We prove that the proposed method provides valid inference, at least approximately, and our real- and simulated-data examples demonstrate its superior performance compared to existing methods.}, journal={INTERNATIONAL JOURNAL OF APPROXIMATE REASONING}, author={Cahoon, Joyce and Martin, Ryan}, year={2021}, month={Oct}, pages={51–66} } @article{bhattacharya_martin_2022, title={Gibbs posterior inference on multivariate quantiles}, volume={218}, ISSN={["1873-1171"]}, DOI={10.1016/j.jspi.2021.10.003}, abstractNote={Bayesian and other likelihood-based methods require specification of a statistical model and may not be fully satisfactory for inference on quantities, such as quantiles, that are not naturally defined as model parameters. In this paper, we construct a direct and model-free Gibbs posterior distribution for multivariate quantiles. 
Being model-free means that inferences drawn from the Gibbs posterior are not subject to model misspecification bias, and being direct means that no priors for or marginalization over nuisance parameters are required. We show here that the Gibbs posterior enjoys a root-n convergence rate and a Bernstein–von Mises property, i.e., for large n, the Gibbs posterior distribution can be approximated by a Gaussian. Moreover, we present numerical results showing the validity and efficiency of credible sets derived from a suitably scaled Gibbs posterior.}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Bhattacharya, Indrabati and Martin, Ryan}, year={2022}, month={May}, pages={106–121} } @article{hong_martin_2021, title={Imprecise credibility theory}, ISSN={["1748-5002"]}, DOI={10.1017/S1748499521000117}, abstractNote={The classical credibility theory is a cornerstone of experience rating, especially in the field of property and casualty insurance. An obstacle to putting the credibility theory into practice is the conversion of available prior information into a precise choice of crucial hyperparameters. In most real-world applications, the information necessary to justify a precise choice is lacking, so we propose an imprecise credibility estimator that honestly acknowledges the imprecision in the hyperparameter specification. This results in an interval estimator that is doubly robust in the sense that it retains the credibility estimator’s freedom from model specification and fast asymptotic concentration, while simultaneously being insensitive to prior hyperparameter specification.}, journal={ANNALS OF ACTUARIAL SCIENCE}, author={Hong, Liang and Martin, Ryan}, year={2021}, month={Apr} } @misc{martin_balch_ferson_2021, title={Response to the comment Confidence in confidence distributions!}, volume={477}, ISSN={["1471-2946"]}, DOI={10.1098/rspa.2020.0579}, abstractNote={Thanks to Drs Céline Cunen, Nils Lid Hjort and Tore Schweder for their interest in our recent contribution [1] concerning the probability dilution phenomenon in satellite conjunction analysis and, more generally, the difficulties associated with representing statistical inference using ordinary or precise probabilities. Our analysis focused primarily on Bayesian uncertainty quantification but, of course, this is not the only probabilistic approach available, so we welcome a confidence distribution-based solution from those who literally wrote the book on confidence distributions [2]. Their illustration reproduces the lack of proper calibration—or false confidence—that can emerge when marginalizing a Bayesian posterior distribution and highlights the difference between Bayesian posteriors and confidence distributions (CDs) with respect to the validity property advocated in our paper. However, the message presented by Cunen et al. [3]—that replacing a Bayesian posterior distribution with a CD is all it takes to overcome false confidence—is potentially misleading. Our false confidence theorem applies to all epistemic probability distributions, including CDs; so, contrary to the authors' claim, their proposed CD is at risk of false confidence too. In particular, as is well known, CDs only support reliable inferences on one-sided propositions of the form (−∞, a] or [a, +∞). Other sets, including two-sided intervals and their complements, are still subject to the false confidence phenomenon uncovered in Balch et al.
[1].}, number={2250}, journal={PROCEEDINGS OF THE ROYAL SOCIETY A-MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES}, author={Martin, Ryan and Balch, Michael S. and Ferson, Scott}, year={2021}, month={Jun} } @article{martin_2021, title={Ryan Martin's contribution to the Discussion of 'Testing by betting: A strategy for statistical and scientific communication' by Glenn Shafer}, volume={184}, ISSN={["1467-985X"]}, DOI={10.1111/rssa.12665},
number={2}, journal={JOURNAL OF THE ROYAL STATISTICAL SOCIETY SERIES A-STATISTICS IN SOCIETY}, author={Martin, Ryan}, year={2021}, month={Apr}, pages={456–457} } @article{martin_2021, title={Towards a Theory of Valid Inferential Models with Partial Prior Information}, volume={12915}, ISBN={["978-3-030-88600-4"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-030-88601-1_14}, abstractNote={Inferential models (IMs) are used to quantify uncertainty in statistical inference problems, and validity is a crucial property that ensures the IM's reliability. Previous work has focused on validity in the special case where no prior information is available. Here I allow for prior information in the form of a non-trivial credal set, define a notion of validity and investigate its implications.}, journal={BELIEF FUNCTIONS: THEORY AND APPLICATIONS (BELIEF 2021)}, author={Martin, Ryan}, year={2021}, pages={137–146} } @article{cella_martin_2022, title={Validity, consonant plausibility measures, and conformal prediction}, volume={141}, ISSN={["1873-4731"]}, DOI={10.1016/j.ijar.2021.07.013}, abstractNote={Prediction of future observations is an important and challenging problem. The two mainstream approaches for quantifying prediction uncertainty use prediction regions and predictive distributions, respectively, with the latter believed to be more informative because it can perform other prediction-related tasks. The standard notion of validity, what we refer to here as Type-1 validity, focuses on coverage probability of prediction regions, while a notion of validity relevant to the other prediction-related tasks performed by predictive distributions is lacking. Here we present a new notion, called Type-2 validity, relevant to these other prediction tasks. We establish connections between Type-2 validity and coherence properties, and show that imprecise probability considerations are required in order to achieve it. We go on to show that both types of prediction validity can be achieved by interpreting the conformal prediction output as the contour function of a consonant plausibility measure. We also offer an alternative characterization of conformal prediction, based on a new nonparametric inferential model construction, wherein the appearance of consonance is natural, and prove its validity.}, journal={INTERNATIONAL JOURNAL OF APPROXIMATE REASONING}, author={Cella, Leonardo and Martin, Ryan}, year={2022}, month={Feb}, pages={110–130} } @article{liu_yang_bondell_martin_2021, title={BAYESIAN INFERENCE IN HIGH-DIMENSIONAL LINEAR MODELS USING AN EMPIRICAL CORRELATION-ADAPTIVE PRIOR}, volume={31}, ISSN={["1996-8507"]}, DOI={10.5705/ss.202019.0133}, abstractNote={In the context of a high-dimensional linear regression model, we propose the use of an empirical correlation-adaptive prior that makes use of information in the observed predictor variable matrix to adaptively address high collinearity, determining if parameters associated with correlated predictors should be shrunk together or kept apart. Under suitable conditions, we prove that this empirical Bayes posterior concentrates around the true sparse parameter at the optimal rate asymptotically.
A simplified version of a shotgun stochastic search algorithm is employed to implement the variable selection procedure, and we show, via simulation experiments across different settings and a real-data application, the favorable performance of the proposed method compared to existing methods.}, number={4}, journal={STATISTICA SINICA}, author={Liu, Chang and Yang, Yue and Bondell, Howard and Martin, Ryan}, year={2021}, month={Oct}, pages={2051–2072} } @article{hong_martin_2020, title={Model misspecification, Bayesian versus credibility estimation, and Gibbs posteriors}, volume={2020}, ISSN={["1651-2030"]}, DOI={10.1080/03461238.2019.1711154}, abstractNote={In the context of predicting future claims, a fully Bayesian analysis – one that specifies a statistical model, prior distribution, and updates using Bayes's formula – is often viewed as the gold-standard, while Bühlmann's credibility estimator serves as a simple approximation. But those desirable properties that give the Bayesian solution its elevated status depend critically on the posited model being correctly specified. Here we investigate the asymptotic behavior of Bayesian posterior distributions under a misspecified model, and our conclusion is that misspecification bias generally has damaging effects that can lead to inaccurate inference and prediction. The credibility estimator, on the other hand, is not sensitive at all to model misspecification, giving it an advantage over the Bayesian solution in those practically relevant cases where the model is uncertain. This begs the question: does robustness to model misspecification require that we abandon uncertainty quantification based on a posterior distribution? Our answer to this question is No, and we offer an alternative Gibbs posterior construction. Furthermore, we argue that this Gibbs perspective provides a new characterization of Bühlmann's credibility estimator.}, number={7}, journal={SCANDINAVIAN ACTUARIAL JOURNAL}, author={Hong, Liang and Martin, Ryan}, year={2020}, month={Aug}, pages={634–649} } @article{wang_martin_2020, title={Model-free posterior inference on the area under the receiver operating characteristic curve}, volume={209}, ISSN={["1873-1171"]}, DOI={10.1016/j.jspi.2020.03.008}, abstractNote={The area under the receiver operating characteristic curve (AUC) serves as a summary of a binary classifier's performance. For inference on the AUC, a common modeling assumption is binormality, which restricts the distribution of the score produced by the classifier. However, this assumption introduces an infinite-dimensional nuisance parameter and may be restrictive in certain machine learning settings. To avoid making distributional assumptions, and to avoid the computational challenges of a fully nonparametric analysis, we develop a direct and model-free Gibbs posterior distribution for inference on the AUC. We present the asymptotic Gibbs posterior concentration rate, and a strategy for tuning the learning rate so that the corresponding credible intervals achieve the nominal frequentist coverage probability. 
Simulation experiments and a real data analysis demonstrate the Gibbs posterior's strong performance compared to existing Bayesian methods.}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Wang, Zhe and Martin, Ryan}, year={2020}, month={Dec}, pages={174–186} } @article{syring_martin_2020, title={ROBUST AND RATE-OPTIMAL GIBBS POSTERIOR INFERENCE ON THE BOUNDARY OF A NOISY IMAGE}, volume={48}, ISSN={["0090-5364"]}, DOI={10.1214/19-AOS1856}, abstractNote={Detection of an image boundary when the pixel intensities are measured with noise is an important problem in image segmentation, with numerous applications in medical imaging and engineering. From a statistical point of view, the challenge is that likelihood-based methods require modeling the pixel intensities inside and outside the image boundary, even though these are typically of no practical interest. Since misspecification of the pixel intensity models can negatively affect inference on the image boundary, it would be desirable to avoid this modeling step altogether. Towards this, we develop a robust Gibbs approach that constructs a posterior distribution for the image boundary directly, without modeling the pixel intensities. We prove that, for a suitable prior on the image boundary, the Gibbs posterior concentrates asymptotically at the minimax optimal rate, adaptive to the boundary smoothness. Monte Carlo computation of the Gibbs posterior is straightforward, and simulation experiments show that the corresponding inference is more accurate than that based on existing Bayesian methodology.}, number={3}, journal={ANNALS OF STATISTICS}, author={Syring, Nicholas and Martin, Ryan}, year={2020}, month={Jun}, pages={1498–1513} } @article{hong_martin_2021, title={Valid Model-Free Prediction of Future Insurance Claims}, volume={25}, ISSN={["2325-0453"]}, DOI={10.1080/10920277.2020.1802599}, abstractNote={Bias resulting from model misspecification is a concern when predicting insurance claims. Indeed, this bias puts the insurer at risk of making invalid or unreliable predictions. A method that could provide provably valid predictions uniformly across a large class of possible distributions would effectively eliminate the risk of model misspecification bias. Conformal prediction is one such method that can meet this need, and here we tailor that approach to the typical insurance application and show that the predictions are not only valid but also efficient across a wide range of settings.}, number={4}, journal={NORTH AMERICAN ACTUARIAL JOURNAL}, author={Hong, Liang and Martin, Ryan}, year={2021}, pages={473–483} } @article{martin_2021, title={A Survey of Nonparametric Mixing Density Estimation via the Predictive Recursion Algorithm}, volume={83}, ISSN={["0976-8394"]}, DOI={10.1007/s13571-019-00206-w}, abstractNote={Nonparametric estimation of a mixing density based on observations from the corresponding mixture is a challenging statistical problem. This paper surveys the literature on a fast, recursive estimator based on the predictive recursion algorithm. After introducing the algorithm and giving a few examples, I summarize the available asymptotic convergence theory, describe an important semiparametric extension, and highlight two interesting applications. 
I conclude with a discussion of several recent developments in this area and some open problems.}, number={1}, journal={SANKHYA-SERIES B-APPLIED AND INTERDISCIPLINARY STATISTICS}, author={Martin, Ryan}, year={2021}, month={May}, pages={97–121} } @article{tokdar_martin_2021, title={Bayesian Test of Normality Versus a Dirichlet Process Mixture Alternative}, volume={83}, ISSN={["0976-8394"]}, DOI={10.1007/s13571-019-00210-0}, abstractNote={We propose a Bayesian test of normality for univariate or multivariate data against alternative nonparametric models characterized by Dirichlet process mixture distributions. The alternative models are based on the principles of embedding and predictive matching. They can be interpreted to offer random granulation of a normal distribution into a mixture of normals with mixture components occupying a smaller volume the farther they are from the distribution center. A scalar parametrization based on latent clustering is used to cover an entire spectrum of separation between the normal distributions and the alternative models. An efficient sequential importance sampler is developed to calculate Bayes factors. Simulations indicate the proposed test can detect non-normality without favoring the nonparametric alternative when normality holds.}, number={1}, journal={SANKHYA-SERIES B-APPLIED AND INTERDISCIPLINARY STATISTICS}, author={Tokdar, Surya T. and Martin, Ryan}, year={2021}, month={May}, pages={66–96} } @article{martin_walker_2019, title={Data-driven priors and their posterior concentration rates}, volume={13}, ISSN={["1935-7524"]}, DOI={10.1214/19-EJS1600}, abstractNote={In high-dimensional problems, choosing a prior distribution such that the corresponding posterior has desirable practical and theoretical properties can be challenging. This begs the question: can the data be used to help choose a good prior? In this paper, we develop a general strategy for constructing a data-driven or empirical prior and sufficient conditions for the corresponding posterior distribution to achieve a certain concentration rate. The idea is that the prior should put sufficient mass on parameter values for which the likelihood is large. An interesting byproduct of this data-driven centering is that the asymptotic properties of the posterior are less sensitive to the prior shape which, in turn, allows users to work with priors of computationally convenient forms while maintaining the desired rates. General results on both adaptive and non-adaptive rates based on empirical priors are presented, along with illustrations in density estimation, nonparametric regression, and high-dimensional structured normal models.}, number={2}, journal={ELECTRONIC JOURNAL OF STATISTICS}, author={Martin, Ryan and Walker, Stephen G.}, year={2019}, pages={3049–3081} } @article{martin_2019, title={Discussion of 'Nonparametric generalized fiducial inference for survival functions under censoring'}, volume={106}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asz022}, abstractNote={It is a pleasure to participate in the discussion of the article by Cui & Hannig (2019). The authors are to be congratulated for their efforts and ingenuity in developing what is, to my knowledge, the first fiducial solution to a problem involving an infinite-dimensional parameter of interest. Specifically, the authors build on some existing machinery, summarized recently in Hannig et al. 
(2016) and the references therein, to construct a generalized fiducial distribution for the full event time survival function, $S$, in the presence of right censoring. They proceed to show that the generalized fiducial distribution satisfies a Bernstein–von Mises theorem, that is, it asymptotically resembles a Gaussian process centred at the Kaplan–Meier estimator, $\hat S$, with covariance function matching that of the limiting sampling distribution of $\hat S$. This implies that summaries of the generalized fiducial distribution, such as hypothesis testing rules and confidence sets for $S$, will control the frequentist error rates at the nominal level, asymptotically. With theoretical justification in hand, they go on to demonstrate that the methods derived from their generalized fiducial distribution perform as well as or better than state-of-the-art methods in survival analysis. Here I will focus mainly on general features of the generalized fiducial approach, leaving the specific results in survival analysis for the experts to discuss.}, number={3}, journal={BIOMETRIKA}, author={Martin, Ryan}, year={2019}, month={Sep}, pages={519–522} } @article{martin_ning_2020, title={Empirical Priors and Coverage of Posterior Credible Sets in a Sparse Normal Mean Model}, volume={82}, ISSN={["0976-8378"]}, DOI={10.1007/s13171-019-00189-w}, abstractNote={Bayesian methods provide a natural means for uncertainty quantification, that is, credible sets can be easily obtained from the posterior distribution. But is this uncertainty quantification valid in the sense that the posterior credible sets attain the nominal frequentist coverage probability? This paper investigates the frequentist validity of posterior uncertainty quantification based on a class of empirical priors in the sparse normal mean model. In particular, we show that our marginal posterior credible intervals achieve the nominal frequentist coverage probability under conditions slightly weaker than needed for selection consistency and a Bernstein–von Mises theorem for the full posterior, and numerical investigations suggest that our empirical Bayes method has superior frequentist coverage probability properties compared to other fully Bayes methods.}, number={2}, journal={SANKHYA-SERIES A-MATHEMATICAL STATISTICS AND PROBABILITY}, author={Martin, Ryan and Ning, Bo}, year={2020}, month={Aug}, pages={477–498} } @article{martin_2019, title={False confidence, non-additive beliefs, and valid statistical inference}, volume={113}, ISSN={["1873-4731"]}, DOI={10.1016/j.ijar.2019.06.005}, abstractNote={Statistics has made tremendous advances since the times of Fisher, Neyman, Jeffreys, and others, but the fundamental and practically relevant questions about probability and inference that puzzled our founding fathers remain unanswered. To bridge this gap, I propose to look beyond the two dominating schools of thought and ask the following three questions: what do scientists need out of statistics, do the existing frameworks meet these needs, and, if not, how to fill the void? To the first question, I contend that scientists seek to convert their data, posited statistical model, etc., into calibrated degrees of belief about quantities of interest. To the second question, I argue that any framework that returns additive beliefs, i.e., probabilities, necessarily suffers from {\em false confidence}---certain false hypotheses tend to be assigned high probability---and, therefore, risks systematic bias.
This reveals the fundamental importance of {\em non-additive beliefs} in the context of statistical inference. But non-additivity alone is not enough so, to the third question, I offer a sufficient condition, called {\em validity}, for avoiding false confidence, and present a framework, based on random sets and belief functions, that provably meets this condition. Finally, I discuss characterizations of p-values and confidence intervals in terms of valid non-additive beliefs, which imply that users of these classical procedures are already following the proposed framework without knowing it.}, journal={INTERNATIONAL JOURNAL OF APPROXIMATE REASONING}, author={Martin, Ryan}, year={2019}, month={Oct}, pages={39–73} } @article{syring_hong_martin_2019, title={Gibbs posterior inference on value-at-risk}, ISSN={["1651-2030"]}, DOI={10.1080/03461238.2019.1573754}, abstractNote={ABSTRACT Accurate estimation of value-at-risk (VaR) and assessment of associated uncertainty is crucial for both insurers and regulators, particularly in Europe. Existing approaches link data and VaR indirectly by first linking data to the parameter of a probability model, and then expressing VaR as a function of that parameter. This indirect approach exposes the insurer to model misspecification bias or estimation inefficiency, depending on whether the parameter is finite- or infinite-dimensional. In this paper, we link data and VaR directly via what we call a discrepancy function, and this leads naturally to a Gibbs posterior distribution for VaR that does not suffer from the aforementioned biases and inefficiencies. Asymptotic consistency and root-n concentration rate of the Gibbs posterior are established, and simulations highlight its superior finite-sample performance compared to other approaches.}, number={7}, journal={SCANDINAVIAN ACTUARIAL JOURNAL}, author={Syring, Nicholas and Hong, Liang and Martin, Ryan}, year={2019}, month={Aug}, pages={548–557} } @article{lin_martin_yang_2019, title={ON OPTIMAL DESIGNS FOR NONREGULAR MODELS}, volume={47}, ISSN={["0090-5364"]}, DOI={10.1214/18-AOS1780}, abstractNote={Classically, Fisher information is the relevant object in defining optimal experimental designs. However, for models that lack certain regularity conditions, the Fisher information does not exist and, hence, there is no notion of design optimality available in the literature. This article seeks to fill the gap by proposing a so-called Hellinger information, which generalizes Fisher information in the sense that the two measures agree in regular problems, but the former also exists for certain types of non-regular problems. We derive a Hellinger information inequality, showing that Hellinger information defines a lower bound on the local minimax risk of estimators. This provides a connection between features of the underlying model---in particular, the design---and the performance of estimators, motivating the use of this new Hellinger information for non-regular optimal design problems. 
Hellinger optimal designs are derived for several non-regular regression problems, with numerical results empirically demonstrating the improved efficiency of these designs compared to alternatives.}, number={6}, journal={ANNALS OF STATISTICS}, author={Lin, Yi and Martin, Ryan and Yang, Min}, year={2019}, month={Dec}, pages={3335–3359} } @article{balch_martin_ferson_2019, title={Satellite conjunction analysis and the false confidence theorem}, volume={475}, ISSN={["1471-2946"]}, DOI={10.1098/rspa.2018.0565}, abstractNote={Satellite conjunction analysis is the assessment of collision risk during a close encounter between a satellite and another object in orbit. A counterintuitive phenomenon has emerged in the conjunction analysis literature, namely, probability dilution, in which lower quality data paradoxically appear to reduce the risk of collision. We show that probability dilution is a symptom of a fundamental deficiency in probabilistic representations of statistical inference, in which there are propositions that will consistently be assigned a high degree of belief, regardless of whether or not they are true. We call this deficiency false confidence. In satellite conjunction analysis, it results in a severe and persistent underestimate of collision risk exposure. We introduce the Martin–Liu validity criterion as a benchmark by which to identify statistical methods that are free from false confidence. Such inferences will necessarily be non-probabilistic. In satellite conjunction analysis, we show that uncertainty ellipsoids satisfy the validity criterion. Performing collision avoidance manoeuvres based on ellipsoid overlap will ensure that collision risk is capped at the user-specified level. Furthermore, this investigation into satellite conjunction analysis provides a template for recognizing and resolving false confidence issues as they occur in other problems of statistical inference.}, number={2227}, journal={PROCEEDINGS OF THE ROYAL SOCIETY A-MATHEMATICAL PHYSICAL AND ENGINEERING SCIENCES}, author={Balch, Michael Scott and Martin, Ryan and Ferson, Scott}, year={2019}, month={Jul} } @misc{tennant_crane_crick_davila_enkhbayar_havemann_kramer_martin_masuzzo_nobes_et al._2019, title={Ten Hot Topics around Scholarly Publishing}, volume={7}, ISSN={["2304-6775"]}, DOI={10.3390/publications7020034}, abstractNote={The changing world of scholarly communication and the emerging new wave of ‘Open Science’ or ‘Open Research’ has brought to light a number of controversial and hotly debated topics. Evidence-based rational debate is regularly drowned out by misinformed or exaggerated rhetoric, which does not benefit the evolving system of scholarly communication. This article aims to provide a baseline evidence framework for ten of the most contested topics, in order to help frame and move forward discussions, practices, and policies. We address issues around preprints and scooping, the practice of copyright transfer, the function of peer review, predatory publishers, and the legitimacy of ‘global’ databases. These arguments and data will be a powerful tool against misinformation across wider academic research, policy and practice, and will inform changes within the rapidly evolving scholarly publishing system.}, number={2}, journal={PUBLICATIONS}, author={Tennant, Jonathan P. 
and Crane, Harry and Crick, Tom and Davila, Jacinto and Enkhbayar, Asura and Havemann, Johanna and Kramer, Bianca and Martin, Ryan and Masuzzo, Paola and Nobes, Andy and et al.}, year={2019}, month={Jun} } @article{chae_martin_walker_2018, title={Convergence of an iterative algorithm to the nonparametric MLE of a mixing distribution}, volume={140}, ISSN={["1879-2103"]}, DOI={10.1016/j.spl.2018.05.012}, abstractNote={An iterative algorithm has been conjectured to converge to the nonparametric MLE of the mixing distribution. We give a rigorous proof of this conjecture and discuss the use of this algorithm for producing smooth mixing densities as near-MLEs.}, journal={STATISTICS & PROBABILITY LETTERS}, author={Chae, Minwoo and Martin, Ryan and Walker, Stephen G.}, year={2018}, month={Sep}, pages={142–146} } @article{martin_2019, title={Empirical Priors and Posterior Concentration Rates for a Monotone Density}, volume={81}, ISSN={0976-836X 0976-8378}, url={http://dx.doi.org/10.1007/S13171-018-0147-5}, DOI={10.1007/s13171-018-0147-5}, abstractNote={In a Bayesian context, prior specification for inference on monotone densities is conceptually straightforward, but proving posterior convergence theorems is complicated by the fact that desirable prior concentration properties often are not satisfied. In this paper, I first develop a new prior designed specifically to satisfy an empirical version of the prior concentration property, and then I give sufficient conditions on the prior inputs such that the corresponding empirical Bayes posterior concentrates around the true monotone density at nearly the optimal minimax rate. Numerical illustrations also reveal the practical benefits of the proposed empirical Bayes approach compared to Dirichlet process mixtures.}, number={2}, journal={Sankhya A}, publisher={Springer Science and Business Media LLC}, author={Martin, Ryan}, year={2019}, month={Dec}, pages={493–509} } @article{syring_martin_2019, title={Calibrating general posterior credible regions}, volume={106}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asy054}, abstractNote={Calibration of credible regions derived from under- or misspecified models is an important and challenging problem. In this paper, we introduce a scalar tuning parameter that controls the posterior distribution spread, and develop a Monte Carlo algorithm that sets this parameter so that the corresponding credible region achieves the nominal frequentist coverage probability.}, number={2}, journal={BIOMETRIKA}, author={Syring, Nicholas and Martin, Ryan}, year={2019}, month={Jun}, pages={479–486} } @article{chae_martin_walker_2019, title={On an algorithm for solving Fredholm integrals of the first kind}, volume={29}, ISSN={["1573-1375"]}, DOI={10.1007/s11222-018-9829-z}, abstractNote={In this paper we use an iterative algorithm for solving Fredholm equations of the first kind. The basic algorithm and convergence properties are known under certain conditions, but we provide a simpler convergence proof without requiring the restrictive conditions that have previously been needed. Several examples of independent interest are given, including mixing density estimation and a first passage time density function involving Brownian motion.
We also develop the basic algorithm to include functions which are not necessarily non-negative and, again, present illustrations.}, number={4}, journal={STATISTICS AND COMPUTING}, author={Chae, Minwoo and Martin, Ryan and Walker, Stephen G.}, year={2019}, month={Jul}, pages={645–654} } @article{hong_martin_2019, title={Real-time Bayesian non-parametric prediction of solvency risk}, volume={13}, ISSN={["1748-5002"]}, DOI={10.1017/S1748499518000039}, abstractNote={Insurance regulation often dictates that insurers monitor their solvency risk in real time and take appropriate actions whenever the risk exceeds their tolerance level. Bayesian methods are appealing for prediction problems thanks to their ability to naturally incorporate both sample variability and parameter uncertainty into a predictive distribution. However, handling data arriving in real time requires a flexible non-parametric model, and the Monte Carlo methods necessary to evaluate the predictive distribution in such cases are not recursive and can be too expensive to rerun each time new data arrives. In this paper, we apply a recently developed alternative perspective on Bayesian prediction based on copulas. This approach facilitates recursive Bayesian prediction without computing a posterior, allowing insurers to perform real-time updating of risk measures to assess solvency risk, and providing them with a tool for carrying out dynamic risk management strategies in today’s “big data” era.}, number={1}, journal={ANNALS OF ACTUARIAL SCIENCE}, author={Hong, Liang and Martin, Ryan}, year={2019}, month={Mar}, pages={67–79} } @article{martin_ouyang_domagni_2018, title={'Purposely misspecified' posterior inference on the volatility of a jump diffusion process}, volume={134}, ISSN={["1879-2103"]}, DOI={10.1016/j.spl.2017.10.013}, abstractNote={Bayesian analysis requires prior distributions for all model parameters, whether of interest or not. This can be a burden, for a number of reasons, especially when the nuisance parameters are high- or infinite-dimensional, so there is motivation to find a way around this without completely abandoning the Bayesian approach. Here we consider a general strategy of working with a purposely misspecified model to avoid dealing directly with nuisance parameters. We focus this investigation on an interesting and challenging problem of inference on the volatility of a jump diffusion process based on discrete observations. If we simply ignore the jumps, we can work out precisely the asymptotic behavior of the Bayesian posterior distribution based on the misspecified model.
This result suggests some simple adjustments to correct for the effects of misspecification, and we demonstrate that a suitably corrected version of our purposely misspecified posterior leads to inference on the volatility that is asymptotically optimal.}, journal={STATISTICS & PROBABILITY LETTERS}, author={Martin, Ryan and Ouyang, Cheng and Domagni, Francois}, year={2018}, month={Mar}, pages={106–113} } @misc{hong_martin_2017, title={A review of Bayesian asymptotics in general insurance applications}, volume={7}, ISSN={["2190-9741"]}, DOI={10.1007/s13385-017-0151-5}, number={1}, journal={EUROPEAN ACTUARIAL JOURNAL}, author={Hong, Liang and Martin, Ryan}, year={2017}, month={Jul}, pages={231–255} } @article{hong_martin_2018, title={Dirichlet process mixture models for insurance loss data}, ISSN={["1651-2030"]}, DOI={10.1080/03461238.2017.1402086}, abstractNote={In the recent insurance literature, a variety of finite-dimensional parametric models have been proposed for analyzing the hump-shaped, heavy-tailed, and highly skewed loss data often encountered in applications. These parametric models are relatively simple, but they lack flexibility in the sense that an actuary analyzing a new data-set cannot be sure that any one of these parametric models will be appropriate. As a consequence, the actuary must make a non-trivial choice among a collection of candidate models, putting him/herself at risk for various model misspecification biases. In this paper, we argue that, at least in cases where prediction of future insurance losses is the ultimate goal, there is reason to consider a single but more flexible nonparametric model. We focus here on Dirichlet process mixture models, and we reanalyze several of the standard insurance data-sets to support our claim that model misspecification biases can be avoided by taking a nonparametric approach, with little to no cost, compared to existing parametric approaches.}, number={6}, journal={SCANDINAVIAN ACTUARIAL JOURNAL}, author={Hong, Liang and Martin, Ryan}, year={2018}, pages={545–554} } @article{syring_martin_2017, title={Gibbs posterior inference on the minimum clinically important difference}, volume={187}, ISSN={["1873-1171"]}, DOI={10.1016/j.jspi.2017.03.001}, abstractNote={It is known that a statistically significant treatment may not be clinically significant. A quantity that can be used to assess clinical significance is called the minimum clinically important difference (MCID), and inference on the MCID is an important and challenging problem. Modeling for the purpose of inference on the MCID is non-trivial, and concerns about bias from a misspecified parametric model or inefficiency from a nonparametric model motivate an alternative approach to balance robustness and efficiency. In particular, a recently proposed representation of the MCID as the minimizer of a suitable risk function makes it possible to construct a Gibbs posterior distribution for the MCID without specifying a model.
We establish the posterior convergence rate and show, numerically, that an appropriately scaled version of this Gibbs posterior yields interval estimates for the MCID which are both valid and efficient even for relatively small sample sizes.}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Syring, Nicholas and Martin, Ryan}, year={2017}, month={Aug}, pages={67–77} } @article{hahn_martin_walker_2018, title={On Recursive Bayesian Predictive Distributions}, volume={113}, ISSN={["1537-274X"]}, DOI={10.1080/01621459.2017.1304219}, abstractNote={A Bayesian framework is attractive in the context of prediction, but a fast recursive update of the predictive distribution has apparently been out of reach, in part because Monte Carlo methods are generally used to compute the predictive. This article shows that online Bayesian prediction is possible by characterizing the Bayesian predictive update in terms of a bivariate copula, making it unnecessary to pass through the posterior to update the predictive. In standard models, the Bayesian predictive update corresponds to familiar choices of copula but, in nonparametric problems, the appropriate copula may not have a closed-form expression. In such cases, our new perspective suggests a fast recursive approximation to the predictive density, in the spirit of Newton’s predictive recursion algorithm, but without requiring evaluation of normalizing constants. Consistency of the new algorithm is shown, and numerical examples demonstrate its quality performance in finite samples compared to fully Bayesian and kernel methods. Supplementary materials for this article are available online.}, number={523}, journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION}, author={Hahn, P. Richard and Martin, Ryan and Walker, Stephen G.}, year={2018}, pages={1085–1093} } @article{hong_kuffner_martin_2018, title={On overfitting and post-selection uncertainty assessments}, volume={105}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asx083}, abstractNote={In a regression context, when the relevant subset of explanatory variables is uncertain, it is common to use a data‐driven model selection procedure. Classical linear model theory, applied naively to the selected submodel, may not be valid because it ignores the selected submodel's dependence on the data. We provide an explanation of this phenomenon, in terms of overfitting, for a class of model selection criteria.}, number={1}, journal={BIOMETRIKA}, author={Hong, L. and Kuffner, T. A. and Martin, R.}, year={2018}, month={Mar}, pages={221–224} } @article{martin_2017, title={Uncertainty quantification for the horseshoe (with discussion): comment}, volume={12}, number={4}, journal={BAYESIAN ANALYSIS}, author={Martin, R.}, year={2017}, pages={1254–1258} } @article{martin_stufken_yang_2016, title={A Conversation with Samad Hedayat}, volume={31}, ISSN={["0883-4237"]}, DOI={10.1214/16-sts579}, abstractNote={A. Samad Hedayat was born on July 11, 1937, in Jahrom, Iran. He finished his undergraduate education in bioengineering with honors from the University of Tehran in 1962 and came to the U.S. to study statistics at Cornell, completing his Ph.D. in 1969. Just a few years later, in 1974, Samad accepted a full professor position at the University of Illinois at Chicago Circle—now called University of Illinois at Chicago (UIC)—and was named UIC Distinguished Professor in 2003.
He was an early leader in the Department of Mathematics, Statistics and Computer Science and he remains a driving force to this day. Samad has also made substantial contributions in terms of research and service to the field, as evidenced by his numerous honors: he is an elected member of the International Statistical Institute, a fellow of the Institute of Mathematical Statistics and the American Statistical Association and an honorary member of the Iranian Mathematical Society, among others. This conversation, which was conducted in September 2015 and May 2016, touches on Professor Hedayat’s career, and the past, present and future of statistics. In keeping with one of his great passions, it also offers an abundance of advice for students and junior faculty.}, number={4}, journal={STATISTICAL SCIENCE}, author={Martin, Ryan and Stufken, John and Yang, Min}, year={2016}, month={Nov}, pages={637–647} } @article{martin_2017, title={A Statistical Inference Course Based on p-Values}, volume={71}, ISSN={["1537-2731"]}, DOI={10.1080/00031305.2016.1208629}, abstractNote={Introductory statistical inference texts and courses treat the point estimation, hypothesis testing, and interval estimation problems separately, with primary emphasis on large-sample approximations. Here, I present an alternative approach to teaching this course, built around p-values, emphasizing provably valid inference for all sample sizes. Details about computation and marginalization are also provided, with several illustrative examples, along with a course outline. Supplementary materials for this article are available online.}, number={2}, journal={AMERICAN STATISTICIAN}, author={Martin, Ryan}, year={2017}, month={May}, pages={128–136} } @article{liu_martin_syring_2017, title={Efficient simulation from a gamma distribution with small shape parameter}, volume={32}, ISSN={["1613-9658"]}, DOI={10.1007/s00180-016-0692-0}, number={4}, journal={COMPUTATIONAL STATISTICS}, author={Liu, Chuanhai and Martin, Ryan and Syring, Nick}, year={2017}, month={Dec}, pages={1767–1775} } @article{martin_lin_2016, title={Exact prior-free probabilistic inference in a class of non-regular models}, volume={5}, ISSN={2049-1573}, url={http://dx.doi.org/10.1002/STA4.130}, DOI={10.1002/STA4.130}, abstractNote={Standard statistical methods, such as maximum likelihood, are often justified based on their asymptotic properties. For suitably regular models, this theory is standard, but when the model is non‐regular, for example, the support depends on the parameter, these asymptotic properties may be difficult to assess. Recently, an inferential model (IM) framework has been developed that provides valid prior‐free probabilistic inference without the need for asymptotic justification. In this paper, we construct an IM for a class of highly non‐regular models with parameter‐dependent support. This construction requires conditioning, which is facilitated through solving a particular differential equation. We prove that the plausibility intervals derived from this IM are exact, and we demonstrate, via simulations, that their exactness does not come at the cost of loss of efficiency.
}, number={1}, journal={Stat}, publisher={Wiley}, author={Martin, Ryan and Lin, Yi}, year={2016}, pages={312–321} } @article{martin_2018, title={On an inferential model construction using generalized associations}, volume={195}, ISSN={["1873-1171"]}, DOI={10.1016/j.jspi.2016.11.006}, abstractNote={The inferential model (IM) approach, like fiducial and its generalizations, depends on a representation of the data-generating process. Here, a particular variation on the IM construction is considered, one based on generalized associations. The resulting generalized IM is more flexible in that it does not require a complete specification of the data-generating process and is provably valid under mild conditions. Computation and marginalization strategies are discussed, and two applications of this generalized IM approach are presented.}, journal={JOURNAL OF STATISTICAL PLANNING AND INFERENCE}, author={Martin, Ryan}, year={2018}, month={May}, pages={105–115} } @article{martin_han_2016, title={A semiparametric scale-mixture regression model and predictive recursion maximum likelihood}, volume={94}, ISSN={0167-9473}, url={http://dx.doi.org/10.1016/J.CSDA.2015.08.005}, DOI={10.1016/J.CSDA.2015.08.005}, abstractNote={To avoid specification of the error distribution in a regression model, we propose a general nonparametric scale mixture model for the error distribution. For fitting such mixtures, the predictive recursion method is a simple and computationally efficient alternative to existing methods. We define a predictive recursion-based marginal likelihood function, and estimation of the regression parameters proceeds by maximizing this function. A hybrid predictive recursion–EM algorithm is proposed for this purpose. The method's performance is compared with that of existing methods in simulations and real data analyses.}, journal={Computational Statistics & Data Analysis}, publisher={Elsevier BV}, author={Martin, Ryan and Han, Zhen}, year={2016}, month={Feb}, pages={75–85} } @article{martin_lingham_2016, title={Prior-Free Probabilistic Prediction of Future Observations}, volume={58}, ISSN={0040-1706 1537-2723}, url={http://dx.doi.org/10.1080/00401706.2015.1017116}, DOI={10.1080/00401706.2015.1017116}, abstractNote={Prediction of future observations is a fundamental problem in statistics. Here we present a general approach based on the recently developed inferential model (IM) framework. We employ an IM-based technique to marginalize out the unknown parameters, yielding prior-free probabilistic prediction of future observables. Verifiable sufficient conditions are given for validity of our IM for prediction, and a variety of examples demonstrate the proposed method’s performance. Thanks to its generality and ease of implementation, we expect that our IM-based method for prediction will be a useful tool for practitioners. Supplementary materials for this article are available online.}, number={2}, journal={Technometrics}, publisher={Informa UK Limited}, author={Martin, Ryan and Lingham, Rama T.}, year={2016}, month={Apr}, pages={225–235} } @article{martin_2015, title={Asymptotically Optimal Nonparametric Empirical Bayes Via Predictive Recursion}, volume={44}, ISSN={0361-0926 1532-415X}, url={http://dx.doi.org/10.1080/03610926.2012.743566}, DOI={10.1080/03610926.2012.743566}, abstractNote={An empirical Bayes problem has an unknown prior to be estimated from data.
The predictive recursion (PR) algorithm provides fast nonparametric estimation of mixing distributions and is ideally suited for empirical Bayes applications. This article presents a general notion of empirical Bayes asymptotic optimality, and it is shown that PR-based procedures satisfy this property under certain conditions. As an application, the problem of in-season prediction of baseball batting averages is considered. There the PR-based empirical Bayes rule performs well in terms of prediction error and ability to capture the distribution of the latent features.}, number={2}, journal={Communications in Statistics - Theory and Methods}, publisher={Informa UK Limited}, author={Martin, Ryan}, year={2015}, pages={286–299} } @article{liu_martin_2015, title={Frameworks for prior-free posterior probabilistic inference}, volume={7}, ISSN={1939-5108}, url={http://dx.doi.org/10.1002/WICS.1329}, DOI={10.1002/WICS.1329}, abstractNote={The development of statistical methods for valid and efficient probabilistic inference without prior distributions has a long history. Fisher's fiducial inference is perhaps the most famous of these attempts. We argue that, despite its seemingly prior‐free formulation, fiducial and its various extensions are not prior‐free and, therefore, do not meet the requirements for prior‐free probabilistic inference. In contrast, the inferential model (IM) framework is genuinely prior‐free and is shown to be a promising new method for generating both valid and efficient probabilistic inference. With a brief introduction to the two fundamental principles, namely, the validity and efficiency principles, the three‐step construction of the basic IM framework is discussed in the context of the validity principle. Efficient IM methods, based on conditioning and marginalization, are illustrated with two benchmark examples, namely, the bivariate normal with unknown correlation coefficient and the Behrens–Fisher problem.}, number={1}, journal={Wiley Interdisciplinary Reviews: Computational Statistics}, publisher={Wiley}, author={Liu, Chuanhai and Martin, Ryan}, year={2015}, month={Jan}, pages={77–85} } @article{martin_liu_2015, title={Marginal Inferential Models: Prior-Free Probabilistic Inference on Interest Parameters}, volume={110}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2014.985827}, DOI={10.1080/01621459.2014.985827}, abstractNote={The inferential models (IM) framework provides prior-free, frequency-calibrated, and posterior probabilistic inference. The key is the use of random sets to predict unobservable auxiliary variables connected to the observable data and unknown parameters. When nuisance parameters are present, a marginalization step can reduce the dimension of the auxiliary variable which, in turn, leads to more efficient inference. For regular problems, exact marginalization can be achieved, and we give conditions for marginal IM validity. We show that our approach provides exact and efficient marginal inference in several challenging problems, including a many-normal-means problem. In nonregular problems, we propose a generalized marginalization technique and prove its validity.
Details are given for two benchmark examples, namely, the Behrens–Fisher and gamma mean problems.}, number={512}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Martin, Ryan and Liu, Chuanhai}, year={2015}, month={Oct}, pages={1621–1631} } @article{martin_2015, title={Plausibility Functions and Exact Frequentist Inference}, volume={110}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2014.983232}, DOI={10.1080/01621459.2014.983232}, abstractNote={In the frequentist program, inferential methods with exact control on error rates are a primary focus. The standard approach, however, is to rely on asymptotic approximations, which may not be suitable. This article presents a general framework for the construction of exact frequentist procedures based on plausibility functions. It is shown that the plausibility function-based tests and confidence regions have the desired frequentist properties in finite samples—no large-sample justification needed. An extension of the proposed method is also given for problems involving nuisance parameters. Examples demonstrate that the plausibility function-based method is both exact and efficient in a wide variety of problems.}, number={512}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Martin, Ryan}, year={2015}, month={Oct}, pages={1552–1561} } @article{martin_2013, title={An Approximate Bayesian Marginal Likelihood Approach for Estimating Finite Mixtures}, volume={42}, ISSN={0361-0918 1532-4141}, url={http://dx.doi.org/10.1080/03610918.2012.667476}, DOI={10.1080/03610918.2012.667476}, abstractNote={Estimation of finite mixture models when the mixing distribution support is unknown is an important problem. This article gives a new approach based on a marginal likelihood for the unknown support. Motivated by a Bayesian Dirichlet prior model, a computationally efficient stochastic approximation version of the marginal likelihood is proposed and large-sample theory is presented. By restricting the support to a finite grid, a simulated annealing method is employed to maximize the marginal likelihood and estimate the support. Real and simulated data examples show that this novel stochastic approximation and simulated annealing procedure compares favorably with existing methods.}, number={7}, journal={Communications in Statistics - Simulation and Computation}, publisher={Informa UK Limited}, author={Martin, Ryan}, year={2013}, month={Aug}, pages={1533–1548} } @article{martin_liu_2013, title={Correction}, volume={108}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2013.796885}, DOI={10.1080/01621459.2013.796885}, abstractNote={This is to provide corrections to Theorems 1 and 3 in Martin and Liu (2013). The latter correction also casts further light on the role of nested predictive random sets.}, number={503}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Martin, Ryan and Liu, Chuanhai}, year={2013}, month={Sep}, pages={1138–1139} } @article{martin_liu_2013, title={Inferential Models: A Framework for Prior-Free Posterior Probabilistic Inference}, volume={108}, ISSN={0162-1459 1537-274X}, url={http://dx.doi.org/10.1080/01621459.2012.747960}, DOI={10.1080/01621459.2012.747960}, abstractNote={Posterior probabilistic statistical inference without priors is an important but so far elusive goal. 
Fisher’s fiducial inference, Dempster–Shafer theory of belief functions, and Bayesian inference with default priors are attempts to achieve this goal but, to date, none has given a completely satisfactory picture. This article presents a new framework for probabilistic inference, based on inferential models (IMs), which not only provides data-dependent probabilistic measures of uncertainty about the unknown parameter, but also does so with an automatic long-run frequency-calibration property. The key to this new approach is the identification of an unobservable auxiliary variable associated with observable data and unknown parameter, and the prediction of this auxiliary variable with a random set before conditioning on data. Here we present a three-step IM construction, and prove a frequency-calibration property of the IM’s belief function under mild conditions. A corresponding optimality theory is developed, which helps to resolve the nonuniqueness issue. Several examples are presented to illustrate this new approach.}, number={501}, journal={Journal of the American Statistical Association}, publisher={Informa UK Limited}, author={Martin, Ryan and Liu, Chuanhai}, year={2013}, month={Mar}, pages={301–313} } @article{martin_2014, title={Random Sets and Exact Confidence Regions}, volume={76}, ISSN={0976-836X 0976-8378}, url={http://dx.doi.org/10.1007/S13171-013-0046-8}, DOI={10.1007/S13171-013-0046-8}, abstractNote={An important problem in statistics is the construction of confidence regions for unknown parameters. In most cases, asymptotic distribution theory is used to construct confidence regions, so any coverage probability claims only hold approximately, for large samples. This paper describes a new approach, using random sets, which allows users to construct exact confidence regions without appeal to asymptotic theory. In particular, if the user-specified random set satisfies a certain validity property, confidence regions obtained by thresholding the induced data-dependent plausibility function are shown to have the desired coverage probability.}, number={2}, journal={Sankhya A}, publisher={Springer Science and Business Media LLC}, author={Martin, Ryan}, year={2014}, pages={288–304} } @article{martin_tilak_2012, title={On ε-Optimality of the Pursuit Learning Algorithm}, volume={49}, ISSN={0021-9002 1475-6072}, url={http://dx.doi.org/10.1017/S0021900200009542}, DOI={10.1017/S0021900200009542}, abstractNote={Estimator algorithms in learning automata are useful tools for adaptive, real-time optimization in computer science and engineering applications. In this paper we investigate theoretical convergence properties for a special case of estimator algorithms - the pursuit learning algorithm. We identify and fill a gap in existing proofs of probabilistic convergence for pursuit learning. 
It is traditional to take the pursuit learning tuning parameter to be fixed in practical applications, but our proof sheds light on the importance of a vanishing sequence of tuning parameters in a theoretical convergence analysis.}, number={3}, journal={Journal of Applied Probability}, publisher={Cambridge University Press (CUP)}, author={Martin, Ryan and Tilak, Omkar}, year={2012}, month={Sep}, pages={795–805} } @article{martin_2012, title={Convergence rate for predictive recursion estimation of finite mixtures}, volume={82}, ISSN={0167-7152}, url={http://dx.doi.org/10.1016/j.spl.2011.10.023}, DOI={10.1016/j.spl.2011.10.023}, abstractNote={Predictive recursion (PR) is a fast stochastic algorithm for nonparametric estimation of mixing distributions in mixture models. It is known that the PR estimates of both the mixing and mixture densities are consistent under fairly mild conditions, but currently very little is known about the rate of convergence. Here I first investigate asymptotic convergence properties of the PR estimate under model misspecification in the special case of finite mixtures with known support. Tools from stochastic approximation theory are used to prove that the PR estimates converge, to the best Kullback–Leibler approximation, at a nearly root-n rate. When the support is unknown, PR can be used to construct an objective function which, when optimized, yields an estimate of the support. I apply the known-support results to derive a rate of convergence for this modified PR estimate in the unknown support case, which compares favorably to known optimal rates.}, number={2}, journal={Statistics & Probability Letters}, publisher={Elsevier BV}, author={Martin, Ryan}, year={2012}, month={Feb}, pages={378–384} }