@article{mulgrave_ghosal_2020,
  author       = {Mulgrave, Jami J. and Ghosal, Subhashis},
  title        = {{Bayesian} Inference in Nonparanormal Graphical Models},
  journal      = {Bayesian Analysis},
  volume       = {15},
  number       = {2},
  pages        = {449--475},
  year         = {2020},
  month        = jun,
  issn         = {1936-0975},
  doi          = {10.1214/19-BA1159},
  abstractNote = {Gaussian graphical models have been used to study intrinsic dependence among several variables, but the Gaussianity assumption may be restrictive in many applications. A nonparanormal graphical model is a semiparametric generalization for continuous variables where it is assumed that the variables follow a Gaussian graphical model only after some unknown smooth monotone transformations on each of them. We consider a Bayesian approach in the nonparanormal graphical model by putting priors on the unknown transformations through a random series based on B-splines where the coefficients are ordered to induce monotonicity. A truncated normal prior leads to partial conjugacy in the model and is useful for posterior simulation using Gibbs sampling. On the underlying precision matrix of the transformed variables, we consider a spike-and-slab prior and use an efficient posterior Gibbs sampling scheme. We use the Bayesian Information Criterion to choose the hyperparameters for the spike-and-slab prior. We present a posterior consistency result on the underlying transformation and the precision matrix. We study the numerical performance of the proposed method through an extensive simulation study and finally apply the proposed method on a real data set.},
}
@article{wei_ghosal_2020,
  author       = {Wei, Ran and Ghosal, Subhashis},
  title        = {Contraction properties of shrinkage priors in logistic regression},
  journal      = {Journal of Statistical Planning and Inference},
  volume       = {207},
  pages        = {215--229},
  year         = {2020},
  month        = jul,
  issn         = {1873-1171},
  doi          = {10.1016/j.jspi.2019.12.004},
  abstractNote = {Bayesian shrinkage priors have received a lot of attention recently because of their efficiency in computation and accuracy in estimation and variable selection. In this paper, we study the contraction properties of shrinkage priors in a logistic regression model where the number of covariates is high. For a shrinkage prior distribution that is heavy-tailed and concentrated around zero with high probability such as the horseshoe prior, the Dirichlet--Laplace prior, and the normal-gamma prior with appropriate choices of hyper-parameters, estimates of the logistic regression coefficient are shown to asymptotically concentrate around the true sparse vector in the L2-sense. It is shown that the proposed contraction rate is comparable with the point mass prior that is studied in Atchad{\'e} (2017). The simulation study under the logistic regression model verifies the theoretical results by showing that the horseshoe prior and the Dirichlet--Laplace prior perform like the point mass prior for the estimation, variable selection and prediction, and yield much better results than Bayesian lasso and the non-informative normal prior.},
}
@article{belitser_ghosal_2020,
  author       = {Belitser, Eduard and Ghosal, Subhashis},
  title        = {Empirical {Bayes} Oracle Uncertainty Quantification for Regression},
  journal      = {Annals of Statistics},
  volume       = {48},
  number       = {6},
  pages        = {3113--3137},
  year         = {2020},
  month        = dec,
  issn         = {0090-5364},
  doi          = {10.1214/19-AOS1845},
  abstractNote = {We propose an empirical Bayes method for high-dimensional linear regression models. Following an oracle approach that quantifies the error locally for each possible value of the parameter, we show that an empirical Bayes posterior contracts at the optimal rate at all parameters and leads to uniform size-optimal credible balls with guaranteed coverage under an ``excessive bias restriction'' condition. This condition gives rise to a new slicing of the entire space that is suitable for ensuring uniformity in uncertainty quantification. The obtained results immediately lead to optimal contraction and coverage properties for many conceivable classes simultaneously. The results are also extended to high-dimensional additive nonparametric regression models.},
}
@comment{Journal name spelled as in the du_ghosal_2018 entry, which carries the same ISSN 0976-8378.}
@article{ghosal_2020,
  author       = {Ghosal, Subhashis},
  title        = {Preface of the Special Issue in Honor of {Professor Jayanta Kumar Ghosh}},
  journal      = {Sankhya A},
  year         = {2020},
  month        = mar,
  issn         = {0976-8378},
  doi          = {10.1007/s13171-020-00199-z},
}
@comment{The exported abstract began with a truncated duplicate of its own first two sentences; the duplicate fragment was removed and the stray line break joined.}
@article{roy_ghosal_prescott_choudhury_2019,
  author       = {Roy, Arkaprava and Ghosal, Subhashis and Prescott, Jeffrey and Choudhury, Kingshuk Roy},
  title        = {{Bayesian} Modeling of the Structural Connectome for Studying {Alzheimer's} Disease},
  journal      = {Annals of Applied Statistics},
  volume       = {13},
  number       = {3},
  pages        = {1791--1816},
  year         = {2019},
  month        = sep,
  issn         = {1932-6157},
  doi          = {10.1214/19-AOAS1257},
  abstractNote = {We study possible relations between the Alzheimer's disease progression and the structure of the connectome, white matter connecting different regions of brain. Regression models in covariates including age, gender and disease status for the extent of white matter connecting each pair of regions of brain are proposed. Subject inhomogeneity is also incorporated in the model through random effects with an unknown distribution. As there are large number of pairs of regions, we also adopt a dimension reduction technique through graphon (Lovasz and Szegedy (2006)) functions, which reduces functions of pairs of regions to functions of regions. The connecting graphon functions are considered unknown but assumed smoothness allows putting priors of low complexity on them. We pursue a nonparametric Bayesian approach by assigning a Dirichlet process scale mixture of zero mean normal prior on the distributions of the random effects and finite random series of tensor products of B-splines priors on the underlying graphon functions. Markov chain Monte Carlo techniques, for drawing samples for the posterior distributions are developed. The proposed Bayesian method overwhelmingly outperforms similar ANCOVA models in the simulation setup. The proposed Bayesian approach is applied on a dataset of 100 subjects and 83 brain regions and key regions implicated in the changing connectome are identified.},
}
@article{ning_ghosal_thomas_2019,
  author       = {Ning, Bo and Ghosal, Subhashis and Thomas, Jewell},
  title        = {{Bayesian} Method for Causal Inference in Spatially-Correlated Multivariate Time Series},
  journal      = {Bayesian Analysis},
  volume       = {14},
  number       = {1},
  pages        = {1--28},
  year         = {2019},
  month        = mar,
  issn         = {1936-0975},
  doi          = {10.1214/18-BA1102},
  abstractNote = {Measuring the causal impact of an advertising campaign on sales is an important problem for advertising companies interested in modeling consumer demand at stores in different locations. This paper proposes a new causal inference method that uses a Bayesian multivariate time series model to capture the spatial correlation between stores. Control stores which are used to build counterfactuals over the causal period are chosen before running the advertising campaign. The novelty of this method is to estimate causal effects by comparing the posterior distributions of latent variables given by the observed data and its counterfactual data. We use one-sided Kolmogorov-Smirnov distance to quantify the difference between the two posterior distributions. We found that this method is able to detect smaller scale of causal impact as measurement errors are automatically filtered out in the causal analysis compared to a commonly used method. A two-stage algorithm is used to estimate the model. A G-Wishart prior with a given graphical structure on the precision matrix is used to impose sparsity in spatial correlation. The graphical structure needs not correspond to a decomposable graph. We model the local linear trend by a stationary multivariate autoregressive process to prevent the prediction intervals from being explosive. A detailed simulation study shows the effectiveness of the proposed approach to causal inference. We apply the proposed method to a real dataset to measure the effect of an advertising campaign for a consumer product sold at stores of a large national retail chain.},
}
@article{zhu_ghosal_2019,
  author       = {Zhu, Rui and Ghosal, Subhashis},
  title        = {{Bayesian} Semiparametric {ROC} surface estimation under verification bias},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {133},
  pages        = {40--52},
  year         = {2019},
  month        = may,
  publisher    = {Elsevier BV},
  issn         = {0167-9473},
  doi          = {10.1016/j.csda.2018.09.003},
  url          = {http://dx.doi.org/10.1016/J.CSDA.2018.09.003},
  abstractNote = {The Receiver Operating Characteristic (ROC) surface is a generalization of the ROC curve and is widely used for assessment of the accuracy of diagnostic tests on three categories. Verification bias occurs when not all subjects have their labels observed. This is a common problem in disease diagnosis since the gold standard test to get labels, i.e., the true disease status, can be invasive and expensive. The same situation happens in the evaluation of semi-supervised learning, where the unlabeled data are incorporated. A Bayesian approach for estimating the ROC surface is proposed based on continuous data under a semi-parametric trinormality assumption. The proposed method is then extended to situations in the presence of verification bias. The posterior distribution is computed under the trinormality assumption using a rank-based likelihood. The consistency of the posterior under a mild condition is also established. The proposed method is compared with existing methods for estimating an ROC surface. Simulation results show that it performs well in terms of accuracy. The method is applied to evaluate the performance of CA125 and HE4 in the diagnosis of epithelial ovarian cancer (EOC) as a demonstration.},
}
@article{yoo_ghosal_2019,
  author       = {Yoo, William Weimin and Ghosal, Subhashis},
  title        = {{Bayesian} mode and maximum estimation and accelerated rates of contraction},
  journal      = {Bernoulli},
  volume       = {25},
  number       = {3},
  pages        = {2330--2358},
  year         = {2019},
  month        = aug,
  issn         = {1573-9759},
  doi          = {10.3150/18-BEJ1056},
  abstractNote = {We study the problem of estimating the mode and maximum of an unknown regression function in the presence of noise. We adopt the Bayesian approach by using tensor-product B-splines and endowing the coefficients with Gaussian priors. In the usual fixed-in-advanced sampling plan, we establish posterior contraction rates for mode and maximum and show that they coincide with the minimax rates for this problem. To quantify estimation uncertainty, we construct credible sets for these two quantities that have high coverage probabilities with optimal sizes. If one is allowed to collect data sequentially, we further propose a Bayesian two-stage estimation procedure, where a second stage posterior is built based on samples collected within a credible set constructed from a first stage posterior. Under appropriate conditions on the radius of this credible set, we can accelerate optimal contraction rates from the fixed-in-advanced setting to the minimax sequential rates. A simulation experiment shows that our Bayesian two-stage procedure outperforms single-stage procedure and also slightly improves upon a non-Bayesian two-stage procedure.},
}
@comment{Key carries a "b" suffix because zhu_ghosal_2019 is already used by the Computational Statistics and Data Analysis paper; a repeated key makes BibTeX skip this entry.}
@article{zhu_ghosal_2019b,
  author       = {Zhu, Rui and Ghosal, Subhashis},
  title        = {{Bayesian} nonparametric estimation of {ROC} surface under verification bias},
  journal      = {Statistics in Medicine},
  volume       = {38},
  number       = {18},
  pages        = {3361--3377},
  year         = {2019},
  month        = aug,
  issn         = {1097-0258},
  doi          = {10.1002/sim.8181},
  abstractNote = {The receiver operating characteristic (ROC) surface, as a generalization of the ROC curve, has been widely used to assess the accuracy of a diagnostic test for three categories. A common problem is verification bias, referring to the situation where not all subjects have their true classes verified. In this paper, we consider the problem of estimating the ROC surface under verification bias. We adopt a Bayesian nonparametric approach by directly modeling the underlying distributions of the three categories by Dirichlet process mixture priors. We propose a robust computing algorithm by only imposing a missing at random assumption for the verification process but no assumption on the distributions. The method can also accommodate covariates information in estimating the ROC surface, which can lead to a more comprehensive understanding of the diagnostic accuracy. It can be adapted and hugely simplified in the case where there is no verification bias, and very fast computation is possible through the Bayesian bootstrap process. The proposed method is compared with other commonly used methods by extensive simulations. We find that the proposed method generally outperforms other approaches. Applying the method to two real datasets, the key findings are as follows: (1) human epididymis protein 4 has a slightly better diagnosis ability compared to CA125 in discriminating healthy, early stage, and late stage patients of epithelial ovarian cancer. (2) Serum albumin has a prognostic ability in distinguishing different stages of hepatocellular carcinoma.},
}
@article{du_ghosal_2019,
  author       = {Du, Xingqi and Ghosal, Subhashis},
  title        = {Multivariate {Gaussian} network structure learning},
  journal      = {Journal of Statistical Planning and Inference},
  volume       = {199},
  pages        = {327--342},
  year         = {2019},
  month        = mar,
  publisher    = {Elsevier BV},
  issn         = {0378-3758},
  doi          = {10.1016/j.jspi.2018.07.009},
  url          = {http://dx.doi.org/10.1016/J.JSPI.2018.07.009},
  abstractNote = {We consider a graphical model where a multivariate normal vector is associated with each node of the underlying graph and estimate the graphical structure. We minimize a loss function obtained by regressing the vector at each node on those at the remaining ones under a group penalty. We show that the proposed estimator can be computed by a fast convex optimization algorithm. We show that as the sample size increases, the estimated regression coefficients and the correct graphical structure are correctly estimated with probability tending to one. By extensive simulations, we show the superiority of the proposed method over comparable procedures. We apply the technique on two real datasets. The first one is to identify gene and protein networks showing up in cancer cell lines, and the second one is to reveal the connections among different industries in the US.},
}
@article{du_ghosal_2018,
  author       = {Du, Xingqi and Ghosal, Subhashis},
  title        = {{Bayesian} Discriminant Analysis Using a High Dimensional Predictor},
  journal      = {Sankhya A},
  volume       = {80},
  number       = {S1},
  pages        = {112--145},
  year         = {2018},
  month        = aug,
  publisher    = {Springer Science and Business Media LLC},
  issn         = {0976-836X 0976-8378},
  doi          = {10.1007/s13171-018-0140-z},
  url          = {http://dx.doi.org/10.1007/S13171-018-0140-Z},
}
@article{li_ghosal_2018,
  author       = {Li, Xiuqi and Ghosal, Subhashis},
  title        = {{Bayesian} classification of multiclass functional data},
  journal      = {Electronic Journal of Statistics},
  volume       = {12},
  number       = {2},
  pages        = {4669--4696},
  year         = {2018},
  issn         = {1935-7524},
  doi          = {10.1214/18-EJS1522},
  abstractNote = {We propose a Bayesian approach to estimating parameters in multiclass functional models. Unordered multinomial probit, ordered multinomial probit and multinomial logistic models are considered. We use finite random series priors based on a suitable basis such as B-splines in these three multinomial models, and classify the functional data using the Bayes rule. We average over models based on the marginal likelihood estimated from Markov Chain Monte Carlo (MCMC) output. Posterior contraction rates for the three multinomial models are computed. We also consider Bayesian linear and quadratic discriminant analyses on the multivariate data obtained by applying a functional principal component technique on the original functional data. A simulation study is conducted to compare these methods on different types of data. We also apply these methods to a phoneme dataset.},
}
@article{das_ghosal_2017,
  author       = {Das, P. and Ghosal, S.},
  title        = {Analyzing ozone concentration by {Bayesian} spatio-temporal quantile regression},
  journal      = {Environmetrics},
  volume       = {28},
  number       = {4},
  year         = {2017},
}
@article{li_ghosal_2017,
  author       = {Li, Meng and Ghosal, Subhashis},
  title        = {{Bayesian} Detection of Image Boundaries},
  journal      = {Annals of Statistics},
  volume       = {45},
  number       = {5},
  pages        = {2190--2217},
  year         = {2017},
  month        = oct,
  issn         = {0090-5364},
  doi          = {10.1214/16-aos1523},
  abstractNote = {Detecting boundary of an image based on noisy observations is a fundamental problem of image processing and image segmentation. For a $d$-dimensional image ($d = 2, 3, \ldots$), the boundary can often be described by a closed smooth $(d - 1)$-dimensional manifold. In this paper, we propose a nonparametric Bayesian approach based on priors indexed by $\mathbb{S}^{d - 1}$, the unit sphere in $\mathbb{R}^d$. We derive optimal posterior contraction rates using Gaussian processes or finite random series priors using basis functions such as trigonometric polynomials for 2-dimensional images and spherical harmonics for 3-dimensional images. For 2-dimensional images, we show a rescaled squared exponential Gaussian process on $\mathbb{S}^1$ achieves four goals of guaranteed geometric restriction, (nearly) minimax rate optimal and adaptive to the smoothness level, convenient for joint inference and computationally efficient. We conduct an extensive study of its reproducing kernel Hilbert space, which may be of interest by its own and can also be used in other contexts. Simulations confirm excellent performance of the proposed method and indicate its robustness under model misspecification at least under the simulated settings.},
}
@comment{Abstract repaired for a dropped "fi" ligature and a line-break hyphen. NOTE(review): the exported abstract ends mid-sentence ("on the dimension of the"); the missing tail is not recoverable from this file.}
@article{suarez_ghosal_2017,
  author       = {Suarez, Adam J. and Ghosal, Subhashis},
  title        = {{Bayesian} Estimation of Principal Components for Functional Data},
  journal      = {Bayesian Analysis},
  volume       = {12},
  number       = {2},
  pages        = {311--333},
  year         = {2017},
  month        = jun,
  issn         = {1936-0975},
  doi          = {10.1214/16-ba1003},
  abstractNote = {The area of principal components analysis (PCA) has seen relatively few contributions from the Bayesian school of inference. In this paper, we propose a Bayesian method for PCA in the case of functional data observed with error. We suggest modeling the covariance function by use of an approximate spectral decomposition, leading to easily interpretable parameters. We study in depth the choice of using the implied distributions arising from the inverse Wishart prior and prove a convergence theorem for the case of an exact finite dimensional representation. We also discuss computational issues as well as the care needed in choosing hyperparameters. A simulation study is used to demonstrate competitive performance against a recent frequentist procedure, particularly in terms of the principal component estimation. Finally, we apply the method to a real dataset, where we also incorporate model selection on the dimension of the},
}
@article{bhaumik_ghosal_2017,
  author       = {Bhaumik, P. and Ghosal, S.},
  title        = {{Bayesian} inference for higher-order ordinary differential equation models},
  journal      = {Journal of Multivariate Analysis},
  volume       = {157},
  pages        = {103--114},
  year         = {2017},
}
@comment{Key carries a "b" suffix because das_ghosal_2017 is already used by the Environmetrics paper; a repeated key makes BibTeX skip this entry.}
@article{das_ghosal_2017b,
  author       = {Das, P. and Ghosal, S.},
  title        = {{Bayesian} quantile regression using random {B-spline} series prior},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {109},
  pages        = {121--143},
  year         = {2017},
}
@comment{Key carries a "b" suffix because bhaumik_ghosal_2017 is already used by the Journal of Multivariate Analysis paper; a repeated key makes BibTeX skip this entry.}
@article{bhaumik_ghosal_2017b,
  author       = {Bhaumik, P. and Ghosal, S.},
  title        = {Efficient {Bayesian} estimation and uncertainty quantification in ordinary differential equation models},
  journal      = {Bernoulli},
  volume       = {23},
  number       = {4B},
  pages        = {3537--3570},
  year         = {2017},
}
@comment{Journal name spelled as in the du_ghosal_2018 entry so the same journal is not cited under two spellings.}
@article{shen_ghosal_2017,
  author       = {Shen, W. N. and Ghosal, S.},
  title        = {Posterior contraction rates of density derivative estimation},
  journal      = {Sankhya A},
  volume       = {79},
  number       = {2},
  pages        = {336--354},
  year         = {2017},
}
@article{shen_ghosal_2016,
  author       = {Shen, W. N. and Ghosal, S.},
  title        = {Adaptive {Bayesian} density regression for high-dimensional data},
  journal      = {Bernoulli},
  volume       = {22},
  number       = {1},
  pages        = {396--420},
  year         = {2016},
}
@comment{Abstract repaired: the export had dropped the ff/ffi/fi ligatures (coefficients, differs, different, fields) and kept line-break hyphens.}
@article{suarez_ghosal_2016,
  author       = {Suarez, Adam Justin and Ghosal, Subhashis},
  title        = {{Bayesian} Clustering of Functional Data Using Local Features},
  journal      = {Bayesian Analysis},
  volume       = {11},
  number       = {1},
  pages        = {71--98},
  year         = {2016},
  month        = mar,
  issn         = {1936-0975},
  doi          = {10.1214/14-ba925},
  abstractNote = {The use of exploratory methods is an important step in the understanding of data. When clustering functional data, most methods have used traditional clustering techniques on a vector of estimated basis coefficients, assuming that the underlying signal functions live in the L2-space. Bayesian methods use models which imply the belief that some observations are realizations from some signal plus noise models with identical underlying signal functions. The method we propose differs in this respect: we employ a model that does not assume that any of the signal functions are truly identical. We cluster each signal coefficient using conditionally independent Dirichlet process priors, which leads to exact matching of local features, represented by coefficients in a multiresolution wavelet basis. We then demonstrate the method using two datasets from different fields to show broad application potential.},
}
@article{ghosal_2016,
  author       = {Ghosal, S.},
  title        = {Editorial overview: Special issue on {Bayesian} nonparametrics. {Subhashis Ghosal} Guest editor of the special issue on {Bayesian} nonparametrics},
  journal      = {Electronic Journal of Statistics},
  volume       = {10},
  number       = {2},
  pages        = {3217--3218},
  year         = {2016},
}
@article{luo_ghosal_2016,
  author       = {Luo, Shikai and Ghosal, Subhashis},
  title        = {Forward selection and estimation in high dimensional single index models},
  journal      = {Statistical Methodology},
  volume       = {33},
  pages        = {172--179},
  year         = {2016},
  month        = dec,
  publisher    = {Elsevier BV},
  issn         = {1572-3127},
  doi          = {10.1016/j.stamet.2016.09.002},
  url          = {http://dx.doi.org/10.1016/J.STAMET.2016.09.002},
  abstractNote = {We propose a new variable selection and estimation technique for high dimensional single index models with unknown monotone smooth link function. Among many predictors, typically, only a small fraction of them have significant impact on prediction. In such a situation, more interpretable models with better prediction accuracy can be obtained by variable selection. In this article, we propose a new penalized forward selection technique which can reduce high dimensional optimization problems to several one dimensional optimization problems by choosing the best predictor and then iterating the selection steps until convergence. The advantage of optimizing in one dimension is that the location of optimum solution can be obtained with an intelligent search by exploiting smoothness of the criterion function. Moreover, these one dimensional optimization problems can be solved in parallel to reduce computing time nearly to the level of the one-predictor problem. Numerical comparison with the LASSO and the shrinkage sliced inverse regression shows very promising performance of our proposed method.},
}
@article{ghosal_turnbull_zhang_hwang_2016,
  author       = {Ghosal, S. and Turnbull, B. and Zhang, H. H. and Hwang, W. Y.},
  title        = {Sparse penalized forward selection for support vector classification},
  journal      = {Journal of Computational and Graphical Statistics},
  volume       = {25},
  number       = {2},
  pages        = {493--514},
  year         = {2016},
}
@article{yoo_ghosal_2016,
  author       = {Yoo, W. W. and Ghosal, S.},
  title        = {Supremum norm posterior contraction and credible sets for nonparametric multivariate regression},
  journal      = {Annals of Statistics},
  volume       = {44},
  number       = {3},
  pages        = {1069--1102},
  year         = {2016},
}
@article{shen_ghosal_2015,
  author       = {Shen, W. N. and Ghosal, S.},
  title        = {Adaptive {Bayesian} procedures using random series priors},
  journal      = {Scandinavian Journal of Statistics: Theory and Applications},
  volume       = {42},
  number       = {4},
  pages        = {1194--1213},
  year         = {2015},
}
@article{banerjee_ghosal_2015,
  author       = {Banerjee, S. and Ghosal, S.},
  title        = {{Bayesian} structure learning in graphical models},
  journal      = {Journal of Multivariate Analysis},
  volume       = {136},
  pages        = {147--162},
  year         = {2015},
}
@article{ghosal_2015,
  author       = {Ghosal, S.},
  title        = {Discussion of ``Frequentist coverage of adaptive nonparametric {Bayesian} credible sets''},
  journal      = {Annals of Statistics},
  volume       = {43},
  number       = {4},
  pages        = {1455--1462},
  year         = {2015},
}
@article{li_ghosal_2015,
  author       = {Li, M. and Ghosal, S.},
  title        = {Fast translation invariant multiscale image denoising},
  journal      = {IEEE Transactions on Image Processing},
  volume       = {24},
  number       = {12},
  pages        = {4876--4887},
  year         = {2015},
}
@article{luo_ghosal_2015,
  author       = {Luo, Shikai and Ghosal, Subhashis},
  title        = {Prediction consistency of forward iterated regression and selection technique},
  journal      = {Statistics \& Probability Letters},
  volume       = {107},
  pages        = {79--83},
  year         = {2015},
  month        = dec,
  publisher    = {Elsevier BV},
  issn         = {0167-7152},
  doi          = {10.1016/j.spl.2015.08.005},
  url          = {http://dx.doi.org/10.1016/J.SPL.2015.08.005},
  abstractNote = {Recently, Hwang et al. (2009) introduced a penalized forward selection technique for high dimensional linear regression which appears to possess excellent prediction and variable selection properties. In this article, we show that the procedure is prediction consistent.},
}
@comment{The exported author list had split the "van der"/"van" particles into a bogus author named "van" and misspelled Ghosal; names are restored in "von Last, First" form. The citation key is kept unchanged so existing citations still resolve. NOTE(review): confirm the editor names against the journal issue.}
@article{ghoshal_kleijn_vaart_van_zanten_2015,
  author       = {Ghosal, S. and Kleijn, B. and van der Vaart, A. and van Zanten, H.},
  title        = {Special issue on {Bayesian} nonparametrics},
  journal      = {Journal of Statistical Planning and Inference},
  volume       = {166},
  pages        = {1},
  year         = {2015},
}
@comment{Abstract repaired: the export had dropped the ffi/fi ligatures (efficient, confirm) and kept line-break hyphens.}
@article{li_ghosal_2014,
  author       = {Li, Meng and Ghosal, Subhashis},
  title        = {{Bayesian} Multiscale Smoothing of {Gaussian} Noised Images},
  journal      = {Bayesian Analysis},
  volume       = {9},
  number       = {3},
  pages        = {733--758},
  year         = {2014},
  issn         = {1936-0975},
  doi          = {10.1214/14-ba871},
  abstractNote = {We propose a multiscale model for Gaussian noised images under a Bayesian framework for both 2-dimensional (2D) and 3-dimensional (3D) images. We use a Chinese restaurant process prior to randomly generate ties among intensity values at neighboring pixels in the image. The resulting Bayesian estimator enjoys some desirable asymptotic properties for identifying precise structures in the image. The proposed Bayesian denoising procedure is completely data-driven. A conditional conjugacy property allows analytical computation of the posterior distribution without involving Markov chain Monte Carlo (MCMC) methods, making the method computationally efficient. Simulations on Shepp-Logan phantom and Lena test images confirm that our smoothing method is comparable with the best available methods for light noise and outperforms them for heavier noise both visually and numerically. The proposed method is further extended for 3D images. A simulation study shows that the proposed method is numerically better than most existing denoising approaches for 3D images. A 3D Shepp-Logan phantom image is used to demonstrate the visual and numerical performance of the proposed method, along with the computational time. MATLAB toolboxes are made available online (both 2D and 3D) to implement the proposed method and reproduce the numerical results.},
}
@article{gu_ghosal_kleiner_2014,
  author       = {Gu, J. Z. and Ghosal, S. and Kleiner, D. E.},
  title        = {{Bayesian} {ROC} curve estimation under verification bias},
  journal      = {Statistics in Medicine},
  volume       = {33},
  number       = {29},
  pages        = {5081--5096},
  year         = {2014},
}
@comment{The exported key contained a space ("mckay curtis_..."), which BibTeX cannot parse as a citation key; the space is replaced by an underscore.}
@article{mckay_curtis_banerjee_ghosal_2014,
  author       = {McKay Curtis, S. and Banerjee, Sayantan and Ghosal, Subhashis},
  title        = {Fast {Bayesian} model assessment for nonparametric additive regression},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {71},
  pages        = {347--358},
  year         = {2014},
  month        = mar,
  publisher    = {Elsevier BV},
  issn         = {0167-9473},
  doi          = {10.1016/j.csda.2013.05.012},
  url          = {http://dx.doi.org/10.1016/J.CSDA.2013.05.012},
  abstractNote = {Variable selection techniques for the classical linear regression model have been widely investigated. Variable selection in fully nonparametric and additive regression models has been studied more recently. A Bayesian approach for nonparametric additive regression models is considered, where the functions in the additive model are expanded in a B-spline basis and a multivariate Laplace prior is put on the coefficients. Posterior probabilities of models defined by selection of predictors in the working model are computed, using a Laplace approximation method. The prior times the likelihood is expanded around the posterior mode, which can be identified with the group LASSO, for which a fast computing algorithm exists. Thus Markov chain Monte-Carlo or any other time consuming sampling based methods are completely avoided, leading to quick assessment of various posterior model probabilities. This technique is applied to the high-dimensional situation where the number of parameters exceeds the number of observations.},
}
@article{banerjee_ghosal_2014,
  author       = {Banerjee, S. and Ghosal, S.},
  title        = {Posterior convergence rates for estimating large precision matrices using graphical models},
  journal      = {Electronic Journal of Statistics},
  volume       = {8},
  pages        = {2111--2137},
  year         = {2014},
}
@article{shen_tokdar_ghosal_2013,
  author       = {Shen, W. N. and Tokdar, S. T. and Ghosal, S.},
  title        = {Adaptive {Bayesian} multivariate density estimation with {Dirichlet} mixtures},
  journal      = {Biometrika},
  volume       = {100},
  number       = {3},
  pages        = {623--640},
  year         = {2013},
}
@article{white_ghosal_2013,
  author       = {White, J. T. and Ghosal, S.},
  title        = {Denoising three-dimensional and colored images using a {Bayesian} multi-scale model for photon counts},
  journal      = {Signal Processing},
  volume       = {93},
  number       = {11},
  pages        = {2906--2914},
  year         = {2013},
}
@comment{The third author's "van" particle was dropped by the export and is restored in "von Last, First" form. The citation key is kept unchanged so existing citations still resolve. NOTE(review): confirm against the published article.}
@article{belitser_ghosal_zanten_2012,
  author       = {Belitser, E. and Ghosal, S. and van Zanten, H.},
  title        = {Optimal two-stage procedures for estimating location and size of the maximum of a multivariate regression function},
  journal      = {Annals of Statistics},
  volume       = {40},
  number       = {6},
  pages        = {2850--2876},
  year         = {2012},
}
@article{white_ghosal_2011,
  author       = {White, J. T. and Ghosal, S.},
  title        = {{Bayesian} smoothing of photon-limited images with applications in astronomy},
  journal      = {Journal of the Royal Statistical Society. Series B, Statistical Methodology},
  volume       = {73},
  pages        = {579--599},
  year         = {2011},
}
@article{ghosal_roy_2011,
  author       = {Ghosal, S. and Roy, A.},
  title        = {Identifiability of the proportion of null hypotheses in skew-mixture models for the p-value distribution},
  journal      = {Electronic Journal of Statistics},
  volume       = {5},
  pages        = {329--341},
  year         = {2011},
}
@comment{Key carries a "b" suffix because ghosal_roy_2011 is already used by the Electronic Journal of Statistics paper; a repeated key makes BibTeX skip this entry.}
@article{ghosal_roy_2011b,
  author       = {Ghosal, S. and Roy, A.},
  title        = {Predicting false discovery proportion under dependence},
  journal      = {Journal of the American Statistical Association},
  volume       = {106},
  number       = {495},
  pages        = {1208--1218},
  year         = {2011},
}
@article{clarke_ghosal_2010,
  author       = {Clarke, B. and Ghosal, S.},
  title        = {Reference priors for exponential families with increasing dimension},
  journal      = {Electronic Journal of Statistics},
  volume       = {4},
  pages        = {737--780},
  year         = {2010},
}
@comment{NOTE(review): the exported title read "L-1-consistency", presumably a flattened subscript; it is typeset here as L_1 in math mode. Confirm against the published title.}
@article{wu_ghosal_2010,
  author       = {Wu, Y. F. and Ghosal, S.},
  title        = {The {$L_1$}-consistency of {Dirichlet} mixtures in multivariate {Bayesian} density estimation},
  journal      = {Journal of Multivariate Analysis},
  volume       = {101},
  number       = {10},
  pages        = {2411--2419},
  year         = {2010},
}
@article{gu_ghosal_2009,
  author       = {Gu, J. Z. and Ghosal, S.},
  title        = {{Bayesian} {ROC} curve estimation under binormality using a rank likelihood},
  journal      = {Journal of Statistical Planning and Inference},
  volume       = {139},
  number       = {6},
  pages        = {2076--2083},
  year         = {2009},
}
@inproceedings{ghosal_roy_2009,
  author       = {Ghosal, S. and Roy, A.},
  title        = {{Bayesian} nonparametric approach to multiple testing},
  booktitle    = {Perspectives in Mathematical Sciences {I}: Probability and Statistics},
  volume       = {7},
  pages        = {139--164},
  year         = {2009},
}
@article{roy_ghosal_rosenberger_2009,
  author       = {Roy, Anindya and Ghosal, Subhashis and Rosenberger, William F.},
  title        = {Convergence properties of sequential {Bayesian} {D-optimal} designs},
  journal      = {Journal of Statistical Planning and Inference},
  volume       = {139},
  number       = {2},
  pages        = {425--440},
  year         = {2009},
  month        = feb,
  issn         = {0378-3758},
  doi          = {10.1016/j.jspi.2008.04.025},
  publisher    = {Elsevier BV},
  abstractNote = {We establish convergence properties of sequential Bayesian optimal designs. In particular, for sequential D-optimality under a general nonlinear location-scale model for binary experiments, we establish posterior consistency, consistency of the design measure, and the asymptotic normality of posterior following the design. We illustrate our results in the context of a particular application in the design of phase I clinical trials, namely a sequential design of Haines et al. [2003. Bayesian optimal designs for phase I clinical trials. Biometrics 59, 591--600] that incorporates an ethical constraint on overdosing.},
}
@article{hwang_zhang_ghosal_2009,
  author       = {Hwang, W. Y. and Zhang, H. H. and Ghosal, S.},
  title        = {{FIRST}: Combining forward iterative selection and shrinkage in high dimensional sparse linear regression},
  journal      = {Statistics and its Interface},
  volume       = {2},
  number       = {3},
  pages        = {341--348},
  year         = {2009},
  doi          = {10.4310/sii.2009.v2.n3.a7},
  abstractNote = {We propose a new class of variable selection techniques for regression in high dimensional linear models based on a forward selection version of the LASSO, adaptive LASSO or elastic net, respectively to be called as forward iterative regression and shrinkage technique (FIRST), adaptive FIRST and elastic FIRST. These methods seem to work effectively for extremely sparse high dimensional linear models. We exploit the fact that the LASSO, adaptive LASSO and elastic net have closed form solutions when the predictor is one-dimensional. The explicit formula is then repeatedly used in an iterative fashion to build the model until convergence occurs. By carefully considering the relationship between estimators at successive stages, we develop fast algorithms to compute our estimators. The performance of our new estimators are compared with commonly used estimators in terms of predictive accuracy and errors in variable selection. AMS 2000 subject classifications: Primary 62J05, 62J05; secondary 62J07.},
}
% The exported abstract for this entry was only the placeholder word
% "Abstract", so no abstract field is stored.
@article{gu_ghosal_roy_2008,
  author    = {Gu, Jiezhun and Ghosal, Subhashis and Roy, Anindya},
  title     = {{Bayesian} bootstrap estimation of {ROC} curve},
  journal   = {Statistics in Medicine},
  volume    = {27},
  number    = {26},
  pages     = {5407--5420},
  year      = {2008},
  month     = nov,
  issn      = {0277-6715 1097-0258},
  doi       = {10.1002/sim.3366},
  publisher = {Wiley},
}
% The exported abstract had page furniture fused into it (a doubled
% "Abstract" heading, run-together keywords, acknowledgements). The keywords
% now live in their own field; the abstract holds only the abstract text.
@article{gu_ghosal_2008,
  author       = {Gu, Jiezhun and Ghosal, Subhashis},
  title        = {Strong approximations for resample quantile processes and application to {ROC} methodology},
  journal      = {Journal of Nonparametric Statistics},
  volume       = {20},
  number       = {3},
  pages        = {229--240},
  year         = {2008},
  month        = apr,
  issn         = {1048-5252 1029-0311},
  doi          = {10.1080/10485250801954128},
  publisher    = {Informa UK Limited},
  keywords     = {Bayesian bootstrap, Kiefer process, ROC curve, strong approximation, quantile process},
  abstractNote = {The receiver operating characteristic (ROC) curve is defined as true positive rate versus false positive rate obtained by varying a decision threshold criterion. It has been widely used in medical sciences for its ability to measure the accuracy of diagnostic or prognostic tests. Mathematically speaking, ROC curve is the composition of survival function of one population with the quantile function of another population. In this paper, we study strong approximation for the quantile processes of the Bayesian bootstrap (BB) resampling distributions, and use this result to study strong approximations for the BB version of the ROC process in terms of two independent Kiefer processes. The results imply asymptotically accurate coverage probabilities for the confidence bands for the ROC curve and confidence intervals for the area under the curve functional of the ROC constructed using the BB method. Similar results follow for the bootstrap resampling distribution.},
}
@article{tang_ghosal_2007,
  author       = {Tang, Yongqiang and Ghosal, Subhashis},
  title        = {A consistent nonparametric {Bayesian} procedure for estimating autoregressive conditional densities},
  journal      = {Computational Statistics \& Data Analysis},
  volume       = {51},
  number       = {9},
  pages        = {4424--4437},
  year         = {2007},
  month        = may,
  issn         = {0167-9473},
  doi          = {10.1016/j.csda.2006.06.020},
  publisher    = {Elsevier BV},
  abstractNote = {This article proposes a Bayesian infinite mixture model for the estimation of the conditional density of an ergodic time series. A nonparametric prior on the conditional density is described through the Dirichlet process. In the mixture model, a kernel is used leading to a dynamic nonlinear autoregressive model. This model can approximate any linear autoregressive model arbitrarily closely while imposing no constraint on parameters to ensure stationarity. We establish sufficient conditions for posterior consistency in two different topologies. The proposed method is compared with the mixture of autoregressive model [Wong and Li, 2000. On a mixture autoregressive model. J. Roy. Statist. Soc. Ser. B 62(1), 91–115] and the double-kernel local linear approach [Fan et al., 1996. Estimation of conditional densities and sensitivity measures in nonlinear dynamical systems. Biometrika 83, 189–206] by simulations and real examples. Our method shows excellent performances in these studies.},
}
% The exported key contained spaces ("ghosal_van der vaart_2007"), which is
% not a valid citation key; the spaces are replaced by underscores.
@article{ghosal_van_der_vaart_2007,
  author       = {Ghosal, Subhashis and van der Vaart, Aad},
  title        = {Convergence rates of posterior distributions for noniid observations},
  journal      = {Annals of Statistics},
  volume       = {35},
  number       = {1},
  pages        = {192--223},
  year         = {2007},
  month        = feb,
  issn         = {0090-5364},
  doi          = {10.1214/009053606000001172},
  abstractNote = {We consider the asymptotic behavior of posterior distributions and Bayes estimators based on observations which are required to be neither independent nor identically distributed. We give general results on the rate of convergence of the posterior measure relative to distances derived from a testing criterion. We then specialize our results to independent, nonidentically distributed observations, Markov processes, stationary Gaussian time series and the white noise model. We apply our general results to several examples of infinite-dimensional statistical models including nonparametric regression with normal errors, binary regression, Poisson regression, an interval censoring model, Whittle estimation of the spectral density of a time series and a nonlinear autoregressive model. © Institute of Mathematical Statistics, 2007.},
}
@article{tang_ghosal_roy_2007,
  author       = {Tang, Yongqiang and Ghosal, Subhashis and Roy, Anindya},
  title        = {Nonparametric {Bayesian} estimation of positive false discovery rates},
  journal      = {Biometrics},
  volume       = {63},
  number       = {4},
  pages        = {1126--1134},
  year         = {2007},
  month        = dec,
  issn         = {0006-341X},
  doi          = {10.1111/j.1541-0420.2007.00819.x},
  abstractNote = {We propose a Dirichlet process mixture model (DPMM) for the P-value distribution in a multiple testing problem. The DPMM allows us to obtain posterior estimates of quantities such as the proportion of true null hypothesis and the probability of rejection of a single hypothesis. We describe a Markov chain Monte Carlo algorithm for computing the posterior and the posterior estimates. We propose an estimator of the positive false discovery rate based on these posterior estimates and investigate the performance of the proposed estimator via simulation. We also apply our methodology to analyze a leukemia data set.},
}
% Key carries a "b" suffix: tang_ghosal_2007 is already taken by the
% Computational Statistics and Data Analysis entry (same authors, same year),
% and a repeated key makes BibTeX reject this entry.
@article{tang_ghosal_2007b,
  author       = {Tang, Yongqiang and Ghosal, Subhashis},
  title        = {Posterior consistency of {Dirichlet} mixtures for estimating a transition density},
  journal      = {Journal of Statistical Planning and Inference},
  volume       = {137},
  number       = {6},
  pages        = {1711--1726},
  year         = {2007},
  month        = jun,
  issn         = {0378-3758},
  doi          = {10.1016/j.jspi.2006.03.007},
  publisher    = {Elsevier BV},
  abstractNote = {The Dirichlet process mixture of normal densities has been successfully used as a prior for Bayesian density estimation for independent and identically distributed (i.i.d.) observations. A Markov model, which generalizes the i.i.d. set up, may be thought of as a suitable framework for observations arising over time. The predictive density of the future observation is then given by the posterior expectation of the transition density given the observations. We consider a Dirichlet process mixture prior for the transition density and study posterior consistency. Like the i.i.d. case, posterior consistency is obtained if the Kullback–Leibler neighborhoods of the true transition density receive positive prior probabilities and uniformly exponentially consistent tests exist for testing the true density against the complement of its neighborhoods. We show that under reasonable conditions, the Kullback–Leibler property holds for the Dirichlet mixture prior. For certain topologies on the space of transition densities, we show consistency holds under appropriate conditions by constructing the required tests. This approach, however, may not always lead to the best possible results. By modifying a recent approach of Walker [2004. New approaches to Bayesian consistency. Ann. Statist. 32, 2028–2043] for the i.i.d. case, we also show that better conditions for consistency can be given for certain weaker topologies.},
}
@article{ghosal_vaart_2007,
  author       = {Ghosal, Subhashis and van der Vaart, Aad},
  title        = {Posterior convergence rates of {Dirichlet} mixtures at smooth densities},
  journal      = {Annals of Statistics},
  volume       = {35},
  number       = {2},
  pages        = {697--723},
  year         = {2007},
  month        = apr,
  issn         = {0090-5364},
  doi          = {10.1214/009053606000001271},
  abstractNote = {We study the rates of convergence of the posterior distribution for Bayesian density estimation with Dirichlet mixtures of normal distributions as the prior. The true density is assumed to be twice continuously differentiable. The bandwidth is given a sequence of priors which is obtained by scaling a single prior by an appropriate order. In order to handle this problem, we derive a new general rate theorem by considering a countable covering of the parameter space whose prior probabilities satisfy a summability condition together with certain individual bounds on the Hellinger metric entropy. We apply this new general theorem on posterior convergence rates by computing bounds for Hellinger (bracketing) entropy numbers for the involved class of densities, the error in the approximation of a smooth density by normal mixtures and the concentration rate of the prior. The best obtainable rate of convergence of the posterior turns out to be equivalent to the well-known frequentist rate for integrated mean squared error $n^{-2/5}$ up to a logarithmic factor.},
}
@article{ghosal_roy_2006,
  author       = {Ghosal, Subhashis and Roy, Anindya},
  title        = {Posterior consistency of {Gaussian} process prior for nonparametric binary regression},
  journal      = {Annals of Statistics},
  volume       = {34},
  number       = {5},
  pages        = {2413--2429},
  year         = {2006},
  month        = oct,
  issn         = {0090-5364},
  doi          = {10.1214/009053606000000795},
  abstractNote = {Consider binary observations whose response probability is an unknown smooth function of a set of covariates. Suppose that a prior on the response probability function is induced by a Gaussian process mapped to the unit interval through a link function. In this paper we study consistency of the resulting posterior distribution. If the covariance kernel has derivatives up to a desired order and the bandwidth parameter of the kernel is allowed to take arbitrarily small values, we show that the posterior distribution is consistent in the $L_1$-distance. As an auxiliary result to our proofs, we show that, under certain conditions, a Gaussian process assigns positive probabilities to the uniform neighborhoods of a continuous function. This result may be of independent interest in the literature for small ball probabilities of Gaussian processes.},
}
@article{choudhuri_ghosal_roy_2004,
  author       = {Choudhuri, N. and Ghosal, S. and Roy, A.},
  title        = {{Bayesian} estimation of the spectral density of a time series},
  journal      = {Journal of the American Statistical Association},
  volume       = {99},
  number       = {468},
  pages        = {1050--1059},
  year         = {2004},
  month        = dec,
  issn         = {1537-274X},
  doi          = {10.1198/016214504000000557},
  abstractNote = {This article describes a Bayesian approach to estimating the spectral density of a stationary time series. A nonparametric prior on the spectral density is described through Bernstein polynomials. Because the actual likelihood is very complicated, a pseudoposterior distribution is obtained by updating the prior using the Whittle likelihood. A Markov chain Monte Carlo algorithm for sampling from this posterior distribution is described that is used for computing the posterior mean, variance, and other statistics. A consistency result is established for this pseudoposterior distribution that holds for a short-memory Gaussian time series and under some conditions on the prior. To prove this asymptotic result, a general consistency theorem of Schwartz is extended for a triangular array of independent, nonidentically distributed observations. This extension is also of independent interest. A simulation study is conducted to compare the proposed method with some existing methods. The method is illustrated with the well-studied sunspot dataset.},
}
% Key carries a "b" suffix: choudhuri_ghosal_roy_2004 is already taken by the
% Journal of the American Statistical Association entry (same authors, same
% year), and a repeated key makes BibTeX reject this entry.
@article{choudhuri_ghosal_roy_2004b,
  author       = {Choudhuri, N. and Ghosal, S. and Roy, A.},
  title        = {Contiguity of the {Whittle} measure for a {Gaussian} time series},
  journal      = {Biometrika},
  volume       = {91},
  number       = {1},
  pages        = {211--218},
  year         = {2004},
  month        = mar,
  issn         = {1464-3510},
  doi          = {10.1093/biomet/91.1.211},
  abstractNote = {For a stationary time series, Whittle constructed a likelihood for the spectral density based on the approximate independence of the discrete Fourier transforms of the data at certain frequencies. Whittle's likelihood has been widely used in the literature for constructing estimators. In this paper, we show that, for a Gaussian time series, the Whittle measure is mutually contiguous with the actual distribution of the data. As a consequence, most asymptotic properties of estimators and test statistics derived under the Whittle measure can be carried over to the actual distribution. Copyright Biometrika Trust 2004, Oxford University Press.},
}
@article{belitser_ghosal_2003,
  author  = {Belitser, E. and Ghosal, S.},
  title   = {Adaptive {Bayesian} inference on the mean of an infinite-dimensional normal distribution},
  journal = {Annals of Statistics},
  volume  = {31},
  number  = {2},
  pages   = {536--559},
  year    = {2003},
}
@article{ghosal_lember_vaart_2003,
  author  = {Ghosal, S. and Lember, J. and van der Vaart, A.},
  title   = {On {Bayesian} adaptation},
  journal = {Acta Applicandae Mathematicae},
  volume  = {79},
  number  = {1--2},
  pages   = {165--175},
  year    = {2003},
  issn    = {0167-8019},
  doi     = {10.1023/A:1025856016236},
}
@article{amewou-atisso_ghosal_ghosh_ramamoorthi_2003,
  author       = {Amewou-Atisso, M. and Ghosal, S. and Ghosh, J. K. and Ramamoorthi, R. V.},
  title        = {Posterior consistency for semi-parametric regression problems},
  journal      = {Bernoulli},
  volume       = {9},
  number       = {2},
  pages        = {291--312},
  year         = {2003},
  month        = apr,
  issn         = {1573-9759},
  doi          = {10.3150/bj/1068128979},
  abstractNote = {We consider Bayesian inference in the linear regression problem with an unknown error distribution that is symmetric about zero. We show that if the prior for the error distribution assigns positive probabilities to a certain type of neighbourhood of the true distribution, then the posterior distribution is consistent in the weak topology. In particular, this implies that the posterior distribution of the regression parameters is consistent in the Euclidean metric. The result follows from our generalization of a celebrated result of Schwartz to the independent, non-identical case and the existence of exponentially consistent tests of the complement of the neighbourhoods shown here. We then specialize to two important prior distributions, the Polya tree and Dirichlet mixtures, and show that under appropriate conditions these priors satisfy the positivity requirement of the prior probabilities of the neighbourhoods of the true density. We consider the case of both non-stochastic and stochastic regressors. A similar problem of Bayesian inference in a generalized linear model for binary responses with an unknown link is also considered.},
}
@article{ghosal_1999,
  author       = {Ghosal, S.},
  title        = {Asymptotic normality of posterior distributions in high-dimensional linear models},
  journal      = {Bernoulli},
  volume       = {5},
  number       = {2},
  pages        = {315--331},
  year         = {1999},
  month        = apr,
  issn         = {1350-7265},
  doi          = {10.2307/3318438},
  abstractNote = {We study consistency and asymptotic normality of posterior distributions of the regression coefficient in a linear model when the dimension of the parameter grows with increasing sample size. Under certain growth restrictions on the dimension (depending on the design matrix), we show that the posterior distributions concentrate in neighbourhoods of the true parameter and can be approximated by an appropriate normal distribution.},
}