@article{shockley_gupta_harris_lahiri_peddada_2019,
  title={Quality Control of Quantitative High Throughput Screening Data},
  volume={10},
  ISSN={1664-8021},
  DOI={10.3389/fgene.2019.00387},
  abstractNote={Quantitative high throughput screening (qHTS) experiments can generate thousands of concentration-response profiles to screen compounds for potentially adverse effects. However, potency estimates for a single compound can vary considerably in study designs incorporating multiple concentration-response profiles for each compound. We introduce an automated quality control procedure based on analysis of variance (ANOVA) to identify and filter out compounds with multiple-cluster response patterns and improve potency estimation in qHTS assays. Our approach, called Cluster Analysis by Subgroups using ANOVA (CASANOVA), clusters compound-specific response patterns into statistically supported subgroups. Applying CASANOVA to 43 publicly available qHTS data sets, we found that only about 20% of compounds with response values outside of the noise band have single-cluster responses. The error rates for incorrectly separating true clusters and incorrectly clumping disparate clusters were both less than 5% in extensive simulation studies. Simulation studies also showed that the bias and variance of concentration at half-maximal response (AC50) estimates were usually within 10-fold when using a weighted-average approach for potency estimation. In short, CASANOVA effectively sorts out compounds with “inconsistent” response patterns and produces trustworthy AC50 values.},
  journal={FRONTIERS IN GENETICS},
  author={Shockley, Keith R. and Gupta, Shuva and Harris, Shawn F. and Lahiri, Soumendra N. and Peddada, Shyamal D.},
  year={2019},
  month={May}
}

@article{chatterjee_gupta_lahiri_2015,
  title={On the residual empirical process based on the ALASSO in high dimensions and its functional oracle property},
  volume={186},
  ISSN={1872-6895},
  DOI={10.1016/j.jeconom.2015.02.012},
  abstractNote={This paper considers post-variable-selection inference in a high-dimensional penalized regression model based on the ALASSO method of Zou (2006). It is shown that under suitable sparsity conditions, the residual empirical process based on the ALASSO provides a valid inference methodology in very high-dimensional regression problems where conventional methods fail. It is also shown that the ALASSO-based residual empirical process satisfies a functional oracle property, i.e., in addition to selecting the set of relevant variables with probability tending to one, the ALASSO-based residual empirical process converges to the same limiting Gaussian process as the OLS-based residual empirical process under the oracle. The functional oracle property is critically exploited to construct asymptotically valid confidence bands for the error distribution function and prediction intervals for unobserved values of the response variable in the high-dimensional setup, where traditional non-penalized methods are known to fail. Simulation results are presented illustrating the finite-sample performance of the proposed methodology.},
  number={2},
  journal={JOURNAL OF ECONOMETRICS},
  author={Chatterjee, A. and Gupta, S. and Lahiri, S. N.},
  year={2015},
  month={Jun},
  pages={317–324}
}

@article{gupta_lahiri_2014,
  title={Comment},
  volume={109},
  ISSN={1537-274X},
  DOI={10.1080/01621459.2014.905789},
  abstractNote={which is again the LS estimator.
Minimizing LS(b) with b > 1 gives a more robust way of doing LS, in which the effect of potential outliers is diminished by the local averaging of b neighboring values; details are omitted due to lack of space. Similarly to the above, minimizing L1(1) is equivalent to L1 regression, whereas minimizing L1(b) with b > 1 gives additional robustness. Finally, let us revisit the general case of model (1) with μp(xj) = β0 + xj′βp. When p > 1, the regressors xj cannot be sorted in ascending order. One could instead use a local-averaging or nearest-neighbor technique to compute the subsample means. But no such trick is needed in the most interesting case of b = 1, since the quantities LS(1) and L1(1) are unequivocally defined as},
  number={507},
  journal={JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION},
  author={Gupta, Shuva and Lahiri, S. N.},
  year={2014},
  month={Jul},
  pages={1013–1015}
}

@article{das_gupta_gupta_2014,
  title={Screening active factors in supersaturated designs},
  volume={77},
  ISSN={1872-7352},
  DOI={10.1016/j.csda.2014.02.023},
  abstractNote={Identification of active factors in supersaturated designs (SSDs) has been the subject of much recent study. Although several methods have been previously proposed, a solution to the problem beyond one or two active factors still seems to be unsatisfactory. The smoothly clipped absolute deviation (SCAD) penalty function for variable selection has nice theoretical properties, but due to its nonconvex nature, it poses computational issues in model fitting. As a result, it has so far not shown much promise for SSDs. Another issue regarding its inefficiency, particularly for SSDs, has been the method used for choosing the SCAD sparsity tuning parameter. The selection of the SCAD sparsity tuning parameter using the AIC and BIC information criteria, generalized cross-validation, and a recently proposed method based on the norm of the error in the solution of systems of linear equations is investigated. This is performed in conjunction with a recently developed, more efficient algorithm for implementing the SCAD penalty. The small-sample bias-corrected cAIC is found to yield a model size closer to the true model size. Results of the numerical study and real data analyses reveal that the SCAD is a valuable tool for identifying active factors in SSDs.},
  journal={COMPUTATIONAL STATISTICS & DATA ANALYSIS},
  author={Das, Ujjwal and Gupta, Sudhir and Gupta, Shuva},
  year={2014},
  month={Sep},
  pages={223–232}
}