% Journal article: Zhou and Lahiri (2023), Journal of Time Series Analysis.
% No volume/number/pages are recorded in this entry.
@article{zhou_lahiri_2023,
  author       = {Zhou, Weilian and Lahiri, Soumendra},
  title        = {Stationary Jackknife},
  journal      = {Journal of Time Series Analysis},
  year         = {2023},
  month        = aug,
  issn         = {1467-9892},
  doi          = {10.1111/jtsa.12714},
  abstractNote = {Variance estimation is an important aspect in statistical inference, especially in the dependent data situations. Resampling methods are ideal for solving this problem since these do not require restrictive distributional assumptions. In this paper, we develop a novel resampling method in the Jackknife family called the stationary jackknife. It can be used to estimate the variance of a statistic in the cases where observations are from a general stationary sequence. Unlike the moving block jackknife, the stationary jackknife computes the jackknife replication by deleting a variable length block and the length has a truncated geometric distribution. Under appropriate assumptions, we can show the stationary jackknife variance estimator is a consistent estimator for the case of the sample mean and, more generally, for a class of nonlinear statistics. Further, the stationary jackknife is shown to provide reasonable variance estimation for a wider range of expected block lengths when compared with the moving block jackknife by simulation.},
}

% Workshop paper: the venue is a proceedings title, so it is stored in
% booktitle of an inproceedings entry rather than in a journal field.
@inproceedings{zhou_yi_mishne_chi_2021,
  author       = {Zhou, Weilian and Yi, Haidong and Mishne, Gal and Chi, Eric},
  title        = {Scalable Algorithms for Convex Clustering},
  booktitle    = {2021 {IEEE} Data Science and Learning Workshop ({DSLW})},
  year         = {2021},
  doi          = {10.1109/DSLW51110.2021.9523411},
  abstractNote = {Convex clustering is an appealing approach to many classical clustering problems. It stands out among standard methods as it enjoys the existence of a unique global optimal solution. Despite this advantage, convex clustering has not been widely adopted, due to its computationally intensive nature. To address this obstacle, especially in the ``big data'' setting, we introduce a Scalable cOnvex cLustering AlgoRithm via Parallel Coordinate Descent Method (SOLAR-PCDM) that improves the algorithm's scalability by combining a parallelizable algorithm with a compression strategy. This idea is in line with the rise and ever increasing availability of high performance computing systems built around multi-core processors, GPU-accelerators, and computer clusters. SOLAR-PCDM consists of two parts. In the first part, we develop a method called weighted convex clustering to recover the solution path by formulating a sequence of smaller equivalent optimization problems. In the second part, we utilize the Parallel Coordinate Descent Method (PCDM) to solve a specific convex clustering problem. We demonstrate the correctness and scalability of our algorithm on both simulated and real data examples.},
}