% Coleman, Martin and Reich (2015), Journal of Applied Statistics 42(8):
% exact distribution of multiple window discrete scan statistics.
% Fields are stored in canonical form (bare month macro, "--" page range,
% plain ISSN, normally-cased journal name) so any style can format them.
@article{coleman_martin_reich_2015,
  author       = {Coleman, Deidra A. and Martin, Donald E. K. and Reich, Brian J.},
  title        = {Multiple window discrete scan statistic for higher-order {Markovian} sequences},
  journal      = {Journal of Applied Statistics},
  year         = {2015},
  month        = aug,
  volume       = {42},
  number       = {8},
  pages        = {1690--1705},
  issn         = {1360-0532},
  doi          = {10.1080/02664763.2015.1005061},
  abstractNote = {Accurate and efficient methods to detect unusual clusters of abnormal activity are needed in many fields such as medicine and business. Often the size of clusters is unknown; hence, multiple (variable) window scan statistics are used to identify clusters using a set of different potential cluster sizes. We give an efficient method to compute the exact distribution of multiple window discrete scan statistics for higher-order, multi-state Markovian sequences. We define a Markov chain to efficiently keep track of probabilities needed to compute p-values for the statistic. The state space of the Markov chain is set up by a criterion developed to identify strings that are associated with observing the specified values of the statistic. Using our algorithm, we identify cases where the available approximations do not perform well. We demonstrate our methods by detecting unusual clusters of made free throw shots by National Basketball Association players during the 2009--2010 regular season.},
}
% Martin and Coleman (2011), Journal of Applied Probability 48(4):
% exact distribution of clump statistics for a collection of words.
% Page range uses "--"; given names are spelled out so the style, not the
% database, decides how to abbreviate them.
@article{martin_coleman_2011,
  author       = {Martin, Donald E. K. and Coleman, Deidra A.},
  title        = {Distribution of clump statistics for a collection of words},
  journal      = {Journal of Applied Probability},
  year         = {2011},
  volume       = {48},
  number       = {4},
  pages        = {1049--1059},
  doi          = {10.1239/jap/1324046018},
  abstractNote = {We give an efficient method based on minimal deterministic finite automata for computing the exact distribution of the number of occurrences and coverage of clumps (maximal sets of overlapping words) of a collection of words. In addition, we compute probabilities for the number of h-clumps, word groupings where gaps of a maximal length h between occurrences of words are allowed. The method facilitates the computation of p-values for testing procedures. A word is allowed to contain other words of the collection, making the computation more general, but also more difficult. The underlying sequence is assumed to be Markovian of an arbitrary order.},
}