% Publication database, newest first.
%
% Conventions used throughout this file:
%   - plain ASCII only; accents are written as LaTeX escapes such as {\'e}
%   - citation keys are lowercase ASCII without spaces; works that would share
%     a key are disambiguated with an a/b suffix
%   - month uses the predefined three-letter macros, unquoted
%   - page ranges use a double hyphen
%   - doi holds the bare DOI; url holds the https resolver link
%   - arXiv preprints are misc entries carrying eprint/archiveprefix
%   - abstractNote is an exporter field ignored by bibliography styles; it is
%     kept under that name so raw percent signs in abstracts never reach LaTeX

@article{yanchenko_sengupta_2024,
  author  = {Yanchenko, Eric and Sengupta, Srijan},
  title   = {A generalized hypothesis test for community structure in networks},
  journal = {Network Science},
  volume  = {3},
  year    = {2024},
  month   = mar,
  issn    = {2050-1250},
  doi     = {10.1017/nws.2024.1},
}

@article{tabaie_sengupta_pruitt_fong_2023,
  author       = {Tabaie, A. and Sengupta, S. and Pruitt, Zoe M. and Fong, A.},
  title        = {A natural language processing approach to categorise contributing factors from patient safety event reports},
  journal      = {BMJ Health \& Care Informatics},
  volume       = {30},
  number       = {1},
  pages        = {e100731},
  year         = {2023},
  issn         = {2632-1009},
  doi          = {10.1136/bmjhci-2022-100731},
  abstractNote = {Objectives: The objective of this study was to explore the use of natural language processing (NLP) algorithm to categorise contributing factors from patient safety event (PSE). Contributing factors are elements in the healthcare process (eg, communication failures) that instigate an event or allow an event to occur. Contributing factors can be used to further investigate why safety events occurred.},
}

@article{ganguly_buhrman_kline_mun_sengupta_2023,
  author       = {Ganguly, Indrila and Buhrman, Graham and Kline, Ed and Mun, Seong K. K. and Sengupta, Srijan},
  title        = {Automated Error Labeling in Radiation Oncology via Statistical Natural Language Processing},
  journal      = {Diagnostics},
  volume       = {13},
  number       = {7},
  year         = {2023},
  month        = apr,
  issn         = {2075-4418},
  doi          = {10.3390/diagnostics13071215},
  url          = {https://doi.org/10.3390/diagnostics13071215},
  abstractNote = {A report published in 2000 from the Institute of Medicine revealed that medical errors were a leading cause of patient deaths, and urged the development of error detection and reporting systems. The field of radiation oncology is particularly vulnerable to these errors due to its highly complex process workflow, the large number of interactions among various systems, devices, and medical personnel, as well as the extensive preparation and treatment delivery steps. Natural language processing (NLP)-aided statistical algorithms have the potential to significantly improve the discovery and reporting of these medical errors by relieving human reporters of the burden of event type categorization and creating an automated, streamlined system for error incidents. In this paper, we demonstrate text-classification models developed with clinical data from a full service radiation oncology center (test center) that can predict the broad level and first level category of an error given a free-text description of the error. All but one of the resulting models had an excellent performance as quantified by several metrics. The results also suggest that more development and more extensive training data would further improve future results.},
}

@article{yanchenko_sengupta_2023,
  author       = {Yanchenko, Eric and Sengupta, Srijan},
  title        = {Core-periphery structure in networks: A statistical exposition},
  journal      = {Statistics Surveys},
  publisher    = {Institute of Mathematical Statistics},
  volume       = {17},
  pages        = {42--74},
  year         = {2023},
  month        = jan,
  issn         = {1935-7516},
  doi          = {10.1214/23-SS141},
  url          = {https://doi.org/10.1214/23-SS141},
  abstractNote = {Many real-world networks are theorized to have core-periphery structure consisting of a densely-connected core and a loosely-connected periphery. While this phenomenon has been extensively studied in a range of scientific disciplines, it has not received sufficient attention in the statistics community. In this expository article, our goal is to raise awareness about this topic and encourage statisticians to address the many open inference problems in this area. To this end, we first summarize the current research landscape by reviewing the metrics and models that have been used for quantitative studies on core-periphery structure. Next, we formulate and explore various inferential problems in this context, such as estimation, hypothesis testing, and Bayesian inference, and discuss related computational techniques. We also outline the multidisciplinary scientific impact of core-periphery structure in a number of real-world networks. Throughout the article, we provide our own interpretation of the literature from a statistical perspective, with the goal of prioritizing open problems where contribution from the statistics community will be most effective and important.},
}

@article{ray_guha_dhungana_karak_choudhury_ray_zubair_ray_sengupta_bhatt_etal_2023,
  author       = {Ray, Madhab and Guha, Santanu and Dhungana, Ranga Raj and Karak, Avik and Choudhury, Basabendra and Ray, Bipasha and Zubair, Haroon and Ray, Meghna and Sengupta, Srijan and Bhatt, Deepak L. and others},
  title        = {Development and validation of a predictive model for the diagnosis of rheumatic heart disease in low-income countries based on two cross-sectional studies},
  journal      = {International Journal of Cardiology Cardiovascular Risk and Prevention},
  volume       = {18},
  year         = {2023},
  month        = sep,
  issn         = {2772-4875},
  doi          = {10.1016/j.ijcrp.2023.200195},
  abstractNote = {We developed a questionnaire-based risk-scoring system to identify children at risk for rheumatic heart disease (RHD) in rural India. The resulting predictive model was validated in Nepal, in a population with a similar demographic profile to rural India. The study involved 8646 students (mean age 13.0 years, 46% boys) from 20 middle and high schools in the West Midnapore district of India. The survey asked questions about the presence of different signs and symptoms of RHD. Students with possible RHD who experienced sore throat and joint pain were offered an echocardiogram to screen for RHD. Their findings were compared with randomly selected students without these symptoms. The data were analyzed to develop a predictive model for identifying RHD. Based on our univariate analyses, seven variables were used for building a predictive model. A four-variable model (joint pain plus sore throat, female sex, shortness of breath, and palpitations) best predicted the risk of RHD with a C-statistic of 0.854. A six-point scoring system developed from the model was validated among similarly aged children in Nepal. A simple questionnaire-based predictive instrument could identify children at higher risk for this disease in low-income countries where RHD remains prevalent. Echocardiography could then be used in these high-risk children to detect RHD in its early stages. This may support a strategy for more effective secondary prophylaxis of RHD.},
}

@incollection{kline_sengupta_2023,
  author    = {Kline, Ed and Sengupta, Srijan},
  title     = {How {AI} can Help us Understand and Mitigate Error Propagation in Radiation Oncology},
  booktitle = {Artificial Intelligence in Radiation Oncology},
  publisher = {World Scientific},
  pages     = {305--334},
  year      = {2023},
  month     = jan,
  doi       = {10.1142/9789811263545_0014},
  url       = {https://doi.org/10.1142/9789811263545_0014},
}

@article{larsen_stallrich_sengupta_deng_kohavi_stevens_2023,
  author       = {Larsen, Nicholas and Stallrich, Jonathan and Sengupta, Srijan and Deng, Alex and Kohavi, Ron and Stevens, Nathaniel T.},
  title        = {Statistical Challenges in Online Controlled Experiments: A Review of {A/B} Testing Methodology},
  journal      = {The American Statistician},
  volume       = {10},
  year         = {2023},
  month        = oct,
  issn         = {1537-2731},
  doi          = {10.1080/00031305.2023.2257237},
  url          = {https://doi.org/10.1080/00031305.2023.2257237},
  abstractNote = {The rise of internet-based services and products in the late 1990's brought about an unprecedented opportunity for online businesses to engage in large scale data-driven decision making. Over the past two decades, organizations such as Airbnb, Alibaba, Amazon, Baidu, Booking, Alphabet's Google, LinkedIn, Lyft, Meta's Facebook, Microsoft, Netflix, Twitter, Uber, and Yandex have invested tremendous resources in online controlled experiments (OCEs) to assess the impact of innovation on their customers and businesses. Running OCEs at scale has presented a host of challenges requiring solutions from many domains. In this paper we review challenges that require new statistical methodologies to address them. In particular, we discuss the practice and culture of online experimentation, as well as its statistics literature, placing the current methodologies within their relevant statistical lineages and providing illustrative examples of OCE applications. Our goal is to raise academic statisticians' awareness of these new research opportunities to increase collaboration between academia and the online industry.},
}

@inproceedings{ali_aneja_ganguly_sanyal_sengupta_2023,
  author       = {Ali, Muhammad Shehzaib and Aneja, Viney and Ganguly, Indrila and Sanyal, Swarnali and Sengupta, Srijan},
  title        = {Wildfire Pollution Emissions, Exposure, and Human Health: A Growing Air Quality Control Issue},
  year         = {2023},
  month        = nov,
  doi          = {10.3390/ecas2023-15922},
  url          = {https://doi.org/10.3390/ecas2023-15922},
  abstractNote = {Wildfires emit large quantities of air pollutants into the atmosphere. As wildfires increase in frequency, intensity, duration, and coverage area, the emissions from these fires have become a significant control issue and health hazard for residential populations, especially vulnerable groups. A critical barrier to addressing the health impacts of air pollution caused by wildfires lies in our limited understanding of its true extent. This problem is expected to be exacerbated by additional factors such as the anticipated increase in wildfire intensity due to climate change, and the associated rise in fine particulate matter (PM2.5) in wildfire smoke, which, according to recent toxicological studies, could be more harmful than typical ambient PM2.5. The primary goal of our study is to develop a novel statistical framework that enables the forecasting of future emissions from active wildfires. This research aims to address the unquantified impacts of wildfire emissions and is a priority research area for many US federal agencies, e.g., NIEHS, US EPA, and NOAA. The framework integrates physicochemical models of emissions and satellite observations with forecasting models based on spatial statistics and machine learning models. Through the incorporation of these diverse datasets, we aim to improve the accuracy and reliability of our predictions regarding the spatio-temporal distribution of wildfire emissions. The potential human health impacts resulting from poor air quality during wildfires are also explored. By modeling the relationship between environmental exposures and disease risk, the burden of disease attributed to both short- and long-term impacts of exposure to wildfire events will be assessed.},
}

@article{guo_cho_chen_sengupta_hong_mitra_2022,
  author       = {Guo, Zhen and Cho, Jin-Hee and Chen, Ing-Ray and Sengupta, Srijan and Hong, Michin and Mitra, Tanushree},
  title        = {{SAFER}: Social Capital-Based Friend Recommendation to Defend against Phishing Attacks},
  journal      = {Proceedings of the International {AAAI} Conference on Web and Social Media},
  publisher    = {Association for the Advancement of Artificial Intelligence (AAAI)},
  volume       = {16},
  pages        = {241--252},
  year         = {2022},
  month        = may,
  doi          = {10.1609/icwsm.v16i1.19288},
  url          = {https://doi.org/10.1609/icwsm.v16i1.19288},
  abstractNote = {The tremendous growth of social media has been accompanied by highly advanced online social network (OSN) technologies. Such advanced technologies have been heavily utilized by perpetrators as convenient tools for deceiving people in online worlds. Social capital has been discussed as a powerful mechanism to leverage interpersonal relationships in social networks in order for an individual to achieve his/her goal. The beauty of social capital is the ability to materialize non-monetary, less costly, and non-economic resources into tools to solve social problems. In this paper, we aim to leverage social capital (SC) to minimize online users' vulnerabilities to online deception. In particular, we propose a Social cApital-based FriEnd Recommendation scheme, called SAFER, that can protect OSN users from phishing attacks. We quantify three dimensions of social capital, namely, structural, cognitive, and relational, based on user features obtained from real datasets and model a user's friending behavior based on their social capital. In addition, to model a user's behavior upon being attacked by a phishing attacker, we developed the so-called SER-SEIR (Susceptible, Exposed, Recovered-Susceptible, Exposed, Infected, and Recovered) model as a variant of the SEIR model. Via extensive simulation experiments based on two real datasets considering bot-based and human-based attackers performing phishing attacks, we demonstrate the performance of four SC-based friend recommendation schemes with three non-SC-based comparable counterparts in terms of the ratio of detecting attackers and the fraction of users in the states of S, E, I, and R. Based on the performance comparison, we analyze the overall trends of their performance in terms of the extent of resistance against phishing attacks by bot or human attackers.},
}

@article{komolafe_fong_sengupta_2022,
  author       = {Komolafe, Tomilayo and Fong, Allan and Sengupta, Srijan},
  title        = {Scalable Community Extraction of Text Networks for Automated Grouping in Medical Databases},
  journal      = {Journal of Data Science},
  publisher    = {School of Statistics, Renmin University of China},
  pages        = {1--20},
  year         = {2022},
  issn         = {1680-743X 1683-8602},
  doi          = {10.6339/22-JDS1038},
  url          = {https://doi.org/10.6339/22-jds1038},
  abstractNote = {Networks are ubiquitous in today's world. Community structure is a well-known feature of many empirical networks, and a lot of statistical methods have been developed for community detection. In this paper, we consider the problem of community extraction in text networks, which is greatly relevant in medical errors and patient safety databases. We adapt a well-known community extraction method to develop a scalable algorithm for extracting groups of similar documents in large text databases. The application of our method on a real-world patient safety report system demonstrates that the groups generated from community extraction are much more accurate than manual tagging by frontline workers.},
}

@article{boxley_krevat_sengupta_ratwani_fong_2022,
  author       = {Boxley, Christian and Krevat, Seth and Sengupta, Srijan and Ratwani, Raj and Fong, Allan},
  title        = {Using Community Detection Techniques to Identify Themes in {COVID-19}--Related Patient Safety Event Reports},
  journal      = {Journal of Patient Safety},
  publisher    = {Ovid Technologies (Wolters Kluwer Health)},
  volume       = {18},
  number       = {8},
  pages        = {e1196--e1202},
  year         = {2022},
  month        = sep,
  issn         = {1549-8425 1549-8417},
  doi          = {10.1097/PTS.0000000000001051},
  url          = {https://doi.org/10.1097/PTS.0000000000001051},
  abstractNote = {Objectives: The COVID-19 pandemic has transformed how healthcare is delivered to patients. As the pandemic progresses and healthcare systems continue to adapt, it is important to understand how these changes in care have changed patient care. This study aims to use community detection techniques to identify and facilitate analysis of themes in patient safety event (PSE) reports to better understand COVID-19 pandemic's impact on patient safety. With this approach, we also seek to understand how community detection techniques can be used to better identify themes and extract information from PSE reports.},
}

@inproceedings{sengupta_aneja_kravchenko_2022,
  author       = {Sengupta, Srijan and Aneja, Viney P. and Kravchenko, Julia},
  title        = {Wildfire Pollution Exposure and Human Health: A Growing Air Quality and Public Health Issue},
  booktitle    = {Environmental Science Proceedings},
  publisher    = {MDPI},
  volume       = {19},
  number       = {1},
  pages        = {59},
  year         = {2022},
  month        = jul,
  doi          = {10.3390/ecas2022-12809},
  url          = {https://doi.org/10.3390/ecas2022-12809},
  abstractNote = {Wildfires emit large quantities of air pollutants into the atmosphere. As wildfires increase in frequency, intensity, duration, and coverage area, such emissions have become a significant health hazard for residential populations, particularly the vulnerable groups. This health hazard is exacerbated by two factors: first, wildfires are expected to increase in frequency as a result of climate change; and second, human health is adversely impacted by fine particulate matter produced from wild fires. Recent toxicological studies suggest that wildfire particulate matter may be more toxic than equal doses of ambient PM2.5. The role of ammonia emissions from wildfires on PM2.5 is examined. The impact of poor air quality on human health is examined, and some strategies are discussed to forecast the burden of diseases associated with exposures to wildfire events, both short- and long-term, and help develop mitigation strategies.},
}

@article{stevens_wilson_driscoll_mcculloh_michailidis_paris_paynabar_perry_reisi-gahrooei_sengupta_etal_2021a,
  author       = {Stevens, Nathaniel T. and Wilson, James D. and Driscoll, Anne R. and McCulloh, Ian and Michailidis, George and Paris, Cecile and Paynabar, Kamran and Perry, Marcus B. and Reisi-Gahrooei, Mostafa and Sengupta, Srijan and others},
  title        = {Broader impacts of network monitoring: Its role in government, industry, technology, and beyond},
  journal      = {Quality Engineering},
  publisher    = {Informa UK Limited},
  volume       = {33},
  number       = {4},
  year         = {2021},
  month        = sep,
  issn         = {1532-4222},
  doi          = {10.1080/08982112.2021.1974036},
  url          = {https://doi.org/10.1080/08982112.2021.1974036},
  abstractNote = {The study and use of network monitoring methodology is informed by its need in government, industry, and technology. Here, the panelists discuss the broader impacts of network monitoring in these sectors, how the use and development of new methods is influenced by these institutions, and what challenges need to be addressed in the next 5 to 10 years. There is a strong consensus that these sectors each play an important role in the innovation of network monitoring techniques. Applications to cyber security, transportation, infectious disease monitoring, engineering, and artificial intelligence are discussed.},
}

@article{stevens_wilson_driscoll_mcculloh_michailidis_paris_paynabar_perry_reisi-gahrooei_sengupta_etal_2021b,
  author       = {Stevens, Nathaniel T. and Wilson, James D. and Driscoll, Anne R. and McCulloh, Ian and Michailidis, George and Paris, Cecile and Paynabar, Kamran and Perry, Marcus B. and Reisi-Gahrooei, Mostafa and Sengupta, Srijan and others},
  title        = {Foundations of network monitoring: Definitions and applications},
  journal      = {Quality Engineering},
  publisher    = {Informa UK Limited},
  volume       = {33},
  number       = {4},
  pages        = {719--730},
  year         = {2021},
  month        = oct,
  issn         = {0898-2112 1532-4222},
  doi          = {10.1080/08982112.2021.1974033},
  url          = {https://doi.org/10.1080/08982112.2021.1974033},
  abstractNote = {In this article, the panelists broadly discuss the definition of network monitoring, and how it may be similar to or different from network surveillance and network change-point detection. The discussion uncovers ambiguity and contradictions associated with these terms and we argue that this lack of clarity is detrimental to the field. The panelists also describe existing and emerging applications of network monitoring, which serves to illustrate the wide applicability of the tools and research associated with the field.},
}

@article{guo_cho_chen_sengupta_hong_mitra_2021,
  author       = {Guo, Zhen and Cho, Jin-Hee and Chen, Ing-Ray and Sengupta, Srijan and Hong, Michin and Mitra, Tanushree},
  title        = {Online Social Deception and Its Countermeasures: A Survey},
  journal      = {IEEE Access},
  publisher    = {Institute of Electrical and Electronics Engineers (IEEE)},
  volume       = {9},
  pages        = {1770--1806},
  year         = {2021},
  issn         = {2169-3536},
  doi          = {10.1109/ACCESS.2020.3047337},
  url          = {https://doi.org/10.1109/ACCESS.2020.3047337},
  abstractNote = {We are living in an era when online communication over social network services (SNSs) have become an indispensable part of people's everyday lives. As a consequence, online social deception (OSD) in SNSs has emerged as a serious threat in cyberspace, particularly for users vulnerable to such cyberattacks. Cyber attackers have exploited the sophisticated features of SNSs to carry out harmful OSD activities, such as financial fraud, privacy threat, or sexual/labor exploitation. Therefore, it is critical to understand OSD and develop effective countermeasures against OSD for building trustworthy SNSs. In this paper, we conduct an extensive survey, covering 1) the multidisciplinary concept of social deception; 2) types of OSD attacks and their unique characteristics compared to other social network attacks and cybercrimes; 3) comprehensive defense mechanisms embracing prevention, detection, and response (or mitigation) against OSD attacks along with their pros and cons; 4) datasets/metrics used for validation and verification; and 5) legal and ethical concerns related to OSD research. Based on this survey, we provide insights into the effectiveness of countermeasures and the lessons learned from the existing literature. We conclude our survey with in-depth discussions on the limitations of the state-of-the-art and suggest future research directions in OSD research.},
}

@article{stevens_wilson_driscoll_mcculloh_michailidis_paris_parker_paynabar_perry_reisi-gahrooei_etal_2021a,
  author       = {Stevens, Nathaniel T. and Wilson, James D. and Driscoll, Anne R. and McCulloh, Ian and Michailidis, George and Paris, Cecile and Parker, Peter and Paynabar, Kamran and Perry, Marcus B. and Reisi-Gahrooei, Mostafa and others},
  title        = {Research in network monitoring: Connections with {SPM} and new directions},
  journal      = {Quality Engineering},
  publisher    = {Informa UK Limited},
  volume       = {33},
  number       = {4},
  pages        = {736--748},
  year         = {2021},
  month        = oct,
  issn         = {0898-2112 1532-4222},
  doi          = {10.1080/08982112.2021.1974035},
  url          = {https://doi.org/10.1080/08982112.2021.1974035},
  abstractNote = {Traditional statistical process monitoring (SPM) provides a useful starting point for framing and solving network monitoring problems. In this paper the panelists discuss similarities and differences between the two fields and they describe many challenges and open problems in contemporary network monitoring research. The panelists also discuss potential outlets and avenues for disseminating such research.},
}

@article{dasgupta_sengupta_2021,
  author       = {Dasgupta, Anirban and Sengupta, Srijan},
  title        = {Scalable Estimation of Epidemic Thresholds via Node Sampling},
  journal      = {Sankhya A},
  publisher    = {Springer Science and Business Media LLC},
  volume       = {84},
  number       = {1},
  pages        = {321--344},
  year         = {2021},
  month        = jul,
  issn         = {0976-836X 0976-8378},
  doi          = {10.1007/s13171-021-00249-0},
  url          = {https://doi.org/10.1007/s13171-021-00249-0},
  abstractNote = {Infectious or contagious diseases can be transmitted from one person to another through social contact networks. In today's interconnected global society, such contagion processes can cause global public health hazards, as exemplified by the ongoing Covid-19 pandemic. It is therefore of great practical relevance to investigate the network transmission of contagious diseases from the perspective of statistical inference. An important and widely studied boundary condition for contagion processes over networks is the so-called epidemic threshold. The epidemic threshold plays a key role in determining whether a pathogen introduced into a social contact network will cause an epidemic or die out. In this paper, we investigate epidemic thresholds from the perspective of statistical network inference. We identify two major challenges that are caused by high computational and sampling complexity of the epidemic threshold. We develop two statistically accurate and computationally efficient approximation techniques to address these issues under the Chung-Lu modeling framework. The second approximation, which is based on random walk sampling, further enjoys the advantage of requiring data on a vanishingly small fraction of nodes. We establish theoretical guarantees for both methods and demonstrate their empirical superiority.},
}

@article{pruitt_boxley_krevat_sengupta_ratwani_fong_2021,
  author       = {Pruitt, Zoe and Boxley, Christian and Krevat, Seth and Sengupta, Srijan and Ratwani, Raj and Fong, Allan},
  title        = {The Impact of {COVID-19} on Medical Device Reporting and Investigation},
  journal      = {Patient Safety},
  publisher    = {Patient Safety Authority},
  volume       = {9},
  pages        = {28--35},
  year         = {2021},
  month        = sep,
  doi          = {10.33940/data/2021.9.3},
  url          = {https://doi.org/10.33940/data/2021.9.3},
  abstractNote = {Introduction The Manufacturer and User Facility Device Experience (MAUDE) database houses medical device reports submitted to the U.S. Food and Drug Administration (FDA). In May 2020, the FDA released guidance about medical device reporting during a pandemic, anticipating delays in reporting and investigating events involving medical devices. Methods We aimed to understand how the COVID-19 pandemic impacted medical device reporting by analyzing reports in the MAUDE database that mention COVID-19. Results From the 816,470 reports submitted between January 1 and July 31, 2020, 3,500 (0.43%) included phrases related to COVID-19. Of these reports, 4.8% (167/3,500) described adverse events during COVID-19 patients' treatment, and 90.3% (3,161/3,500) described barriers manufacturers faced investigating malfunctioning devices during the pandemic. 4.9% (172/3,500) of reports were not related to COVID-19. Malfunctions were clinically significant in 85.8% (3,004/3,500) of reports. Discussion Reports indicate challenges some manufacturers had when investigating medical devices during the pandemic. The pandemic made investigating implants uniquely difficult, as restrictions to person-to-person contact limited the type of care patients could receive. Because full-scale investigations into malfunctioning devices may be difficult to perform during the pandemic, safety issues may go unaddressed and result in future harm to patients. Conclusion The COVID-19 pandemic and the myriad of healthcare, travel, and shipping challenges it created impacted how manufacturers reported and investigated medical devices. At the current time, it is unclear how manufacturers will address delayed clinical management of implant devices and other uninvestigated malfunctions after the pandemic and how this will impact patient safety.},
}

@article{stevens_wilson_driscoll_mcculloh_michailidis_paris_parker_paynabar_perry_reisi-gahrooei_etal_2021b,
  author       = {Stevens, Nathaniel T. and Wilson, James D. and Driscoll, Anne R. and McCulloh, Ian and Michailidis, George and Paris, Cecile and Parker, Peter and Paynabar, Kamran and Perry, Marcus B. and Reisi-Gahrooei, Mostafa and others},
  title        = {The interdisciplinary nature of network monitoring: Advantages and disadvantages},
  journal      = {Quality Engineering},
  publisher    = {Informa UK Limited},
  volume       = {33},
  number       = {4},
  pages        = {731--735},
  year         = {2021},
  month        = oct,
  issn         = {0898-2112 1532-4222},
  doi          = {10.1080/08982112.2021.1974034},
  url          = {https://doi.org/10.1080/08982112.2021.1974034},
  abstractNote = {Research in network monitoring spans a large and growing number of disciplines, including mathematics, physics, computer science, and statistics. Here, the panelists discuss the advantages and disadvantages of the interdisciplinary nature of the area. It is largely agreed that integrating expertise from many disciplines drives innovation in network monitoring development, but several notable barriers are discussed that limit the area's full potential.},
}

@article{guo_cho_chen_sengupta_hong_mitra_2020,
  author    = {Guo, Zhen and Cho, Jin-Hee and Chen, Ing-Ray and Sengupta, Srijan and Hong, Michin and Mitra, Tanushree},
  title     = {Online Social Deception and Its Countermeasures: A Survey},
  journal   = {IEEE Access},
  publisher = {IEEE},
  year      = {2020},
}

@misc{dasgupta_sengupta_2020,
  author        = {Dasgupta, Anirban and Sengupta, Srijan},
  title         = {Scalable estimation of epidemic thresholds via node sampling},
  year          = {2020},
  eprint        = {2007.14820},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2007.14820},
}

@article{kodali_sengupta_house_woodall_2020,
  author  = {Kodali, Lata and Sengupta, Srijan and House, Leanna and Woodall, William H.},
  title   = {The value of summary statistics for anomaly detection in temporally evolving networks: A performance evaluation study},
  journal = {Applied Stochastic Models in Business and Industry},
  volume  = {36},
  number  = {6},
  pages   = {980--1013},
  year    = {2020},
}

@article{debchoudhury_sengupta_earle_coley_2019,
  author  = {Debchoudhury, Shantanab and Sengupta, Srijan and Earle, Gregory and Coley, William},
  title   = {A Bootstrap-Based Approach for Improving Measurements by Retarding Potential Analyzers},
  journal = {Journal of Geophysical Research: Space Physics},
  volume  = {124},
  number  = {6},
  pages   = {4569--4584},
  year    = {2019},
}

@misc{bhadra_chakraborty_sengupta_lahiri_2019,
  author        = {Bhadra, Somnath and Chakraborty, Kaustav and Sengupta, Srijan and Lahiri, Soumendra},
  title         = {A Bootstrap-based Inference Framework for Testing Similarity of Paired Networks},
  year          = {2019},
  eprint        = {1911.06869},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1911.06869},
}

@phdthesis{debchoudhury_2019,
  author = {Debchoudhury, Shantanab},
  title  = {Parameter estimation from retarding potential analyzers in the presence of realistic noise},
  school = {Virginia Tech},
  year   = {2019},
}

@article{komolafe_quevedo_sengupta_woodall_2019,
  author    = {Komolafe, Tomilayo and Quevedo, A. Valeria and Sengupta, Srijan and Woodall, William H.},
  title     = {Statistical evaluation of spectral methods for anomaly detection in static networks},
  journal   = {Network Science},
  publisher = {Cambridge University Press},
  volume    = {7},
  number    = {3},
  pages     = {319--352},
  year      = {2019},
}

@article{leitch_alexander_sengupta_2019,
  author    = {Leitch, Jack and Alexander, Kathleen A. and Sengupta, Srijan},
  title     = {Toward epidemic thresholds on temporal networks: a review and open questions},
  journal   = {Applied Network Science},
  publisher = {Springer Science and Business Media LLC},
  volume    = {4},
  number    = {1},
  pages     = {1--21},
  year      = {2019},
  month     = nov,
  issn      = {2364-8228},
  doi       = {10.1007/s41109-019-0230-4},
  url       = {https://doi.org/10.1007/s41109-019-0230-4},
}

@article{li_sengupta_hanigan_2019,
  author    = {Li, Meng M. and Sengupta, Srijan and Hanigan, Mark D.},
  title     = {Using artificial neural networks to predict {pH}, ammonia, and volatile fatty acid concentrations in the rumen},
  journal   = {Journal of Dairy Science},
  publisher = {Elsevier},
  volume    = {102},
  number    = {10},
  pages     = {8850--8861},
  year      = {2019},
}

@article{sengupta_chen_2018,
  author  = {Sengupta, Srijan and Chen, Yuguo},
  title   = {A block model for node popularity in networks with community structure},
  journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume  = {80},
  number  = {2},
  pages   = {365--386},
  year    = {2018},
}

@misc{sengupta_2018,
  author        = {Sengupta, Srijan},
  title         = {Anomaly detection in static networks using egonets},
  year          = {2018},
  eprint        = {1807.08925},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1807.08925},
}

@article{sengupta_woodall_2018,
  author  = {Sengupta, Srijan and Woodall, William H.},
  title   = {Discussion of ``{Statistical} methods for network surveillance''},
  journal = {Applied Stochastic Models in Business and Industry},
  volume  = {34},
  number  = {4},
  pages   = {446--448},
  year    = {2018},
}

@article{zhao_driscoll_sengupta_fricker_spitzner_woodall_2018,
  author    = {Zhao, Meng J. and Driscoll, Anne R. and Sengupta, Srijan and Fricker, Jr., Ronald D. and Spitzner, Dan J. and Woodall, William H.},
  title     = {Performance evaluation of social network anomaly detection using a moving window-based scan method},
  journal   = {Quality and Reliability Engineering International},
  publisher = {Wiley},
  volume    = {34},
  number    = {8},
  pages     = {1699--1716},
  year      = {2018},
  month     = aug,
  issn      = {0748-8017},
  doi       = {10.1002/qre.2364},
  url       = {https://doi.org/10.1002/qre.2364},
}

@article{zhao_driscoll_sengupta_stevens_fricker_woodall_2018,
  author       = {Zhao, Meng J. and Driscoll, Anne R. and Sengupta, Srijan and Stevens, Nathaniel T. and Fricker, Jr., Ronald D. and Woodall, William H.},
  editor       = {Dorta-Gonz{\'a}lez, Pablo},
  title        = {The effect of temporal aggregation level in social network monitoring},
  journal      = {PLOS ONE},
  publisher    = {Public Library of Science (PLoS)},
  volume       = {13},
  number       = {12},
  pages        = {e0209075},
  year         = {2018},
  month        = dec,
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0209075},
  url          = {https://doi.org/10.1371/journal.pone.0209075},
  abstractNote = {Social networks have become ubiquitous in modern society, which makes social network monitoring a research area of significant practical importance. Social network data consist of social interactions between pairs of individuals that are temporally aggregated over a certain interval of time, and the level of such temporal aggregation can have substantial impact on social network monitoring. There have been several studies on the effect of temporal aggregation in the process monitoring literature, but no studies on the effect of temporal aggregation in social network monitoring. We use the degree corrected stochastic block model (DCSBM) to simulate social networks and network anomalies and analyze these networks in the context of both count and binary network data. In conjunction with this model, we use the Priebe scan method as the monitoring method. We demonstrate that temporal aggregation at high levels leads to a considerable decrease in the ability to detect an anomaly within a specified time period. Moreover, converting social network communication data from counts to binary indicators can result in a significant loss of information, hindering detection performance. Aggregation at an appropriate level with count data, however, can amplify the anomalous signal generated by network anomalies and improve detection performance. Our results provide both insights on the practical effects of temporal aggregation and a framework for the study of other combinations of network models, surveillance methods, and types of anomalies.},
}

@article{sengupta_volgushev_shao_2016,
  author    = {Sengupta, Srijan and Volgushev, Stanislav and Shao, Xiaofeng},
  title     = {A subsampled double bootstrap for massive data},
  journal   = {Journal of the American Statistical Association},
  publisher = {Taylor \& Francis},
  volume    = {111},
  number    = {515},
  pages     = {1222--1232},
  year      = {2016},
}

@phdthesis{sengupta_2016,
  author = {Sengupta, Srijan},
  title  = {Statistical analysis of networks with community structure and bootstrap methods for big data},
  school = {University of Illinois at Urbana-Champaign},
  year   = {2016},
}

@article{cavaliere_politis_rahbek_bertail_clemencon_tressou_others_2015,
  author    = {Cavaliere, Giuseppe and Politis, Dimitris N. and Rahbek, Anders and Bertail, Patrice and Cl{\'e}men{\c{c}}on, St{\'e}phan and Tressou, Jessica and others},
  title     = {Recent developments in bootstrap methods for dependent data},
  journal   = {Journal of Time Series Analysis},
  publisher = {Wiley Blackwell},
  volume    = {36},
  number    = {3},
  pages     = {462--480},
  year      = {2015},
}

@article{sengupta_chen_2015,
  author    = {Sengupta, Srijan and Chen, Yuguo},
  title     = {Spectral clustering in heterogeneous networks},
  journal   = {Statistica Sinica},
  publisher = {Institute of Statistical Science, Academia Sinica},
  pages     = {1081--1106},
  year      = {2015},
}

@article{sengupta_shao_wang_2015,
  author  = {Sengupta, Srijan and Shao, Xiaofeng and Wang, Yingchuan},
  title   = {The dependent random weighting},
  journal = {Journal of Time Series Analysis},
  volume  = {36},
  number  = {3},
  pages   = {315--326},
  year    = {2015},
}

@inproceedings{sengupta_2010,
  author    = {Sengupta, Srijan},
  title     = {Modeling the zero coupon yield curve: a regression approach},
  booktitle = {Global Conference of Actuaries},
  volume    = {12},
  year      = {2010},
}