@article{wang_lin_zhong_yang_shahzad_2023, title={Enhanced Machine Learning Sketches for Network Measurements}, volume={72}, ISSN={["1557-9956"]}, DOI={10.1109/TC.2022.3185560}, abstractNote={Network monitoring and management require accurate statistics of a variety of flow-level metrics such as flow sizes, top-$k$k flows, and number of flows. Arguably, the current best technique to measure these metrics is sketches. While a significant amount of work has already been done on sketching techniques, there is still a lot of room for improvement because the accuracy of existing sketches varies with changing characteristics of network traffic. In this paper, we propose the idea of using machine learning to improve the accuracy of sketches, and propose a generic machine learning framework to reduce the dependence of accuracy of sketches on network traffic characteristics. We further present three case studies, where we applied our machine learning framework on sketches for measuring three flow-level network metrics, namely flow sizes, top-$k$k flows, and number of flows. We implemented and extensively evaluated this framework for these three metrics using both real-world and synthetic traffic traces. To the best of our knowledge, this is the first work that uses machine learning to reduce the dependence of sketching techniques on the characteristics of network traffic. We have released all our traces and implementation codes at Github.}, number={4}, journal={IEEE TRANSACTIONS ON COMPUTERS}, author={Wang, Hengrui and Lin, Huiping and Zhong, Zheng and Yang, Tong and Shahzad, Muhammad}, year={2023}, month={Apr}, pages={957–970} } @article{khan_venkatnarayan_shahzad_2023, title={Using RF Signals to Generate Indoor Maps}, volume={19}, ISSN={["1550-4867"]}, DOI={10.1145/3534121}, abstractNote={Generating maps of indoor environments beyond the line-of-sight finds applications in several areas such as planning, navigation, and security. While researchers have previously explored the use of RF signals to generate maps, prior work has two important limitations: (i) it requires moving the mapping setup along the entire lengths of the sides of the building, and (ii) it generates maps that are not fully connected, rather are scatter plots of locations from where some obstacles reflected the signals. Thus, prior approaches require human interpretation to locate the walls and determine how they merge. In this article, we address these limitations and propose RFMap, which generates fully connected maps, and does not require the measurement setup to be moved along the sides of the buildings. To generate the map, RFMap first transmits RF signals in many different directions and then measures the distances of different reflectors inside the building. Next, it identifies these reflectors and classifies them into various types based on the properties of the reflections. A key challenge is that RFMap does not receive reflections from all the directions due to the specular nature of the reflectors. Due to this, it only gets sparse data about the objects in the environment. To address this challenge, RFMap trains a deep generative adversarial network (GAN) to intelligently predict the missing information. At runtime, it feeds the locations and types of the detected reflectors to the trained GAN and generates complete and accurate map. We implemented RFMap using software defined radios and extensively evaluated it in several real-world environments. Our results show that RFMap generated the maps of all the buildings that we tested it on with high accuracy.}, number={1}, journal={ACM TRANSACTIONS ON SENSOR NETWORKS}, author={Khan, Usman Mahmood and Venkatnarayan, Raghav H. and Shahzad, Muhammd}, year={2023}, month={Feb} } @article{adina_shahzad_2022, title={A Distributed & Lightweight Framework to Secure IoT Networks Against Network Layer Attacks}, ISSN={["1095-2055"]}, DOI={10.1109/ICCCN54977.2022.9868908}, abstractNote={With the advent of the Internet of Things (IoT), sensor and actuator networks, subsequently referred to as IoT networks (IoT Ns), are proliferating at an unprecedented rate in several newfound areas such as smart cities, health care, and transportation, and consequently, securing them is of paramount importance. In this paper, we present NLSec, a fully distributed and lightweight framework that can detect arbitrary network layer (NL) attacks in any given IoTN, localize (i.e., identify) the compromised nodes, and mitigate the attacks by isolating the compromised nodes automatically. We also present insights obtained from an exploratory study on the effects of NL attacks on IoTNs, which guided us in designing NLSec. We demonstrate the effectiveness of NLSec through extensive experiments on a real IoTN test-bed under three well-known NL attacks.}, journal={2022 31ST INTERNATIONAL CONFERENCE ON COMPUTER COMMUNICATIONS AND NETWORKS (ICCCN 2022)}, author={Adina, Prasesh and Shahzad, Muhammad}, year={2022} } @article{iqbal_singh_shahzad_2022, title={Characterizing the Availability and Latency in AWS Network From the Perspective of Tenants}, ISSN={["1558-2566"]}, DOI={10.1109/TNET.2022.3148701}, abstractNote={Scalability and performance requirements are driving tenants to increasingly move their applications to public clouds. Unfortunately, cloud providers do not provide a view of their networking infrastructure to the tenants, rather only provide some generic service level agreements (SLAs). Tenants are, therefore, forced to plan the deployments of their applications based on these SLAs. This limits the performance that the tenants can achieve. Keeping this in view, we present a detailed network measurement study of the largest public cloud, Amazon Web Services (AWS). We collected network data to characterize the availability and latency of AWS over a period of 100 days and studied various temporal trends across several geographical locations of AWS throughout the world. We performed our study at all three levels of cloud hierarchy: inside availability zones (AZs), across AZs, and across regions. Our results show that network behavior varies significantly over time at different geographical locations, levels of hierarchy, and temporal granularities. For example, while we observed high availability at monthly granularity, it deteriorates at daily and hourly granularities. This and many other such observations that we present have significant implications for cloud tenants. We further implemented our measurement approach on Google Cloud Platform (GCP) to demonstrate that it can be deployed on any cloud platform and present some preliminary comparative observations from this implementation. Based on our observations, we present several recommendations that tenants can use to better deploy their applications.}, journal={IEEE-ACM TRANSACTIONS ON NETWORKING}, author={Iqbal, Hassan and Singh, Anand and Shahzad, Muhammad}, year={2022}, month={Feb} } @article{khan_rigazio_shahzad_2022, title={Contactless Monitoring of PPG Using Radar}, volume={6}, ISSN={["2474-9567"]}, DOI={10.1145/3550330}, abstractNote={In this paper, we propose VitaNet, a radio frequency based contactless approach that accurately estimates the PPG signal using radar for stationary participants. The main insight behind VitaNet is that the changes in the blood volume that manifest in the PPG waveform are correlated to the physical movements of the heart, which the radar can capture. To estimate the PPG waveform, VitaNet uses a self-attention architecture to identify the most informative reflections in an unsupervised manner, and then uses an encoder decoder network to transform the radar phase profile to the PPG sequence. We have trained and extensively evaluated VitaNet on a large dataset obtained from 25 participants over 179 full nights. Our evaluations show that VitaNet accurately estimates the PPG waveform and its derivatives with high accuracy, significantly improves the heart rate and heart rate variability estimates from the prior works, and also accurately estimates several useful PPG features. We have released the codes of VitaNet as well as the trained models and the dataset used in this paper.}, number={3}, journal={PROCEEDINGS OF THE ACM ON INTERACTIVE MOBILE WEARABLE AND UBIQUITOUS TECHNOLOGIES-IMWUT}, author={Khan, Usman Mahmood and Rigazio, Luca and Shahzad, Muhammad}, year={2022}, month={Sep} } @article{khan_shahzad_2022, title={Estimating Soil Moisture using RF Signals}, DOI={10.1145/3495243.3517025}, abstractNote={In this paper, we propose CoMEt, a radio frequency based approach that measures soil moisture at multiple depths underneath the ground surface without installing any objects in the soil and without making any contact with the ground surface. The main insight behind CoMEt is that the phase of an RF signal depends on its wavelength in the medium through which it is propagating, which in turn depends on the amount of soil moisture. To measure soil moisture, CoMEt leverages the phase changes across successive antennas in a receive antenna array along with the time of flight of the received signal to jointly estimate the depth of each layer of soil and the wavelength of the signal in each layer. It then uses these estimates to obtain the amount of moisture in each soil layer. We have implemented CoMEt using a software defined radio and a Raspberry Pi to measure soil moisture in real-time. We have extensively evaluated CoMEt in both indoor and outdoor environments. Our results show that CoMEt estimated soil moisture for up to three layers of soil with a median error of just 1.1%.}, journal={PROCEEDINGS OF THE 2022 THE 28TH ANNUAL INTERNATIONAL CONFERENCE ON MOBILE COMPUTING AND NETWORKING, ACM MOBICOM 2022}, author={Khan, Usman Mahmood and Shahzad, Muhammad}, year={2022}, pages={242–254} } @article{iqbal_khan_khan_shahzad_2022, title={Left or Right: A Peek into the Political Biases in Email Spam Filtering Algorithms During US Election 2020}, DOI={10.1145/3485447.3512121}, abstractNote={Email services use spam filtering algorithms (SFAs) to filter emails that are unwanted by the user. However, at times, the emails perceived by an SFA as unwanted may be important to the user. Such incorrect decisions can have significant implications if SFAs treat emails of user interest as spam on a large scale. This is particularly important during national elections. To study whether the SFAs of popular email services have any biases in treating the campaign emails, we conducted a large-scale study of the campaign emails of the US elections 2020 by subscribing to a large number of Presidential, Senate, and House candidates using over a hundred email accounts on Gmail, Outlook, and Yahoo. We analyzed the biases in the SFAs towards the left and the right candidates and further studied the impact of the interactions (such as reading or marking emails as spam) of email recipients on these biases. We observed that the SFAs of different email services indeed exhibit biases towards different political affiliations.}, journal={PROCEEDINGS OF THE ACM WEB CONFERENCE 2022 (WWW'22)}, author={Iqbal, Hassan and Khan, Usman Mahmood and Khan, Hassan Ali and Shahzad, Muhammad}, year={2022}, pages={2491–2500} } @article{khan_iqbal_shahzad_jin_2022, title={RMS: Removing Barriers to Analyze the Availability and Surge Pricing of Ridesharing Services}, DOI={10.1145/3491102.3517464}, abstractNote={Ridesharing services do not make data of their availability (supply, utilization, idle time, and idle distance) and surge pricing publicly available. It limits the opportunities to study the spatiotemporal trends of the availability and surge pricing of these services. Only a few research studies conducted in North America analyzed these features for only Uber and Lyft. Despite the interesting observations, the results of prior works are not generalizable or reproducible because: i) the datasets collected in previous publications are spatiotemporally sensitive, i.e., previous works do not represent the current availability and surge pricing of ridesharing services in different parts of the world; and ii) the analyses presented in previous works are limited in scope (in terms of countries and ridesharing services they studied). Hence, prior works are not generally applicable to ridesharing services operating in different countries. This paper addresses the issue of ridesharing-data unavailability by presenting Ridesharing Measurement Suite (RMS). RMS removes the barrier of entry for analyzing the availability and surge pricing of ridesharing services for ridesharing users, researchers from various scientific domains, and regulators. RMS continuously collects the data of the availability and surge pricing of ridesharing services. It exposes real-time data of these services through i) graphical user interfaces and ii) public APIs to assist various stakeholders of these services and simplify the data collection and analysis process for future ridesharing research studies. To signify the utility of RMS, we deployed RMS to collect and analyze the availability and surge pricing data of 10 ridesharing services operating in nine countries for eight weeks in pre and during pandemic periods. Using the data collected and analyzed by RMS, we identify that previous articles miscalculated the utilization of ridesharing services as they did not count in the vehicles driving in multiple categories of the same service. We observe that during COVID-19, the supply of ridesharing services decreased by 54%, utilization of available vehicles increased by 6%, and a 5 × increase in the surge frequency of services. We also find that surge occurs in a small geographical region, and its intensity reduces by 50% in about 0.5 miles away from the location of a surge. We present several other interesting observations on ridesharing services’ availability and surge pricing.}, journal={PROCEEDINGS OF THE 2022 CHI CONFERENCE ON HUMAN FACTORS IN COMPUTING SYSTEMS (CHI' 22)}, author={Khan, Hassan Ali and Iqbal, Hassan and Shahzad, Muhammad and Jin, Guoliang}, year={2022} } @article{venkatnarayan_shahzad_2021, title={Accurately Decoding MIMO Streams in VLC}, ISSN={["1095-2055"]}, DOI={10.1109/ICCCN52240.2021.9522352}, abstractNote={Among the efforts to overcome the problem of rapidly saturating RF bands, visible light communication (VLC) is garnering a renewed interest as it can be enabled using commodity LED lamps. As indoor spaces are typically illuminated by multiple lamps comprising multiple LEDs each, a natural approach to efficiently utilize the bandwidth of all the LEDs is to use MIMO communication. The state of the art approach to decode MIMO streams is to use channel matrix. Although channel matrix based decoding method (CMDM) works very well in conventional RF technologies, when used in VLC, it suffers from several limitations, such as high sensitivity to environmental conditions, and need for sophisticated receivers. To overcome these limitations, we propose PCDM, a novel parallelogram - clustering based decoding method, which is fundamentally different from CMDM and achieves an order of magnitude lower bit error rate compared to CMDM. We implement and extensively evaluate these two methods using a real VLC MIMO testbed. Our results show that PCDM outperformed CMDM in all scenarios.}, journal={30TH INTERNATIONAL CONFERENCE ON COMPUTER COMMUNICATIONS AND NETWORKS (ICCCN 2021)}, author={Venkatnarayan, Raghav H. and Shahzad, Muhammad}, year={2021} } @article{ganji_shahzad_2021, title={Characterizing the Performance of QUIC on Android and Wear OS Devices}, ISSN={["1095-2055"]}, DOI={10.1109/ICCCN52240.2021.9522258}, abstractNote={Google’s QUIC protocol has become popular over the past few years and is being rapidly adopted as the transport protocol of choice by popular Internet services in their mobile applications. Considering this, it is crucial to understand the performance and implementation issues of integrating QUIC with mobile and wearable applications. In this paper, we conduct a comprehensive measurement analysis and comparison of QUIC with TCP on mobile and wearable platforms. Our experiments cover a wide range of environments, including different request sizes, traffic directions, and connectivity types. From our experiments, we found that the benefits of using QUIC instead of TCP to service HTTP requests are not uniform across different scenarios. We also found a bug in the current implementation of QUIC in Android’s Cronet library that prevents the applications from reverting back to using WiFi after a connection migration from LTE happens. Our experiences from this measurement study has lead us to propose a probabilistic framework, which we call Dynamic Transport Selection, that adaptively chooses the appropriate transport protocol for a given network environment. We implemented and evaluated this framework in Android and Wear OS devices and found that it improves the overall request completion performance of the application by as much as 41.76% when compared to using either QUIC or TCP alone}, journal={30TH INTERNATIONAL CONFERENCE ON COMPUTER COMMUNICATIONS AND NETWORKS (ICCCN 2021)}, author={Ganji, Anirudh and Shahzad, Muhammad}, year={2021} } @article{iqbal_khalid_shahzad_2021, title={Dissecting Cloud Gaming Performance with DECAF}, volume={5}, ISSN={["2476-1249"]}, DOI={10.1145/3491043}, abstractNote={Cloud gaming platforms have witnessed tremendous growth over the past two years with a number of large Internet companies including Amazon, Facebook, Google, Microsoft, and Nvidia publicly launching their own platforms. While cloud gaming platforms continue to grow, the visibility in their performance and relative comparison is lacking. This is largely due to absence of systematic measurement methodologies which can generally be applied. As such, in this paper, we implement DECAF, a methodology to systematically analyze and dissect the performance of cloud gaming platforms across different game genres and game platforms. DECAF is highly automated and requires minimum manual intervention. By applying DECAF, we measure the performance of three commercial cloud gaming platforms including Google Stadia, Amazon Luna, and Nvidia GeForceNow, and uncover a number of important findings. First, we find that processing delays in the cloud comprise majority of the total round trip delay experienced by users, accounting for as much as 73.54% of total user-perceived delay. Second, we find that video streams delivered by cloud gaming platforms are characterized by high variability of bitrate, frame rate, and resolution. Platforms struggle to consistently serve 1080p/60 frames per second streams across different game genres even when the available bandwidth is 8-20× that of platform's recommended settings. Finally, we show that game platforms exhibit performance cliffs by reacting poorly to packet losses, in some cases dramatically reducing the delivered bitrate by up to 6.6× when loss rates increase from 0.1% to 1%. Our work has important implications for cloud gaming platforms and opens the door for further research on comprehensive measurement methodologies for cloud gaming.}, number={3}, journal={PROCEEDINGS OF THE ACM ON MEASUREMENT AND ANALYSIS OF COMPUTING SYSTEMS}, author={Iqbal, Hassan and Khalid, Ayesha and Shahzad, Muhammad}, year={2021}, month={Dec} } @article{venkatnarayan_mahmood_shahzad_2021, title={WiFi based Multi-User Gesture Recognition}, volume={20}, ISSN={["1558-0660"]}, DOI={10.1109/TMC.2019.2954891}, abstractNote={WiFi based gesture recognition has received significant attention over the past few years. However, the key limitation of prior WiFi based gesture recognition systems is that they cannot recognize the gestures of multiple users performing them simultaneously. In this article, we address this limitation and propose WiMU, a WiFi based Multi-User gesture recognition system. The key idea behind WiMU is that when it detects that some users have performed some gestures simultaneously, it first automatically determines the number of simultaneously performed gestures ($N_a$Na) and then, using the training samples collected from a single user, generates virtual samples for various plausible combinations of $N_a$Na gestures. The key property of these virtual samples is that the virtual samples for any given combination of gestures are identical to the real samples that would result from real users performing that combination of gestures. WiMU compares the detected sample against these virtual samples and recognizes the simultaneously performed gestures. We implemented and extensively evaluated WiMU using commodity WiFi devices. Our results show that WiMU recognizes 2, 3, 4, 5, 6, 7, and 8 simultaneously performed gestures with accuracies of 95.6, 94.9, 93.9, 92.7, 91.6, 91.0, and 90.1 percent, respectively.}, number={3}, journal={IEEE TRANSACTIONS ON MOBILE COMPUTING}, author={Venkatnarayan, Raghav H. and Mahmood, Shakir and Shahzad, Muhammad}, year={2021}, month={Mar}, pages={1242–1256} } @article{zhang_venkatnarayan_shahzad_2020, title={A WiFi-based Home Security System}, ISSN={["2155-6806"]}, DOI={10.1109/MASS50613.2020.00026}, abstractNote={Typical home security systems monitor homes for intrusions by installing contact sensors on doors and windows and motion sensors inside the house. Unfortunately, due to the high deployment and operational costs of today’s home security systems, only a small fraction of homes have security systems installed (e.g., only 17% in the US and 15% in China). In this paper, we propose a Wi Fi based H ome S ecurity system (WiHS) that uses commodity WiFi devices, which most modern households already have, to perform the three primary tasks of typical home security systems: 1) detect when a door/window is opened/closed, 2) identify which door/window has been opened/closed, and 3) detect movements inside the house. The design of WiHS is based on our intuitive and theoretical understanding of the impacts of the movements of doors and windows on WiFi signals, which we will develop and present in this paper. We extensively evaluated WiHS using commodity WiFi devices in 3 different houses. WiHS detected intrusions with over 95% accuracy and identified the exact door/window that moved with just 4.5% average error.}, journal={2020 IEEE 17TH INTERNATIONAL CONFERENCE ON MOBILE AD HOC AND SMART SYSTEMS (MASS 2020)}, author={Zhang, Shaohu and Venkatnarayan, Raghav H. and Shahzad, Muhammad}, year={2020}, pages={129–137} } @article{akbulut_perros_shahzad_2020, title={Bimodal affect recognition based on autoregressive hidden Markov models from physiological signals}, volume={195}, ISSN={["1872-7565"]}, DOI={10.1016/j.cmpb.2020.105571}, abstractNote={Background and objective: Affect provides contextual information about the emotional state of a person as he/she communicates in both verbal and/or non-verbal forms. While human’s are great at determining the emotional state of people while they communicate in person, it is challenging and still largely an unsolved problem to computationally determine the emotional state of a person. Methods: Emotional states of a person manifest in the physiological biosignals such as electrocardiogram (ECG) and electrodermal activity (EDA) because these signals are impacted by the peripheral nervous system of the body, and the peripheral nervous system is strongly coupled with the mental state of the person. In this paper, we present a method to accurately recognize six emotions using ECG and EDA signals and applying autoregressive hidden Markov models (AR-HMMs) and heart rate variability analysis on these signals. The six emotions include happiness, sadness, surprise, fear, anger, and disgust. Results: We evaluated our method on a comprehensive new dataset collected from 30 participants. Our results show that our proposed method achieves an average accuracy of 88.6% in distinguishing across the 6 emotions. Conclusions: The key technical depth of the paper is in the use of the AR-HMMs to model the EDA signal and the use of LDA to enable accurate emotion recognition without requiring a large number of training samples. Unlike other studies, we have taken a hierarchical approach to classify emotions, where we first categorize the emotion as either positive or negative and then identify the exact emotion.}, journal={COMPUTER METHODS AND PROGRAMS IN BIOMEDICINE}, author={Akbulut, Fatma Patlar and Perros, Harry G. and Shahzad, Muhammad}, year={2020}, month={Oct} } @article{ganji_singh_shahzad_2020, title={Characterizing the Impact of TCP Coexistence in Data Center Networks}, ISSN={["1063-6927"]}, DOI={10.1109/ICDCS47774.2020.00035}, abstractNote={The switch fabrics of today’s data centers carry traffic controlled by a variety of TCP congestion control algorithms. This leads us to ask: how does the coexistence of multiple variants of TCP on shared switch fabric impacts the performance achieved by different applications in data centers? To answer this question, we conducted an extensive set of experiments with coexisting TCP variants on Leaf-Spine and Fat-Tree switch fabrics. We executed common data center workloads, which include streaming, MapReduce, and storage workloads, using four commonly used TCP variants, namely BBR, DCTCP, CUBIC, and New Reno. We also extensively executed iPerf workloads using these 4 TCP variants to purely study the impact of the coexistence of TCP variants on each other’s performance without incorporating the network behavior of the application layer. Our experiments resulted in a large set of network traces comprised of 160 billion packets (we will release these traces after publication of this work). We present comprehensive observations from these traces that have important implications in ensuring optimal utilization of data center switch fabric and in meeting the network performance needs of application layer workloads.}, journal={2020 IEEE 40TH INTERNATIONAL CONFERENCE ON DISTRIBUTED COMPUTING SYSTEMS (ICDCS)}, author={Ganji, Anirudh and Singh, Anand and Shahzad, Muhammad}, year={2020}, pages={388–398} } @article{boob_mahmood_shahzad_2020, title={Distributed and Privacy Preserving Routing of Connected Vehicles to Minimize Congestion}, ISSN={["2155-6806"]}, DOI={10.1109/MASS50613.2020.00036}, abstractNote={With a large number of connected vehicles on the roads, there is an opportunity to leverage their connectivity to minimize congestion on roads by calculating fast routes for vehicles in a way that each vehicle contributes as little to the congestion as possible. The existing commercial and research based approaches of calculating routes for vehicles suffer from one or more of the following two limitations: 1) they are not privacy preserving in the sense that they receive destination addresses from users and may either store and use them for other commercial purposes or are at a risk of getting hacked and exposing these addresses to hackers; and 2) they require expensive infrastructure such as road side units (RSUs). To address these limitations, we propose a distributed and privacy preserving routing protocol, namely DPR, which the connected vehicles collaboratively and repeatedly execute to calculate fast routes to their destinations such that the overall congestion on the road network is significantly reduced and at the same time the privacy of the vehicles is preserved. The DPR protocol relies on direct vehicle to vehicle communication and does not need any new infrastructure such as RSUs. We have implemented and evaluated our DPR protocol through simulations on a real road network under several traffic conditions. Our results show that DPR reduces the average travel time of vehicles that travel a distance of 1000, 2500, and over 4000 meters by 15%, 32%, and 42%, respectively. This reduction in travel time is significant considering that this improvement results purely from software manipulations and without requiring any new infrastructure.}, journal={2020 IEEE 17TH INTERNATIONAL CONFERENCE ON MOBILE AD HOC AND SMART SYSTEMS (MASS 2020)}, author={Boob, Surabhi and Mahmood, Shakir and Shahzad, Muhammad}, year={2020}, pages={220–228} } @article{shahzad_shafiq_liu_2020, title={Large Scale Characterization of Software Vulnerability Life Cycles}, volume={17}, ISSN={["1941-0018"]}, DOI={10.1109/TDSC.2019.2893950}, abstractNote={Software systems inherently contain vulnerabilities that have been exploited in the past resulting in significant revenue losses. The study of various aspects related to vulnerabilities such as their severity, rates of disclosure, exploit and patch release, and existence of common vulnerabilities in different products can help in improving the development, deployment, and maintenance process of software systems. It can also help in designing future security policies and conducting audits of past incidents. Furthermore, such an analysis can help customers to assess the security risks associated with software products of different vendors. In this paper, we conduct an exploratory measurement study of a large software vulnerability data set containing 56077 vulnerabilities disclosed since 1988 till 2013. We investigate vulnerabilities along following eight dimensions: (1) phases in the life cycle of vulnerabilities, (2) evolution of vulnerabilities over the years, (3) functionality of vulnerabilities, (4) access requirement for exploitation of vulnerabilities, (5) risk level of vulnerabilities, (6) software vendors, (7) software products, and (8) existence of common vulnerabilities in multiple software products. Our exploratory analysis uncovers several statistically significant findings that have important implications for software development and deployment.}, number={4}, journal={IEEE TRANSACTIONS ON DEPENDABLE AND SECURE COMPUTING}, author={Shahzad, Muhammad and Shafiq, M. Zubair and Liu, Alex X.}, year={2020}, pages={730–744} } @article{khan_venkatnarayan_shahzad_2020, title={RFMap: Generating Indoor Maps using RF Signals}, DOI={10.1109/IPSN48710.2020.00-40}, abstractNote={Generating maps of indoor environments beyond the line of sight finds applications in several areas such as planning, navigation, and security. While researchers have previously explored the use of RF signals to generate maps, prior work has two important limitations: (i) it requires moving the mapping setup along the entire lengths of the sides of the building, and (ii) it generates maps that are not fully connected, rather are scatter plots of locations from where some obstacles reflected the signals. Thus, prior approaches require human interpretation to locate the walls and determine how they merge. In this paper, we address these limitations and propose RFMap, which generates fully connected maps, and does not require the measurement setup to be moved along the sides of the buildings. To generate the map, RFMap first transmits RF signals in many different directions by rotating the antennas while keeping them at the same location and then measures the distances of different reflectors inside the building. Next, it identifies these reflectors and classifies them into various types based on the properties of the reflections. A key challenge is that RFMap does not receive reflections from all the directions due to the specular nature of the reflectors. Due to this, it only gets sparse data about the objects in the environment. To address this challenge, RFMap trains deep generative adversarial network (GAN) to intelligently predict the missing information. At runtime, it feeds the locations and types of the detected reflectors to the trained GAN and generates the complete and accurate map. We implemented RFMap using software defined radios and extensively evaluated it in several real world environments. Our results show that RFMap generated the maps of all the buildings that we tested it on with high accuracy.}, journal={2020 19TH ACM/IEEE INTERNATIONAL CONFERENCE ON INFORMATION PROCESSING IN SENSOR NETWORKS (IPSN 2020)}, author={Khan, Usman Mahmood and Venkatnarayan, Raghav H. and Shahzad, Muhammad}, year={2020}, pages={133–144} } @article{liu_shen_yan_yang_shahzad_cui_xie_2020, title={SF-Sketch: A Two-Stage Sketch for Data Streams}, volume={31}, ISSN={["1558-2183"]}, DOI={10.1109/TPDS.2020.2987609}, abstractNote={Sketches are probabilistic data structures designed for recording frequencies of items in a multi-set. They are widely used in various fields, especially for gathering Internet statistics from distributed data streams in network measurements. In a distributed streaming application with high data rates, a sketch in each monitoring node “fills up” very quickly and then its content is transferred to a remote collector responsible for answering queries. Thus, the size of the contents transferred must be kept as small as possible while meeting the desired accuracy requirement. To obtain significantly higher accuracy while keeping the same update and query speed as the best prior sketches, in this article, we propose a new sketch – the Slim-Fat (SF) sketch. The key idea behind the SF-sketch is to maintain two separate sketches: a larger sketch, the Fat-subsketch, and a smaller sketch, the Slim-subsketch. The Fat-subsketch is used for updating and periodically producing the Slim-subsketch, which is then transferred to the remote collector for answering queries quickly and accurately. We also present the error bound as well as an accurate model of the correct rate of the SF-sketch, and verify their correctness through experiments. We implemented and extensively evaluated the SF-sketch along with several prior sketches. Our results show that when the size of our Slim-subsketch and of the widely used Count-Min (CM) sketch are kept the same, our SF-sketch outperforms the CM-sketch by up to 33.1 times in terms of accuracy (when the ratio of the sizes of the Fat-subsketch and the Slim-subsketch is 16:1). We have made all source codes publicly available at Github [“Source code of SF sketches,” [Online]. Available: https://github.com/paper2017/SF-sketch].}, number={10}, journal={IEEE TRANSACTIONS ON PARALLEL AND DISTRIBUTED SYSTEMS}, author={Liu, Lingtong and Shen, Yulong and Yan, Yibo and Yang, Tong and Shahzad, Muhammad and Cui, Bin and Xie, Gaogang}, year={2020}, month={Oct}, pages={2263–2276} } @article{sim_paul_tilevich_butt_shahzad_2019, title={CSLIM: Automated Extraction of loT Functionalities from Legacy C Codebases}, DOI={10.1145/3288599.3296013}, abstractNote={Many Internet of Things (IoT) devices are resource-poor, possessing limited memory, disk space, and processor capacity. To accommodate such resource scarcity, IoT software cannot include any extraneous functionalities not used in operating the underlying device. Although legacy systems software contains numerous functionalities that can be reused in IoT applications, these functionalities are exposed as part of a larger codebase with multiple complex dependencies and a heavy runtime footprint. To enable programmers to effectively reuse extant systems software in IoT applications, this paper presents Cslim, a cross-package function extraction tool for C. Cslim extracts programmer-specified functions from a source package and generates new source files for a target package, thereby enabling the reuse of systems software in resource-poor execution environments, such as the IoT devices. Cslim resolves all dependencies by recursively extracting required functions, while bypassing the complexities of preprocessor macro variabilities by operating on preprocessed source files. Furthermore, Cslim efficiently traverses and resolves the calling dependencies by maintaining an in-memory relational database. Finally, Cslim is easy to use, as it requires neither manual intervention nor source code modifications. Our prototype implementation of Cslim has successfully extracted a set of functions from SQLite and GlusterFS, producing slimmed down executables that can be deployed on IoT devices.}, journal={ICDCN '19: PROCEEDINGS OF THE 2019 INTERNATIONAL CONFERENCE ON DISTRIBUTED COMPUTING AND NETWORKING}, author={Sim, Hyogi and Paul, Arnab K. and Tilevich, Eli and Butt, Ali R. and Shahzad, Muhammad}, year={2019}, pages={421–426} } @article{yang_zhang_wang_shahzad_liu_xin_li_2018, title={FID-sketch: an accurate sketch to store frequencies in data streams}, volume={22}, ISSN={1386-145X 1573-1413}, url={http://dx.doi.org/10.1007/S11280-018-0546-5}, DOI={10.1007/s11280-018-0546-5}, number={6}, journal={World Wide Web}, publisher={Springer Science and Business Media LLC}, author={Yang, Tong and Zhang, Haowei and Wang, Hao and Shahzad, Muhammad and Liu, Xue and Xin, Qin and Li, Xiaoming}, year={2018}, month={Apr}, pages={2675–2696} } @article{dai_shahzad_liu_li_zhong_chen_2018, title={Identifying and Estimating Persistent Items in Data Streams}, volume={26}, ISSN={["1558-2566"]}, DOI={10.1109/TNET.2018.2865125}, abstractNote={This paper addresses the fundamental problem of finding persistent items and estimating the number of times each persistent item occurred in a given data stream during a given period of time at any given observation point. We propose a novel scheme, PIE, that can not only accurately identify each persistent item with a probability greater than any desired false negative rate (FNR), but can also accurately estimate the number of occurrences of each persistent item. The key idea of PIE is that it uses Raptor codes to encode the ID of each item that appears at the observation point during a measurement period and stores only a few bits of the encoded ID in the memory. The item that is persistent occurs in enough measurement periods that enough encoded bits for the ID can be retrieved from the observation point to decode them correctly and get the ID of the persistent item. To estimate the number of occurrences of any given persistent item, PIE uses maximum likelihood estimation-based statistical techniques on the information already recorded during the measurement periods. We implemented and evaluated PIE using three real network traffic traces and compared its performance with three prior schemes. Our results show that PIE not only achieves the desire FNR in every scenario, its average FNR can be 19.5 times smaller than the FNR of the adapted prior scheme. Our results also show that PIE achieves any desired success probability in estimating the number of occurrences of persistent items.}, number={6}, journal={IEEE-ACM TRANSACTIONS ON NETWORKING}, author={Dai, Haipeng and Shahzad, Muhammad and Liu, Alex X. and Li, Meng and Zhong, Yuankun and Chen, Guihai}, year={2018}, month={Dec}, pages={2429–2442} } @article{venkatnarayan_page_shahzad_2018, title={Multi-User Gesture Recognition Using WiFi}, DOI={10.1145/3210240.3210335}, abstractNote={WiFi based gesture recognition has received significant attention over the past few years. However, the key limitation of prior WiFi based gesture recognition systems is that they cannot recognize the gestures of multiple users performing them simultaneously. In this paper, we address this limitation and propose WiMU, a WiFi based Multi-User gesture recognition system. The key idea behind WiMU is that when it detects that some users have performed some gestures simultaneously, it first automatically determines the number of simultaneously performed gestures (Na) and then, using the training samples collected from a single user, generates virtual samples for various plausible combinations of Na gestures. The key property of these virtual samples is that the virtual samples for any given combination of gestures are identical to the real samples that would result from real users performing that combination of gestures. WiMU compares the detected sample against these virtual samples and recognizes the simultaneously performed gestures. We implemented and extensively evaluated WiMU using commodity WiFi devices. Our results show that WiMU recognizes 2, 3, 4, 5, and 6 simultaneously performed gestures with accuracies of 95.0, 94.6, 93.6, 92.6, and 90.9%, respectively.}, journal={MOBISYS'18: PROCEEDINGS OF THE 16TH ACM INTERNATIONAL CONFERENCE ON MOBILE SYSTEMS, APPLICATIONS, AND SERVICES}, author={Venkatnarayan, Raghav H. and Page, Griffin and Shahzad, Muhammad}, year={2018}, pages={401–413} } @article{yang_liu_shahzad_yang_fu_xie_li_2017, title={A Shifting Framework for Set Queries}, volume={25}, ISSN={["1558-2566"]}, DOI={10.1109/tnet.2017.2730227}, abstractNote={Set queries are fundamental operations in computer networks. This paper addresses the fundamental problem of designing a probabilistic data structure that can quickly process set queries using a small amount of memory. We propose a shifting bloom filter (ShBF) framework for representing and querying sets. We demonstrate the effectiveness of ShBF using three types of popular set queries: membership, association, and multiplicity queries. The key novelty of ShBF is on encoding the auxiliary information of a set element in a location offset. In contrast, prior BF-based set data structures allocate additional memory to store auxiliary information. We further extend our shifting framework from BF-based data structures to sketch-based data structures, which are widely used to store multiplicities of items. We conducted experiments using real-world network traces, and results show that ShBF significantly advances the state-of-the-art on all three types of set queries.}, number={5}, journal={IEEE-ACM TRANSACTIONS ON NETWORKING}, author={Yang, Tong and Liu, Alex X. and Shahzad, Muhammad and Yang, Dongsheng and Fu, Qiaobin and Xie, Gaogang and Li, Xiaoming}, year={2017}, month={Oct}, pages={3116–3131} } @article{shahzad_liu_samuel_2017, title={Behavior Based Human Authentication on Touch Screen Devices Using Gestures and Signatures}, volume={16}, ISSN={["1558-0660"]}, DOI={10.1109/tmc.2016.2635643}, abstractNote={With the rich functionalities and enhanced computing capabilities available on mobile computing devices with touch screens, users not only store sensitive information (such as credit card numbers) but also use privacy sensitive applications (such as online banking) on these devices, which make them hot targets for hackers and thieves. To protect private information, such devices typically lock themselves after a few minutes of inactivity and prompt a password/PIN/pattern screen when reactivated. Passwords/PINs/patterns based schemes are inherently vulnerable to shoulder surfing attacks and smudge attacks. In this paper, we propose BEAT, an authentication scheme for touch screen devices that authenticates users based on their behavior of performing certain actions on the touch screens. An action is either a gesture, which is a brief interaction of a user's fingers with the touch screen such as swipe rightwards, or a signature, which is the conventional unique handwritten depiction of one's name. Unlike existing authentication schemes for touch screen devices, which use what user inputs as the authentication secret, BEAT authenticates users mainly based on how they input, using distinguishing features such as velocity, device acceleration, and stroke time. Even if attackers see what action a user performs, they cannot reproduce the behavior of the user doing those actions through shoulder surfing or smudge attacks. We implemented BEAT on Samsung Focus smart phones and Samsung Slate tablets running Windows, collected 15,009 gesture samples and 10,054 signature samples, and conducted real-time experiments to evaluate its performance. Experimental results show that, with only 25 training samples, for gestures, BEAT achieves an average equal error rate of 0.5 percent with three gestures and for signatures, it achieves an average equal error rate of 0.52 percent with single signature.}, number={10}, journal={IEEE TRANSACTIONS ON MOBILE COMPUTING}, author={Shahzad, Muhammad and Liu, Alex X. and Samuel, Arjmand}, year={2017}, month={Oct}, pages={2726–2741} } @article{shahzad_singh_2017, title={Continuous Authentication and Authorization for the Internet of Things}, volume={21}, ISSN={["1941-0131"]}, url={https://publons.com/publon/21294366/}, DOI={10.1109/mic.2017.33}, abstractNote={With the dawn of the Internet of Things, small but smart devices have become ubiquitous. Although these devices carry a lot of compute power and enable several interesting applications, they lack conventional interfaces such as keyboards, mice, and touchscreens. As a result, such devices can't authenticate and authorize users in familiar ways. Furthermore, unlike for conventional settings, a one-time authentication at the start of a session usually isn't appropriate for the IoT, because the application scenarios are dynamic and a user might not retain physical control or even awareness of IoT devices quite as readily as with traditional computers. Thus, users need to be continuously authenticated and authorized. Fortunately, the IoT offers interesting potential solutions for meeting these requirements. This article discusses some challenges and opportunities in developing continuous authentication and authorization approaches for the IoT while also presenting a case study of a Wi-Fi-based human authentication system called WiFiU.}, number={2}, journal={IEEE INTERNET COMPUTING}, author={Shahzad, Muhammad and Singh, Munindar P.}, year={2017}, pages={86–90} } @article{wang_liu_shahzad_ling_lu_2017, title={Device-Free Human Activity Recognition Using Commercial WiFi Devices}, volume={35}, ISSN={["1558-0008"]}, DOI={10.1109/jsac.2017.2679658}, abstractNote={Since human bodies are good reflectors of wireless signals, human activities can be recognized by monitoring changes in WiFi signals. However, existing WiFi-based human activity recognition systems do not build models that can quantify the correlation between WiFi signal dynamics and human activities. In this paper, we propose a Channel State Information (CSI)-based human Activity Recognition and Monitoring system (CARM). CARM is based on two theoretical models. First, we propose a CSI-speed model that quantifies the relation between CSI dynamics and human movement speeds. Second, we propose a CSI-activity model that quantifies the relation between human movement speeds and human activities. Based on these two models, we implemented the CARM on commercial WiFi devices. Our experimental results show that the CARM achieves recognition accuracy of 96% and is robust to environmental changes.}, number={5}, journal={IEEE JOURNAL ON SELECTED AREAS IN COMMUNICATIONS}, author={Wang, Wei and Liu, Alex X. and Shahzad, Muhammad and Ling, Kang and Lu, Sanglu}, year={2017}, month={May}, pages={1118–1131} } @article{shahzad_liu_2017, title={Fast and Accurate Tracking of Population Dynamics in RFID Systems}, ISSN={["1063-6927"]}, DOI={10.1109/icdcs.2017.58}, abstractNote={RFID systems have been widely deployed for various applications such as supply chain management, indoor localization, inventory control, and access control. This paper deals with the fundamental problem of estimating the number of arriving and departing tags between any two time instants in dynamically changing RFID tag populations, which is needed in many applications such as warehouse monitoring and privacy sensitive RFID systems. In this paper, we propose a dynamic tag estimation scheme, namely DTE, that can achieve arbitrarily high required reliability, is compliant with the C1G2 standard, and works in single as well as multiple-reader environment. DTE uses the standardized frame slotted Aloha protocol and utilizes the number of slots that change their values in corresponding Aloha frames at the two time instants to estimate the number of arriving and departing tags. It is easy to deploy because it neither requires modification to tags nor to the communication protocol between tags and readers. We have extensively evaluated and compared DTE with the only prior scheme, ZDE, that can estimate the number of arriving and departing tags. Unfortunately, ZDE can not achieve arbitrarily high required reliability. In contrast, our proposed scheme always achieves the required reliability. For example, for a tag population containing 10 4 tags, a required reliability of 95%, and a required confidence interval of 5%, DTE takes 5.12 seconds to achieve the required reliability whereas ZDE achieves a reliability of only 66% in the same amount of time.}, journal={2017 IEEE 37TH INTERNATIONAL CONFERENCE ON DISTRIBUTED COMPUTING SYSTEMS (ICDCS 2017)}, author={Shahzad, Muhammad and Liu, Alex X.}, year={2017}, pages={836–846} } @article{liu_li_liu_guo_shahzad_wang_wu_2017, title={Multi-Category RFID Estimation}, volume={25}, ISSN={["1558-2566"]}, DOI={10.1109/tnet.2016.2594481}, abstractNote={This paper concerns the practically important problem of multi-category radio frequency identification (RFID) estimation: given a set of RFID tags, we want to quickly and accurately estimate the number of tags in each category. However, almost all the existing RFID estimation protocols are dedicated to the estimation problem on a single set, regardless of tag categories. A feasible solution is to separately execute the existing estimation protocols on each category. The execution time of such a serial solution is proportional to the number of categories, and cannot satisfy the delay-stringent application scenarios. Simultaneous RIFD estimation over multiple categories is desirable, and hence, this paper proposes an approach called simultaneous estimation for multi-category RFID systems (SEM). SEM exploits the Manchester-coding mechanism, which is supported by the ISO 18000-6 RFID standard, to decode the combined signals, thereby simultaneously obtaining the reply status of tags from each category. As a result, multiple bit vectors are decoded from just one physical slotted frame. Built on our SEM, many existing excellent estimation protocols can be used to estimate the tag cardinality of each category in a simultaneous manner. To ensure the predefined accuracy, we calculate the variance of the estimate in one round, as well as the variance of the average estimate in multiple rounds. To find the optimal frame size, we propose an efficient binary search-based algorithm. To address significant variance in category sizes, we propose an adaptive partitioning (AP) strategy to group categories of similar sizes together and execute the estimation protocol for each group separately. Compared with the existing protocols, our approach is much faster, meanwhile satisfying the predefined estimation accuracy. For example, with 20 categories, the proposed SEM+AP is about seven times faster than prior estimation schemes. Moreover, our approach is the only one whose normalized estimation time (i.e., time per category) decreases as the number of categories increases.}, number={1}, journal={IEEE-ACM TRANSACTIONS ON NETWORKING}, author={Liu, Xiulong and Li, Keqiu and Liu, Alex X. and Guo, Song and Shahzad, Muhammad and Wang, Ann L. and Wu, Jie}, year={2017}, month={Feb}, pages={264–277} } @article{ali_liu_wang_shahzad_2017, title={Recognizing Keystrokes Using WiFi Devices}, volume={35}, ISSN={["1558-0008"]}, DOI={10.1109/jsac.2017.2680998}, abstractNote={Keystroke privacy is critical for ensuring the security of computer systems and the privacy of human users as what is being typed could be passwords or privacy sensitive information. In this paper, we show for the first time that WiFi signals can also be exploited to recognize keystrokes. The intuition is that while typing a certain key, the hands and fingers of a user move in a unique formation and direction and thus generate a unique pattern in the time-series of channel state information (CSI) values, which we call CSI-waveform for that key. In this paper, we propose a WiFi signal-based keystroke recognition system called WiKey. WiKey consists of two commercial off-the-shelf WiFi devices, a sender (such as a router) and a receiver (such as a laptop). The sender continuously emits signals and the receiver continuously receives signals. When a human subject types on a keyboard, WiKey recognizes the typed keys based on how the CSI values at the WiFi signal receiver end. We implemented the WiKey system using a TP-Link TL-WR1043ND WiFi router and a Lenovo X200 laptop. WiKey achieves over 97.5% detection rate for detecting the keystroke and 96.4% recognition accuracy for classifying single keys. In real-world experiments, WiKey can recognize keystrokes in a continuously typed sentence with an accuracy of 93.5%. WiKey can also recognize complete words inside a sentence with over 85% accuracy.}, number={5}, journal={IEEE JOURNAL ON SELECTED AREAS IN COMMUNICATIONS}, author={Ali, Kamran and Liu, Alex X. and Wang, Wei and Shahzad, Muhammad}, year={2017}, month={May}, pages={1175–1190} } @inproceedings{yang_yin_li_shahzad_uhlig_cui_li_2017, title={Rectangular hash table: Bloom filter and bitmap assisted hash table with high speed}, DOI={10.1109/bigdata.2017.8257999}, abstractNote={Hash table, a widely used data structure, can achieve an O(1) average lookup speed at the cost of large memory usage. Unfortunately, hash tables suffer from collisions and the rate of collisions is largely determined by the load factor. Broadly speaking, existing research has taken two approaches to improve the performance of hash tables. The first approach trades-off collision rate with memory usage, but only works well under low load. The second approach pursues high load and no hash collisions, but comes with update failures. The goal of this paper is to design a practical and efficient hash table that achieves high load factor, low hash collision rate, fast lookup speed, fast update speed, and zero update failures. To achieve this goal, we take a three-step approach. First, we propose a set of hashing techniques that leverage Bloom filters to significantly reduce hash collision rates. Second, we introduce a novel kick mechanism to achieve a high load factor. Last, we develop bitmaps to significantly accelerate the kick mechanism. Theoretical analysis and experimental results show that our hashing schemes significantly outperform the state-of-the-art Our hash table achieves a high load factor (greater than 95%), a low collision rate (less than 0.56%), and the number of hash buckets almost equals to the number of key-value pairs. Given n key-value pairs, the collision rate is reduced to 0 by either using 1.18 ×n buckets or allowing up to 5 blind kicks. We have released the source code of the implementations of our hash table and of 6 prior hash tables at Github [1].}, booktitle={2017 IEEE International Conference on Big Data (Big Data)}, author={Yang, T. and Yin, B. C. and Li, H. and Shahzad, M. and Uhlig, S. and Cui, B. and Li, X. M.}, year={2017}, pages={837–846} } @article{yang_liu_yan_shahzad_shen_li_cui_xie_2017, title={SF-sketch: A Fast, Accurate, and Memory Efficient Data Structure to Store Frequencies of Data Items}, ISSN={["1084-4627"]}, DOI={10.1109/icde.2017.50}, abstractNote={A sketch is a probabilistic data structure that is used to record frequencies of items in a multi-set. Sketches have been applied in a variety of fields, such as data stream processing, natural language processing, distributed data sets etc. In this paper, we propose a new sketch, called Slim-Fat (SF) sketch, which has a much smaller memory footprint for query while supporting updates. The key idea behind our proposed SF-sketch is to maintain two separate sketches: a small sketch called Slimsubsketch and a large sketch called Fat-subsketch. The Slimsubsketch enables fast and accurate querying. The Fat-subsketch is used to assist the insertion and deletion from Slim-subsketch. We implemented and evaluated SF-sketch along with several prior sketches and compared them side by side. Our experimental results show that SF-sketch significantly outperforms the most commonly used CM-sketch in terms of accuracy. The full version is provided at arXiv.org [12].}, journal={2017 IEEE 33RD INTERNATIONAL CONFERENCE ON DATA ENGINEERING (ICDE 2017)}, author={Yang, Tong and Liu, Lingtong and Yan, Yibo and Shahzad, Muhammad and Shen, Yulong and Li, Xiaoming and Cui, Bin and Xie, Gaogang}, year={2017}, pages={103–106} } @article{yang_liu_shahzad_zhong_fu_li_xie_li_2016, title={A shifting bloom filter framework for set queries}, volume={9}, DOI={10.14778/2876473.2876476}, abstractNote={Set queries are fundamental operations in computer systems and applications. This paper addresses the fundamental problem of designing a probabilistic data structure that can quickly process set queries using a small amount of memory. We propose a Shifting Bloom Filter (ShBF) framework for representing and querying sets. We demonstrate the effectiveness of ShBF using three types of popular set queries: membership, association, and multiplicity queries. The key novelty of ShBF is on encoding the auxiliary information of a set element in a location offset. In contrast, prior BF based set data structures allocate additional memory to store auxiliary information. We conducted experiments using real-world network traces, and results show that ShBF significantly advances the state-of-the-art on all three types of set queries.}, number={5}, journal={Proceedings of the VLDB Endowment}, author={Yang, T. and Liu, A. X. and Shahzad, M. and Zhong, Y. K. and Fu, Q. B. and Li, Z. and Xie, G. G. and Li, X. M.}, year={2016}, pages={408–419} } @inproceedings{wang_shahzad_liu_2016, title={A stochastic frame based approach to RFID tag searching}, DOI={10.1109/ifipnetworking.2016.7497201}, abstractNote={This paper addresses the fundamental problem of RFID tag searching: given a set of known tag IDs and a population of RFID tags with unknown IDs, where the tags may be passive or active, we want to know which tag IDs are in the tag population. RFID tag searching has many applications such as product recall, inventory balancing, and stock verification. Previous RFID tag searching protocols cannot achieve arbitrarily high accuracy and are not C1G2 compliant. In this paper, we propose a protocol called RTSP, which satisfies the four requirements of C1G2 compliance, arbitrary accuracy, privacy preserving, and multiple-reader capability. RTSP is easy to deploy because it is implemented on readers as a software module and does not require any implementation on tags. Furthermore, it does not require any modifications either to tags or to the communication protocol between tags and readers and works with the commercially available off-the-shelf RFID tags. We implemented RTSP along with the fastest tag identification protocol and compared them side-by-side. Our experimental results show that RTSP always achieves the required accuracy and is 22.73% faster than the fastest RFID identification protocol.}, booktitle={2016 IFIP Networking Conference (IFIP Networking) and Workshops}, author={Wang, A. L. and Shahzad, M. and Liu, A. X.}, year={2016}, pages={153–161} } @article{shahzad_liu_2016, title={Accurate and Efficient Per-Flow Latency Measurement Without Probing and Time Stamping}, volume={24}, ISSN={["1558-2566"]}, DOI={10.1109/tnet.2016.2533544}, abstractNote={With the growth in number and significance of the emerging applications that require extremely low latencies, network operators are facing increasing need to perform latency measurement on per-flow basis for network monitoring and troubleshooting. In this paper, we propose COLATE, the first per-flow latency measurement scheme that requires no probe packets and time stamping. Given a set of observation points, COLATE records packet timing information at each point so that later, for any two points, it can accurately estimate the average and the standard deviation of the latencies experienced by the packets of any flow in passing the two points. The key idea is that when recording packet timing information, COLATE purposely allows noise to be introduced for minimizing storage space, and when querying the latency of a target flow, COLATE uses statistical techniques to denoise and obtain an accurate latency estimate. COLATE is designed to be efficiently implementable on network middleboxes. In terms of processing overhead, COLATE performs only one hash and one memory update per packet. In terms of storage space, COLATE uses less than 0.1-b/packet, which means that, on a backbone link with half a million packets per second, using a 256-GB drive, COLATE can accumulate time stamps of packets traversing the link for over 1.5 years. We evaluated COLATE using three real traffic traces, namely, a backbone traffic trace, an enterprise network traffic trace, and a data center traffic trace. Results show that COLATE always achieves the required reliability for any given confidence interval.}, number={6}, journal={IEEE-ACM TRANSACTIONS ON NETWORKING}, author={Shahzad, Muhammad and Liu, Alex X.}, year={2016}, month={Dec}, pages={3477–3492} } @article{shahzad_liu_2016, title={Fast and Reliable Detection and Identification of Missing RFID Tags in the Wild}, volume={24}, ISSN={["1558-2566"]}, DOI={10.1109/tnet.2016.2559539}, abstractNote={Radio-frequency identification (RFID) systems have been deployed to detect and identify missing products by affixing them with cheap passive RFID tags and monitoring them with RFID readers. Existing missing tag detection and identification protocols require the tag population to contain only those tags whose IDs are already known to the reader. However, in reality, tag populations often contain tags with unknown IDs, called unexpected tags. These unexpected tags cause unexpected false positives, i.e., due to them, missing tags are detected as present. We take the first step toward addressing the problem of detecting and identifying missing tags from a population that contains unexpected tags. Our protocol, RUN, uses standardized frame slotted Aloha for communication between tags and readers. It executes multiple frames with different seeds to reduce the effects of unexpected false positives. At the same time, it minimizes the missing tag detection and identification time by first estimating the number of unexpected tags in the population and then using it along with the false-positive probability to obtain optimal frame sizes and minimum number of times Aloha frames should be executed to mitigate the effects of false positives. RUN works with multiple readers with overlapping regions. It is easy to deploy, because it is implemented on readers as a software module and does not require any modifications to tags or to the communication protocol between the tags and the readers. We implemented RUN along with four major missing tag detection and identification protocols, namely, TRP, IIP, MTI, and SFMTI, and the fastest tag ID collection protocol TH and compared them side by side. Our performance evaluation results show that RUN is the only protocol that achieves required reliability in the presence of unexpected tags, whereas the best existing protocol achieves a maximum reliability of only 67%. RUN identifies 100% of missing tags in the presence of unexpected tags, whereas the best existing protocol identifies a maximum of only 60% of missing tags.}, number={6}, journal={IEEE-ACM TRANSACTIONS ON NETWORKING}, author={Shahzad, Muhammad and Liu, Alex X.}, year={2016}, month={Dec}, pages={3770–3784} } @article{dai_shahzad_liu_zhong_2016, title={Finding persistent items in data streams}, volume={10}, DOI={10.14778/3025111.3025112}, abstractNote={ Frequent item mining, which deals with finding items that occur frequently in a given data stream over a period of time, is one of the heavily studied problems in data stream mining. A generalized version of frequent item mining is the persistent item mining, where a persistent item, unlike a frequent item, does not necessarily occur more frequently compared to other items over a short period of time, rather persists and occurs more frequently over a long period of time. To the best of our knowledge, there is no prior work on mining persistent items in a data stream. In this paper, we address the fundamental problem of finding persistent items in a given data stream during a given period of time at any given observation point. We propose a novel scheme, PIE, that can accurately identify each persistent item with a probability greater than any desired false negative rate (FNR) while using a very small amount of memory. The key idea of PIE is that it uses Raptor codes to encode the ID of each item that appears at the observation point during a measurement period and stores only a few bits of the encoded ID in the memory of that observation point during that measurement period. The item that is persistent occurs in enough measurement periods that enough encoded bits for the ID can be retrieved from the observation point to decode them correctly and get the ID of the persistent item. We implemented and extensively evaluated PIE using three real network traffic traces and compared its performance with two prior adapted schemes. Our results show that not only PIE achieves the desired FNR in every scenario, its FNR, on average, is 19.5 times smaller than the FNR of the best adapted prior art. }, number={4}, journal={Proceedings of the VLDB Endowment}, author={Dai, H. P. and Shahzad, M. and Liu, A. X. and Zhong, Y. K.}, year={2016}, pages={289–300} } @article{shahzad_liu_2015, title={Fairness Matters: Identification of Active RFID Tags with Statistically Guaranteed Fairness}, ISSN={["1092-1648"]}, DOI={10.1109/icnp.2015.23}, abstractNote={RFID systems with battery powered active tags are widely used in various applications such as supply chain management and object tracking. In RFID identification, tags transmit their IDs to readers over a shared wireless medium, thus, transmissions from tags often collide causing some tags to use their scarce energy resources to retransmit their IDs. Existing RFID identification protocols are unfair in the sense that some tags transmit more times compared to others and thus deplete their batteries faster. Locating tags with depleted batteries for replacement is troublesome. This paper addresses the fundamental problem of ensuring required fairness in the number of transmissions per tag while minimizing identification time in active RFID tag identification. We propose the first Fair RFID Identification Protocol (FRIP) that can achieve any required amount of fairness. The key idea behind FRIP is to bound the expected number of tags that transmit more than once by finding optimal frame sizes for the standardized frame slotted Aloha. We implemented and performed side-by-side comparisons of FRIP with all nine major existing RFID identification protocols. Our results show that FRIP can achieve arbitrarily high fairness. FRIP reduces the average number of transmissions per tag by at least 2.62 times compared to the best existing protocol. At the same time, it is faster than the existing protocols. FRIP is easy to deploy because it is compliant with the C1G2 standard, and thus, requires no modifications to tags or to the communication protocol between tags and readers. It only needs to be implemented on readers as a software module. FRIP works with multiple readers.}, journal={2015 IEEE 23RD INTERNATIONAL CONFERENCE ON NETWORK PROTOCOLS (ICNP)}, author={Shahzad, Muhammad and Liu, Alex X.}, year={2015}, pages={279–290} } @article{shahzad_shahzad_farooq_2013, title={In-execution dynamic malware analysis and detection by mining information in process control blocks of Linux OS}, volume={231}, ISSN={0020-0255}, url={http://dx.doi.org/10.1016/j.ins.2011.09.016}, DOI={10.1016/j.ins.2011.09.016}, abstractNote={Run-time behavior of processes – running on an end-host – is being actively used to dynamically detect malware. Most of these detection schemes build model of run-time behavior of a process on the basis of its data flow and/or sequence of system calls. These novel techniques have shown promising results but an efficient and effective technique must meet the following performance metrics: (1) high detection accuracy, (2) low false alarm rate, (3) small detection time, and (4) the technique should be resilient to run-time evasion attempts. To meet these challenges, a novel concept of genetic footprint is proposed, by mining the information in the kernel process control blocks (PCB) of a process, that can be used to detect malicious processes at run time. The genetic footprint consists of selected parameters – maintained inside the PCB of a kernel for each running process – that define the semantics and behavior of an executing process. A systematic forensic study of the execution traces of benign and malware processes is performed to identify discriminatory parameters of a PCB (task_struct is PCB in case of Linux OS). As a result, 16 out of 118 task structure parameters are short listed using the time series analysis. A statistical analysis is done to corroborate the features of the genetic footprint and to select suitable machine learning classifiers to detect malware. The scheme has been evaluated on a dataset that consists of 105 benign processes and 114 recently collected malware processes for Linux. The results of experiments show that the presented scheme achieves a detection accuracy of 96% with 0% false alarm rate in less than 100 ms of the start of a malicious activity. Last but not least, the presented technique utilizes partial knowledge that is available at a given time while the process is still executing; as a result, the kernel of OS can devise mitigation strategies. It is also shown that the presented technique is robust to well known run-time evasion attempts.}, journal={Information Sciences}, publisher={Elsevier BV}, author={Shahzad, Farrukh and Shahzad, M. and Farooq, Muddassar}, year={2013}, month={May}, pages={45–63} } @inbook{zahid_shahzad_khayam_farooq_2009, title={Keystroke-Based User Identification on Smart Phones}, ISBN={9783642043413 9783642043420}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-642-04342-0_12}, DOI={10.1007/978-3-642-04342-0_12}, abstractNote={Smart phones are now being used to store users’ identities and sensitive information/data. Therefore, it is important to authenticate legitimate users of a smart phone and to block imposters. In this paper, we demonstrate that keystroke dynamics of a smart phone user can be translated into a viable features’ set for accurate user identification. To this end, we collect and analyze keystroke data of 25 diverse smart phone users. Based on this analysis, we select six distinguishing keystroke features that can be used for user identification. We show that these keystroke features for different users are diffused and therefore a fuzzy classifier is well-suited to cluster and classify them. We then optimize the front-end fuzzy classifier using Particle Swarm Optimization (PSO) and Genetic Algorithm (GA) as back-end dynamic optimizers to adapt to variations in usage patterns. Finally, we provide a novel keystroke dynamics based PIN (Personal Identification Number) verification mode to ensure information security on smart phones. The results of our experiments show that the proposed user identification system has an average error rate of 2% after the detection mode and the error rate of rejecting legitimate users drops to zero in the PIN verification mode. We also compare error rates (in terms of detecting both legitimate users and imposters) of our proposed classifier with 5 existing state-of-the-art techniques for user identification on desktop computers. Our results show that the proposed technique consistently and considerably outperforms existing schemes.}, booktitle={Lecture Notes in Computer Science}, publisher={Springer Berlin Heidelberg}, author={Zahid, Saira and Shahzad, Muhammad and Khayam, Syed Ali and Farooq, Muddassar}, year={2009}, pages={224–243} } @inbook{shahzad_zahid_farooq_2008, title={A Scalable Formal Framework for Analyzing the Behavior of Nature-Inspired Routing Protocols}, ISBN={9783540876991 9783540877004}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-540-87700-4_112}, DOI={10.1007/978-3-540-87700-4_112}, abstractNote={Nature-inspired routing algorithms for fixed networks is an active area of research. In these algorithms, ant- or bee-agents are deployed for collecting the state of a network and providing them to autonomous and fully distributed controllers at each network node. In these routing systems the agents, through local interactions, self-organize to produce system-level behaviors which show adaptivity to changes and perturbations in the network environment. The formal modeling of such fully self-organizing, distributed and adaptive routing systems is a difficult task. In this paper, we propose a scalable formal framework that has following desirable features: (1) it models important performance metrics: throughput, delay and goodness of links, (2) it is scalable to any size of topology, (3) it is robust to changing network traffic conditions. The proposed framework is utilized to model a well-known BeeHive protocol which is further validated on NTTNeT (a 57 node topology). To the best of our knowledge, this is the first formal framework that has been validated on such a large topology.}, booktitle={Parallel Problem Solving from Nature – PPSN X}, publisher={Springer Berlin Heidelberg}, author={Shahzad, Muhammad and Zahid, Saira and Farooq, Muddassar}, year={2008}, pages={1130–1139} }