% Bibliography database: networking / cloud measurement publications.
% Conventions used below: lowercase entry types and field names, one field
% per line, acronyms in titles and venues brace-protected, page ranges
% written with a double hyphen, month given as a bare three-letter macro.
% The abstractNote field is not a standard field; standard styles ignore it.

@article{iqbal_singh_shahzad_2022,
  author       = {Iqbal, Hassan and Singh, Anand and Shahzad, Muhammad},
  title        = {Characterizing the Availability and Latency in {AWS} Network From the Perspective of Tenants},
  journal      = {{IEEE/ACM} Transactions on Networking},
  year         = {2022},
  month        = feb,
  issn         = {1558-2566},
  doi          = {10.1109/TNET.2022.3148701},
  abstractNote = {Scalability and performance requirements are driving tenants to increasingly move their applications to public clouds. Unfortunately, cloud providers do not provide a view of their networking infrastructure to the tenants, rather only provide some generic service level agreements (SLAs). Tenants are, therefore, forced to plan the deployments of their applications based on these SLAs. This limits the performance that the tenants can achieve. Keeping this in view, we present a detailed network measurement study of the largest public cloud, Amazon Web Services (AWS). We collected network data to characterize the availability and latency of AWS over a period of 100 days and studied various temporal trends across several geographical locations of AWS throughout the world. We performed our study at all three levels of cloud hierarchy: inside availability zones (AZs), across AZs, and across regions. Our results show that network behavior varies significantly over time at different geographical locations, levels of hierarchy, and temporal granularities. For example, while we observed high availability at monthly granularity, it deteriorates at daily and hourly granularities. This and many other such observations that we present have significant implications for cloud tenants. We further implemented our measurement approach on Google Cloud Platform (GCP) to demonstrate that it can be deployed on any cloud platform and present some preliminary comparative observations from this implementation. Based on our observations, we present several recommendations that tenants can use to better deploy their applications.},
}

% Conference paper (ICDCS 2020 proceedings); the venue belongs in booktitle,
% so the entry type is inproceedings rather than article.
@inproceedings{ganji_singh_shahzad_2020,
  author       = {Ganji, Anirudh and Singh, Anand and Shahzad, Muhammad},
  title        = {Characterizing the Impact of {TCP} Coexistence in Data Center Networks},
  booktitle    = {2020 {IEEE} 40th International Conference on Distributed Computing Systems ({ICDCS})},
  year         = {2020},
  pages        = {388--398},
  issn         = {1063-6927},
  doi          = {10.1109/ICDCS47774.2020.00035},
  abstractNote = {The switch fabrics of today's data centers carry traffic controlled by a variety of TCP congestion control algorithms. This leads us to ask: how does the coexistence of multiple variants of TCP on shared switch fabric impacts the performance achieved by different applications in data centers? To answer this question, we conducted an extensive set of experiments with coexisting TCP variants on Leaf-Spine and Fat-Tree switch fabrics. We executed common data center workloads, which include streaming, MapReduce, and storage workloads, using four commonly used TCP variants, namely BBR, DCTCP, CUBIC, and New Reno. We also extensively executed iPerf workloads using these 4 TCP variants to purely study the impact of the coexistence of TCP variants on each other's performance without incorporating the network behavior of the application layer. Our experiments resulted in a large set of network traces comprised of 160 billion packets (we will release these traces after publication of this work). We present comprehensive observations from these traces that have important implications in ensuring optimal utilization of data center switch fabric and in meeting the network performance needs of application layer workloads.},
}

@inproceedings{singh_viniotis_2016,
  author       = {Singh, A. and Viniotis, Y.},
  title        = {An {SLA}-based resource allocation for {IoT} applications in cloud environments},
  booktitle    = {2016 Cloudification of the Internet of Things ({CIoT})},
  year         = {2016},
  doi          = {10.1109/ciot.2016.7872913},
  abstractNote = {In an IoT cloud, the message broker service allows point to multi-point communication between IoT devices and applications. The service is governed by Service Level Agreements (SLA) that specify, among other requirements, the volume of messages served during an enforcement period. For simplicity, current SLAs do not provide detailed information about message arrival patterns, making enforcement of the SLA a difficult problem for the providers. In this paper, we propose a new, two-step SLA by introducing sub-periods for measurement and control within the total enforcement period. Our proposed SLA retains the simplicity of the current SLA and provides additional controls for the providers to enforce it. We present the conformance of the new SLA as a resource allocation problem for cloud providers and propose a buffering, scheduling and rate limiting mechanism to enforce it. We verify that the solution achieves conformance, analyze the tradeoffs of the solution and evaluate via simulation the effects of system parameters such as capacity, number of sub-periods and enforcement period.},
}

% NOTE(review): this entry previously reused the key singh_viniotis_2016,
% which made it a repeated entry that BibTeX discards. It now has its own key.
% The year was exported as 2016, but the booktitle and the DOI both indicate
% the 2017 edition of ICNC, so 2017 is used here -- confirm against the
% publisher record.
@inproceedings{singh_viniotis_2017,
  author       = {Singh, A. and Viniotis, Y.},
  title        = {Resource allocation for {IoT} applications in cloud environments},
  booktitle    = {2017 International Conference on Computing, Networking and Communications ({ICNC})},
  year         = {2017},
  pages        = {719--723},
  doi          = {10.1109/iccnc.2017.7876218},
  abstractNote = {In an IoT cloud, the message broker service offered to customers allows customer's devices and applications to connect and interact with cloud applications and other devices. The service is governed by Service Level Agreements (SLA) that specify, among other requirements, the volume of messages served during an enforcement period. For simplicity, SLAs do not take into account detailed information about message arrival patterns or service times, making enforcement of the SLA a challenging problem for the providers. In this paper, we propose a resource allocation mechanism (that uses buffering, scheduling and rate limiting) to meet the SLA. We verify that the solution achieves conformance, analyze the tradeoffs of the solution and evaluate via simulation the effects of system parameters such as capacity and enforcement period.},
}