@comment{
  Conference paper from the 2018 IEEE Real-Time Systems Symposium (RTSS).
  Stored as inproceedings with the venue in booktitle, since RTSS is a
  symposium and not a journal. Page range uses the ASCII double hyphen.
}
@inproceedings{xu_mueller_2018,
  author    = {Xu, Hang and Mueller, Frank},
  title     = {Work-In-Progress: Making Machine Learning Real-Time Predictable},
  booktitle = {2018 39th {IEEE} Real-Time Systems Symposium ({RTSS} 2018)},
  year      = {2018},
  pages     = {157--160},
  issn      = {1052-8725},
  doi       = {10.1109/RTSS.2018.00029},
  abstract  = {Machine learning (ML) on edge computing devices is becoming popular in the industry as a means to make control systems more intelligent and autonomous. The new trend is to utilize embedded edge devices, as they boast higher computational power and larger memories than before, to perform ML tasks that had previously been limited to cloud-hosted deployments. In this work, we assess the real-time predictability and consider data privacy concerns by comparing traditional cloud services with edge-based ones for certain data analytics tasks. We identify the subset of ML problems appropriate for edge devices by investigating if they result in real-time predictable services for a set of widely used ML libraries. We specifically enhance the Caffe library to make it more suitable for real-time predictability. We then deploy ML models with high accuracy scores on an embedded system, exposing it to industry sensor data from the field, to demonstrates its efficacy and suitability for real-time processing.},
}

@comment{
  Conference paper from the 2016 IEEE Power and Energy Society General
  Meeting (PESGM). No page numbers were present in the exported record.
  NOTE(review): given names other than Chakrabortty and Mueller are still
  initials as exported; "Xu, H." is presumably the same person as
  "Xu, Hang" in the entry above -- confirm and expand to full names.
}
@inproceedings{qian_xu_zhang_chakrabortty_mueller_xin_2016,
  author    = {Qian, T. and Xu, H. and Zhang, J. H. and Chakrabortty, Aranya and Mueller, Frank and Xin, Y. F.},
  title     = {A Resilient Software Infrastructure for Wide-Area Measurement Systems},
  booktitle = {2016 {IEEE} Power and Energy Society General Meeting ({PESGM})},
  year      = {2016},
  doi       = {10.1109/PESGM.2016.7741949},
  abstract  = {To support the scalability and resilience requirements of distributed Wide-Area Measurement System (WAMS) architectures, we design and implement a software infrastructure to estimate power grid oscillation modes based on real-time data collected from Phasor Measurement Units (PMUs). This estimation algorithm can be deployed on a hierarchical structure of Phasor Data Concentrators (PDCs), which calculate local estimates and communicate with each other to calculate the global estimate. This work contributes a resilient system to WAMS with guarantees for (1) Quality of Service in network delay, (2) network failure tolerance, and (3) self-recoverability. The core component of the infrastructure is a distributed storage system. Externally, the storage system provides a cloud data lookup service with bounded response times and resilience, which decouples the data communication between PMUs, PDCs, and power-grid monitor/control applications. Internally, the storage system organizes PDCs as storage nodes and employs a real-time task scheduler to order data lookup requests so that urgent requests can be served earlier. To demonstrate the resilience of our distributed system, we deploy the system on a (1) virtual platform and (2) bare-metal machines, where we run a distributed algorithm on the basis of the Prony algorithm and the Alternating Directions Method of Multipliers (ADMM) to estimate the electro-mechanical oscillation modes. We inject different failures into the system to study their impact on the estimation algorithm. Our experiments show that temporary failures of a PDC or a network link do not affect the estimation result since the historical PMU data are cached in the storage system and PDCs can obtain the data on demand.},
}