% Publication list: six conference papers and one journal article.
%
% Conventions used in this file:
%   - Conference papers are typed as inproceedings, with the proceedings title
%     in booktitle; only the journal paper is typed as article.
%   - Page ranges use the double hyphen (--), never a Unicode dash.
%   - isbn / issn / doi hold the bare identifier only.
%   - month uses the unquoted three-letter macro (e.g. aug).
%   - Acronyms in venue names are brace-protected so styles cannot recase them.
%   - abstractNote is an export-only field ignored by standard styles; its text
%     is kept verbatim from the publisher abstract.

@inproceedings{zhao_yang_vatsavai_2023,
  author       = {Zhao, Yifan and Yang, Xian and Vatsavai, Ranga Raju},
  title        = {Cloud Imputation for Multi-sensor Remote Sensing Imagery with Style Transfer},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases: Applied Data Science and Demo Track, {ECML} {PKDD} 2023, Part {VII}},
  volume       = {14175},
  pages        = {37--53},
  year         = {2023},
  isbn         = {978-3-031-43429-7},
  issn         = {1611-3349},
  doi          = {10.1007/978-3-031-43430-3_3},
  abstractNote = {Widely used optical remote sensing images are often contaminated by clouds. The missing or cloud-contaminated data leads to incorrect predictions by the downstream machine learning tasks. However, the availability of multi-sensor remote sensing imagery has great potential for improving imputation under clouds. Existing cloud imputation methods could generally preserve the spatial structure in the imputed regions, however, the spectral distribution does not match the target image due to differences in sensor characteristics and temporal differences. In this paper, we present a novel deep learning-based multi-sensor imputation technique inspired by the computer vision-based style transfer. The proposed deep learning framework consists of two modules: (i) cluster-based attentional instance normalization (CAIN), and (ii) adaptive instance normalization (AdaIN). The combined module, CAINA, exploits the style information from cloud-free regions. These regions (land cover) were obtained through clustering to reduce the style differences between the target and predicted image patches. We have conducted extensive experiments and made comparisons against the state-of-the-art methods using a benchmark dataset with images from Landsat-8 and Sentinel-2 satellites. Our experiments show that the proposed CAINA is at least 24.49% better on MSE and 18.38% better on cloud MSE as compared to state-of-the-art methods.},
}

@inproceedings{yang_zhao_vatsavai_2023,
  author       = {Yang, Xian and Zhao, Yifan and Vatsavai, Ranga Raju},
  title        = {Harmonization-guided deep residual network for imputing under clouds with multi-sensor satellite imagery},
  booktitle    = {Proceedings of 2023 18th International Symposium on Spatial and Temporal Data, {SSTD} 2023},
  pages        = {151--160},
  year         = {2023},
  doi          = {10.1145/3609956.3609967},
  url          = {https://doi.org/10.1145/3609956.3609967},
  abstractNote = {Multi-sensor spatiotemporal satellite images have become crucial for monitoring the geophysical characteristics of the Earth’s environment. However, clouds often obstruct the view from the optical sensors mounted on satellites and therefore degrade the quality of spectral, spatial, and temporal information. Though cloud imputation with the rise of deep learning research has provided novel ways to reconstruct the cloud-contaminated regions, many learning-based methods still lack the capability of harmonizing the differences between similar spectral bands across multiple sensors. To cope with the inter-sensor inconsistency of overlapping bands in different optical sensors, we propose a novel harmonization-guided residual network to impute the areas under clouds. We present a knowledge-guided harmonization model that maps the reflectance response from one satellite collection to another based on the spectral distribution of the cloud-free pixels. The harmonized cloud-free image is subsequently exploited in the intermediate layers as an additional input, paired with a custom loss function that considers image reconstruction quality and inter-sensor consistency jointly during training. To demonstrate the performance of our model, we conducted extensive experiments on a multi-sensor remote sensing imagery benchmark dataset consisting of widely used Landsat-8 and Sentinel-2 images. Compared to the state-of-the-art methods, results show at least a 22.35% improvement in MSE.},
}

@inproceedings{yang_zhao_vatsavai_2022,
  author       = {Yang, Xian and Zhao, Yifan and Vatsavai, Ranga Raju},
  title        = {Deep Residual Network with Multi-Image Attention for Imputing Under Clouds in Satellite Imagery},
  booktitle    = {2022 26th International Conference on Pattern Recognition ({ICPR})},
  pages        = {643--649},
  year         = {2022},
  issn         = {1051-4651},
  doi          = {10.1109/ICPR56361.2022.9956166},
  abstractNote = {Earth observations from remote sensing imagery play an important role in many environmental applications ranging from natural resource (e.g., crops, forests) monitoring to man-made object (e.g., builds, factories) recognition. Most widely used optical remote sensing data however is often contaminated by clouds making it hard to identify the objects underneath. Fortunately, with the recent advances and increased operational satellites, the spatial and temporal density of image collections have significantly increased. In this paper, we present a novel deep learning-based imputation technique for inferring spectral values under the clouds using nearby cloud-free satellite image observations. The proposed deep learning architecture, extended contextual attention (ECA), exploits similar properties from the cloud-free areas to tackle clouds of different sizes occurring at arbitrary locations in the image. A contextual attention mechanism is incorporated to utilize the useful cloud-free information from multiple images. To maximize the imputation performance of the model on the cloudy patches instead of the entire image, a two-phase custom loss function is deployed to guide the model. To study the performance of our model, we trained our model on a benchmark Sentinel-2 dataset by superimposing real-world cloud patterns. Extensive experiments and comparisons against the state-of-the-art methods using pixel-wise and structural metrics show the improved performance of our model. Our experiments demonstrated that the ECA method is consistently better than all other methods, it is 28.4% better on MSE and 31.7% better on cloudy MSE as compared to the state-of-the-art EDSR network.},
}

@inproceedings{zhao_yang_vatsavai_2022,
  author       = {Zhao, Yifan and Yang, Xian and Vatsavai, Ranga Raju},
  title        = {Multi-stream Deep Residual Network for Cloud Imputation Using Multi-resolution Remote Sensing Imagery},
  booktitle    = {2022 21st {IEEE} International Conference on Machine Learning and Applications, {ICMLA}},
  pages        = {97--104},
  year         = {2022},
  doi          = {10.1109/ICMLA55696.2022.00021},
  abstractNote = {For more than five decades, remote sensing imagery has been providing critical information for many applications such as crop monitoring, disaster assessment, and urban planning. Unfortunately, more than 50% of optical remote sensing images are contaminated by clouds severely affecting the object identification. However, thanks to recent advances in remote sensing instruments and increase in number of operational satellites, we now have petabytes of multi-sensor observations covering the globe. Historically cloud imputation techniques were designed for single sensor images, thus existing benchmarks were mostly limited to single sensor images, which precludes design and validation of cloud imputation techniques on multi-sensor data. In this paper, we introduce a new benchmark data set consisting of images from two widely used and publicly available satellite images, Landsat-8 and Sentinel-2, and a new multi-stream deep residual network (MDRN). This newly introduced benchmark dataset fills an important gap in the existing benchmark datasets, which allows exploitation of multi-resolution spectral information from the cloud-free regions of temporally nearby images, and the MDRN algorithm addresses imputation using the multi-resolution data. Both quantitative and qualitative experiments show that the utility of our benchmark dataset and as well as efficacy of our MDRN architecture in cloud imputation. The MDRN outperforms the closest competing method by 14.1%.},
}

@inproceedings{wu_yang_zhu_mitra_2022,
  author       = {Wu, Chen and Yang, Xian and Zhu, Sencun and Mitra, Prasenjit},
  title        = {Toward Cleansing Backdoored Neural Networks in Federated Learning},
  booktitle    = {2022 {IEEE} 42nd International Conference on Distributed Computing Systems ({ICDCS} 2022)},
  pages        = {820--830},
  year         = {2022},
  issn         = {1063-6927},
  doi          = {10.1109/ICDCS54860.2022.00084},
  abstractNote = {Malicious clients can attack federated learning systems using compromised data during the training phase, including backdoor samples. The compromised global model will perform well on the validation dataset designed for the task, but a small subset of data with backdoor patterns may trigger the model to make a wrong prediction. In this work, we propose a new and effective method to mitigate backdoor attacks in federated learning after the training phase. Through federated pruning method, we remove redundant neurons and "backdoor neurons", which trigger misbehavior upon recognizing backdoor patterns while keeping silent when the input data is clean. The second optional fine-tuning process is designed to recover the pruning damage to the test accuracy on benign datasets. In the last step, we eliminate backdoor attacks by limiting the extreme values of inputs and neural network neurons’ weights. Experiments using our defenses mechanism against the state-of-the-art Distributed Backdoor Attacks on CIFAR-10 show promising results; the averaged attack success rate drops more than 70% with less than 2% loss of test accuracy on the validation dataset. Our defense method has also outperformed the state-of-the-art pruning defense against backdoor attacks in the federated learning scenario.},
}

@inproceedings{zhao_yang_vatsavai_2021,
  author       = {Zhao, Yifan and Yang, Xian and Vatsavai, Ranga Raju},
  title        = {A Scalable System for Searching Large-scale Multi-sensor Remote Sensing Image Collections},
  booktitle    = {2021 {IEEE} International Conference on Big Data (Big Data)},
  pages        = {3780--3783},
  year         = {2021},
  issn         = {2639-1589},
  doi          = {10.1109/BigData52589.2021.9671679},
  abstractNote = {Huge amounts of remote sensing data collected from hundreds of operational satellites in conjunction with on-demand UAV based imaging products are offering unprecedented capabilities towards monitoring dynamic earth resources. However, searching for the right combination of imagery products that satisfy an application requirement is a daunting task. Earlier efforts at streamlining remote sensing data discovery include NASA’s Earth Observing System (EOS) Data and Information System (EOSDIS), USGS Global Visualization Viewer (GloVis), and several other research systems like Minnesota MapServer. These systems were built on top of metadata harvesting, indexing, keyword searching modules which were not scalable and interoperable. To address these challenges, recently the SpatioTemporal Asset Catalog (STAC) specification was developed to provide a common language to describe a range of geospatial information, so that data products can be more easily indexed and discovered. In this paper we present an highly scalable STAC API based system with spatiotemporal indexing support. Experimental evaluation shows that our spatiotemporal indexing based queries are 1000x faster than standard STAC API server.},
}

% NOTE(review): the export carried only the placeholder text "Abstract" as this
% entry's abstract, so the field was dropped. The volume number (8) looks
% unusual for a 2021 issue and no page range or issue number is recorded --
% confirm both against the publisher record for the DOI below.
@article{zhao_yang_bolnykh_harenberg_korchiev_yerramsetty_vellanki_kodumagulla_samatova_2021,
  author  = {Zhao, Yifan and Yang, Xian and Bolnykh, Carolina and Harenberg, Steve and Korchiev, Nodirbek and Yerramsetty, Saavan Raj and Vellanki, Bhanu Prasad and Kodumagulla, Ramakanth and Samatova, Nagiza F.},
  title   = {Predictive models with end user preference},
  journal = {Statistical Analysis and Data Mining},
  volume  = {8},
  year    = {2021},
  month   = aug,
  issn    = {1932-1872},
  doi     = {10.1002/sam.11545},
}