@article{zhao_yang_vatsavai_2023, title={Cloud Imputation for Multi-sensor Remote Sensing Imagery with Style Transfer}, volume={14175}, ISBN={["978-3-031-43429-7"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-031-43430-3_3}, abstractNote={Widely used optical remote sensing images are often contaminated by clouds. The missing or cloud-contaminated data leads to incorrect predictions by the downstream machine learning tasks. However, the availability of multi-sensor remote sensing imagery has great potential for improving imputation under clouds. Existing cloud imputation methods could generally preserve the spatial structure in the imputed regions; however, the spectral distribution does not match the target image due to differences in sensor characteristics and temporal differences. In this paper, we present a novel deep learning-based multi-sensor imputation technique inspired by the computer vision-based style transfer. The proposed deep learning framework consists of two modules: (i) cluster-based attentional instance normalization (CAIN), and (ii) adaptive instance normalization (AdaIN). The combined module, CAINA, exploits the style information from cloud-free regions. These regions (land cover) were obtained through clustering to reduce the style differences between the target and predicted image patches. We have conducted extensive experiments and made comparisons against the state-of-the-art methods using a benchmark dataset with images from Landsat-8 and Sentinel-2 satellites. Our experiments show that the proposed CAINA is at least 24.49% better on MSE and 18.38% better on cloud MSE as compared to state-of-the-art methods.}, journal={MACHINE LEARNING AND KNOWLEDGE DISCOVERY IN DATABASES: APPLIED DATA SCIENCE AND DEMO TRACK, ECML PKDD 2023, PT VII}, author={Zhao, Yifan and Yang, Xian and Vatsavai, Ranga Raju}, year={2023}, pages={37–53} } @article{liu_mei_peng_vatsavai_2023, title={Context Retrieval via Normalized Contextual Latent Interaction for Conversational Agent}, ISSN={["2375-9232"]}, DOI={10.1109/ICDMW60847.2023.00196}, abstractNote={Conversational agents leveraging AI, particularly deep learning, are emerging in both academic research and real-world applications. However, these applications still face challenges, including disrespecting knowledge and facts, not personalizing to user preferences, and enormous demand for computational resources during training and inference. Recent research efforts have been focused on addressing these challenges from various aspects, including supplementing various types of auxiliary information to the conversational agents. However, existing methods are still not able to effectively and efficiently exploit relevant information from these auxiliary supplements to further unleash the power of the conversational agents and the language models they use. In this paper, we present a novel method, PK-NCLI, that is able to accurately and efficiently identify relevant auxiliary information to improve the quality of conversational responses by learning the relevance among persona, chat history, and knowledge background through low-level normalized contextual latent interaction. Our experimental results indicate that PK-NCLI outperforms the state-of-the-art method, PK-FoCus, by 47.80%/30.61%/24.14% in terms of perplexity, knowledge grounding, and training efficiency, respectively, and maintained the same level of persona grounding performance.
We also provide a detailed analysis of how different factors, including language model choices and trade-offs on training weights, would affect the performance of PK-NCLI.}, journal={2023 23RD IEEE INTERNATIONAL CONFERENCE ON DATA MINING WORKSHOPS, ICDMW 2023}, author={Liu, Junfeng and Mei, Zhuocheng and Peng, Kewen and Vatsavai, Ranga Raju}, year={2023}, pages={1543–1550} } @article{yang_zhao_vatsavai_2023, title={Harmonization-guided deep residual network for imputing under clouds with multi-sensor satellite imagery}, url={https://doi.org/10.1145/3609956.3609967}, DOI={10.1145/3609956.3609967}, abstractNote={Multi-sensor spatiotemporal satellite images have become crucial for monitoring the geophysical characteristics of the Earth’s environment. However, clouds often obstruct the view from the optical sensors mounted on satellites and therefore degrade the quality of spectral, spatial, and temporal information. Though cloud imputation with the rise of deep learning research has provided novel ways to reconstruct the cloud-contaminated regions, many learning-based methods still lack the capability of harmonizing the differences between similar spectral bands across multiple sensors. To cope with the inter-sensor inconsistency of overlapping bands in different optical sensors, we propose a novel harmonization-guided residual network to impute the areas under clouds. We present a knowledge-guided harmonization model that maps the reflectance response from one satellite collection to another based on the spectral distribution of the cloud-free pixels. The harmonized cloud-free image is subsequently exploited in the intermediate layers as an additional input, paired with a custom loss function that considers image reconstruction quality and inter-sensor consistency jointly during training. To demonstrate the performance of our model, we conducted extensive experiments on a multi-sensor remote sensing imagery benchmark dataset consisting of widely used Landsat-8 and Sentinel-2 images. Compared to the state-of-the-art methods, results show at least a 22.35% improvement in MSE.}, journal={PROCEEDINGS OF 2023 18TH INTERNATIONAL SYMPOSIUM ON SPATIAL AND TEMPORAL DATA, SSTD 2023}, author={Yang, Xian and Zhao, Yifan and Vatsavai, Ranga Raju}, year={2023}, pages={151–160} } @article{samudrala_zhao_vatsavai_2023, title={NOVEL DEEP LEARNING FRAMEWORK FOR IMPUTING HOLES IN ORTHORECTIFIED VHR IMAGES}, ISSN={["2153-6996"]}, DOI={10.1109/IGARSS52108.2023.10281804}, abstractNote={Many downstream applications, such as agricultural monitoring, damage assessments, and urban planning, have benefited from remote sensing imagery. Ortho image generation from very high-resolution images has unpleasant side effects in places of building occlusions leaving holes in the orthorectified images. As a result, blank pixels caused by orthorectification must be filled in prior to downstream tasks such as machine learning. In this paper, we present a new deep learning architecture based on Wide Activation Super Resolution (WDSR) network combined with an Adaptive Instance Normalization (AdaIN) based style transfer for imputing holes (missing pixels) in orthorectified images. To test and validate the performance of imputation algorithms, we developed a new multi-resolution benchmark dataset consisting of WorldView-3 and Sentinel-2 images. Our experiments show that the WDSR framework outperforms other state-of-the-art (SOTA) deep learning methods and the Ordinary Kriging method. 
Our proposed method has improved the mean squared error (MSE) by at least 12.54% in comparison to existing SOTA methods.}, journal={IGARSS 2023 - 2023 IEEE INTERNATIONAL GEOSCIENCE AND REMOTE SENSING SYMPOSIUM}, author={Samudrala, Sai Venkata Vinay Kumar and Zhao, Yifan and Vatsavai, Ranga Raju}, year={2023}, pages={5158–5161} } @article{liu_symons_vatsavai_2023, title={Persona-Coded Poly-Encoder: Persona-Guided Multi-Stream Conversational Sentence Scoring}, ISSN={["1082-3409"]}, DOI={10.1109/ICTAI59109.2023.00044}, abstractNote={Recent advances in machine learning and deep learning have led to the widespread use of Conversational AI in many practical applications. However, it is still very challenging to leverage auxiliary information that can provide conversational context or personalized tuning to improve the quality of conversations. For example, there has only been limited research on using an individual’s persona information to improve conversation quality, and even state-of-the-art conversational AI techniques are unable to effectively leverage signals from heterogeneous sources of auxiliary data, such as multi-modal interaction data, demographics, SDOH data, etc. In this paper, we present a novel Persona-Coded Poly-Encoder method that leverages persona information in a multi-stream encoding scheme to improve the quality of response generation for conversations. To show the efficacy of the proposed method, we evaluate our method on two different persona-based conversational datasets, and compared against two state-of-the-art methods. Our experimental results and analysis demonstrate that our method can improve conversation quality over the baseline method Poly-Encoder by $ 3.32\%$ and $ 2.94\%$ in terms of BLEU score and HR@1, respectively. More significantly, our method offers a path to better utilization of multi-modal data in conversational tasks. Lastly, our study outlines several challenges and future research directions for advancing personalized conversational AI technology.}, journal={2023 IEEE 35TH INTERNATIONAL CONFERENCE ON TOOLS WITH ARTIFICIAL INTELLIGENCE, ICTAI}, author={Liu, Junfeng and Symons, Christopher and Vatsavai, Ranga Raju}, year={2023}, pages={250–257} } @inproceedings{mei_vatsavai_chirkova_2023, title={Q-learning Based Simulation Tool for Studying Effectiveness of Dynamic Application of Fertilizer on Crop Productivity}, url={https://doi.org/10.1145/3615833.3628591}, DOI={10.1145/3615833.3628591}, abstractNote={As per the Food and Agriculture Organization (FAO), agricultural productivity needs to be increased by 70% to feed a projected 10 billion people by the year 2050, and fertilizer application plays a key role in achieving this goal. Throughout the world, fertilizer usage has significantly increased to improve crop yields. Unfortunately, several studies have shown that over 65% of the fertilizer is being wasted leading to various environmental problems such as nitrogen runoff into lakes, rivers, and oceans. In addition, the traditional practice of uniform fertilizer application without regard to field and/or crop conditions can result in wasted fertilizer. To address these challenges, we propose a simple Q-learning-based simulation tool for studying the dynamic application of fertilizer. Q-learning is particularly well suited for solving this problem, as the framework allows sensing the environment to characterize the site and crop conditions and determine the amount of fertilizer to apply. 
We used remote sensing data as a proxy for crop health monitoring. We used reward shaping to determine the optimal amount of fertilizer. We compared our framework with other popular deep-learning approaches Deep Q Network (DQN), Double Deep Q Network (DDQN), and Dueling Network (Due_N). Experimental results show that our approach is computationally efficient while matching or performing better than other approaches.}, author={Mei, Zhuocheng and Vatsavai, Ranga and Chirkova, Rada}, year={2023}, month={Nov} } @article{gadiraju_vatsavai_2023, title={Remote Sensing Based Crop Type Classification Via Deep Transfer Learning}, volume={16}, ISSN={["2151-1535"]}, url={https://doi.org/10.1109/JSTARS.2023.3270141}, DOI={10.1109/JSTARS.2023.3270141}, abstractNote={Machine learning methods using aerial imagery (satellite and unmanned-aerial-vehicles-based imagery) have been extensively used for crop classification. Traditionally, per-pixel-based, object-based, and patch-based methods have been used for classifying crops worldwide. Recently, aided by the increased availability of powerful computing architectures such as graphical processing units, deep learning-based systems have become popular in other domains such as natural images. However, building complex deep neural networks for aerial imagery from scratch is a challenging affair, owing to the limited labeled data in the remote sensing domain and the multitemporal (phenology) and geographic variability associated with agricultural data. In this article, we discuss these challenges in detail. We then discuss various transfer learning methodologies that help overcome these challenges. Finally, we evaluate whether a transfer learning strategy of using pretrained networks from a different domain helps improve remote sensing image classification performance on a benchmark dataset. Our findings indicate that deep neural networks pretrained on a different domain dataset cannot be used as off-the-shelf feature extractors. However, using the pretrained network weights as initial weights for training on the remote sensing dataset or freezing the early layers of the pretrained network improves the performance compared to training the network from scratch.}, journal={IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING}, author={Gadiraju, Krishna Karthik and Vatsavai, Ranga Raju}, year={2023}, pages={4699–4712} } @article{ramachandra_jones_vatsavai_2022, title={A Survey of Single-Scene Video Anomaly Detection}, volume={44}, ISSN={["1939-3539"]}, DOI={10.1109/TPAMI.2020.3040591}, abstractNote={This article summarizes research trends on the topic of anomaly detection in video feeds of a single scene. We discuss the various problem formulations, publicly available datasets and evaluation criteria. We categorize and situate past research into an intuitive taxonomy and provide a comprehensive comparison of the accuracy of many algorithms on standard test sets. Finally, we also provide best practices and suggest some possible directions for future research.}, number={5}, journal={IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE}, author={Ramachandra, Bharathkumar and Jones, Michael J. 
and Vatsavai, Ranga Raju}, year={2022}, month={May}, pages={2293–2312} } @article{yang_zhao_vatsavai_2022, title={Deep Residual Network with Multi-Image Attention for Imputing Under Clouds in Satellite Imagery}, ISSN={["1051-4651"]}, DOI={10.1109/ICPR56361.2022.9956166}, abstractNote={Earth observations from remote sensing imagery play an important role in many environmental applications ranging from natural resource (e.g., crops, forests) monitoring to man-made object (e.g., buildings, factories) recognition. Most widely used optical remote sensing data, however, is often contaminated by clouds, making it hard to identify the objects underneath. Fortunately, with the recent advances and increased operational satellites, the spatial and temporal density of image collections has significantly increased. In this paper, we present a novel deep learning-based imputation technique for inferring spectral values under the clouds using nearby cloud-free satellite image observations. The proposed deep learning architecture, extended contextual attention (ECA), exploits similar properties from the cloud-free areas to tackle clouds of different sizes occurring at arbitrary locations in the image. A contextual attention mechanism is incorporated to utilize the useful cloud-free information from multiple images. To maximize the imputation performance of the model on the cloudy patches instead of the entire image, a two-phase custom loss function is deployed to guide the model. To study the performance of our model, we trained our model on a benchmark Sentinel-2 dataset by superimposing real-world cloud patterns. Extensive experiments and comparisons against the state-of-the-art methods using pixel-wise and structural metrics show the improved performance of our model. Our experiments demonstrated that the ECA method is consistently better than all other methods; it is 28.4% better on MSE and 31.7% better on cloudy MSE as compared to the state-of-the-art EDSR network.}, journal={2022 26TH INTERNATIONAL CONFERENCE ON PATTERN RECOGNITION (ICPR)}, author={Yang, Xian and Zhao, Yifan and Vatsavai, Ranga Raju}, year={2022}, pages={643–649} } @article{zhao_yang_vatsavai_2022, title={Multi-stream Deep Residual Network for Cloud Imputation Using Multi-resolution Remote Sensing Imagery}, DOI={10.1109/ICMLA55696.2022.00021}, abstractNote={For more than five decades, remote sensing imagery has been providing critical information for many applications such as crop monitoring, disaster assessment, and urban planning. Unfortunately, more than 50% of optical remote sensing images are contaminated by clouds, severely affecting object identification. However, thanks to recent advances in remote sensing instruments and an increase in the number of operational satellites, we now have petabytes of multi-sensor observations covering the globe. Historically, cloud imputation techniques were designed for single sensor images; thus, existing benchmarks were mostly limited to single sensor images, which precludes design and validation of cloud imputation techniques on multi-sensor data. In this paper, we introduce a new benchmark data set consisting of images from two widely used and publicly available satellites, Landsat-8 and Sentinel-2, and a new multi-stream deep residual network (MDRN).
This newly introduced benchmark dataset fills an important gap in the existing benchmark datasets, which allows exploitation of multi-resolution spectral information from the cloud-free regions of temporally nearby images, and the MDRN algorithm addresses imputation using the multi-resolution data. Both quantitative and qualitative experiments show the utility of our benchmark dataset as well as the efficacy of our MDRN architecture in cloud imputation. The MDRN outperforms the closest competing method by 14.1%.}, journal={2022 21ST IEEE INTERNATIONAL CONFERENCE ON MACHINE LEARNING AND APPLICATIONS, ICMLA}, author={Zhao, Yifan and Yang, Xian and Vatsavai, Ranga Raju}, year={2022}, pages={97–104} } @article{liu_symons_vatsavai_2022, title={Persona-Based Conversational AI: State of the Art and Challenges}, ISSN={["2375-9232"]}, DOI={10.1109/ICDMW58026.2022.00129}, abstractNote={Conversational AI has become an increasingly prominent and practical application of machine learning. However, existing conversational AI techniques still suffer from various limitations. One such limitation is a lack of well-developed methods for incorporating auxiliary information that could help a model understand conversational context better. In this paper, we explore how persona-based information could help improve the quality of response generation in conversations. First, we provide a literature review focusing on the current state-of-the-art methods that utilize persona information. We evaluate two strong baseline methods, the Ranking Profile Memory Network and the Poly-Encoder, on the NeurIPS ConvAI2 benchmark dataset. Our analysis elucidates the importance of incorporating persona information into conversational systems. Additionally, our study highlights several limitations with current state-of-the-art methods and outlines challenges and future research directions for advancing personalized conversational AI technology.}, journal={2022 IEEE INTERNATIONAL CONFERENCE ON DATA MINING WORKSHOPS, ICDMW}, author={Liu, Junfeng and Symons, Christopher and Vatsavai, Ranga Raju}, year={2022}, pages={993–1001} } @article{gadiraju_chen_ramachandra_vatsavai_2022, title={Real-Time Change Detection At the Edge}, DOI={10.1109/ICMLA55696.2022.00130}, abstractNote={Detecting changes in real-time using remote sensing data is of paramount importance in areas such as crop health monitoring, weed detection, and disaster management. However, real-time change detection using remote sensing imagery faces several challenges: a) it requires real-time data extraction, which is a challenge for traditional satellite imagery sources such as MODIS and LANDSAT due to the latency associated with collecting and processing the data. Due to the advances made in the past decade in drone technology, Unmanned Aerial Vehicles (UAVs) can be used for real-time data collection. However, a large percentage of this data will be unlabeled, which limits the use of well-known supervised machine learning methods; b) from an infrastructure perspective, the cloud-edge solution of processing the data collected from UAVs (edge) only on the cloud is also constrained by latency and bandwidth-related issues. Due to these limitations, transferring large amounts of data between cloud and edge, or storing large amounts of information regarding past time periods on an edge device is infeasible.
We can limit the amount of data transferred between the cloud and edge by performing analyses on-the-fly at the edge using low-power devices (edge devices) that can be connected to UAVs. However, edge devices have computational and memory bottlenecks, which would limit the usage of complex machine learning algorithms. In this paper, we demonstrate how an unsupervised GMM-based real-time change detection method at the edge can be used to identify weeds in real-time. We evaluate the scalability of our method on edge computing and traditional devices such as NVIDIA Jetson TX2, RTX 2080, and traditional Intel CPUs. We perform a case study for weed detection on images collected from UAVs. Our results demonstrate both the efficacy and computational efficiency of our method.}, journal={2022 21ST IEEE INTERNATIONAL CONFERENCE ON MACHINE LEARNING AND APPLICATIONS, ICMLA}, author={Gadiraju, Krishna Karthik and Chen, Zexi and Ramachandra, Bharathkumar and Vatsavai, Ranga Raju}, year={2022}, pages={776–781} } @article{zhao_yang_vatsavai_2021, title={A Scalable System for Searching Large-scale Multi-sensor Remote Sensing Image Collections}, ISSN={["2639-1589"]}, DOI={10.1109/BigData52589.2021.9671679}, abstractNote={Huge amounts of remote sensing data collected from hundreds of operational satellites in conjunction with on-demand UAV based imaging products are offering unprecedented capabilities towards monitoring dynamic earth resources. However, searching for the right combination of imagery products that satisfy an application requirement is a daunting task. Earlier efforts at streamlining remote sensing data discovery include NASA’s Earth Observing System (EOS) Data and Information System (EOSDIS), USGS Global Visualization Viewer (GloVis), and several other research systems like Minnesota MapServer. These systems were built on top of metadata harvesting, indexing, and keyword searching modules, which were neither scalable nor interoperable. To address these challenges, recently the SpatioTemporal Asset Catalog (STAC) specification was developed to provide a common language to describe a range of geospatial information, so that data products can be more easily indexed and discovered. In this paper we present a highly scalable STAC API based system with spatiotemporal indexing support. Experimental evaluation shows that our spatiotemporal indexing based queries are 1000x faster than a standard STAC API server.}, journal={2021 IEEE INTERNATIONAL CONFERENCE ON BIG DATA (BIG DATA)}, author={Zhao, Yifan and Yang, Xian and Vatsavai, Ranga Raju}, year={2021}, pages={3780–3783} } @article{chen_dutton_ramachandra_wu_vatsavai_2021, title={Local Clustering with Mean Teacher for Semi-supervised learning}, ISSN={["1051-4651"]}, DOI={10.1109/ICPR48806.2021.9412469}, abstractNote={The Mean Teacher (MT) model of Tarvainen and Valpola has shown good performance on several semi-supervised benchmark datasets. MT maintains a teacher model's weights as the exponential moving average of a student model's weights and minimizes the divergence between their probability predictions under diverse perturbations of the inputs. However, MT is known to suffer from confirmation bias, that is, reinforcing incorrect teacher model predictions. In this work, we propose a simple yet effective method called Local Clustering (LC) to mitigate the effect of confirmation bias.
In MT, each data point is considered independent of other points during training; however, data points are likely to be close to each other in feature space if they share similar features. Motivated by this, we cluster data points locally by minimizing the pairwise distance between neighboring data points in feature space. Combined with a standard classification cross-entropy objective on labeled data points, the misclassified unlabeled data points are pulled towards high-density regions of their correct class with the help of their neighbors, thus improving model performance. We demonstrate on semi-supervised benchmark datasets SVHN and CIFAR-10 that adding our LC loss to MT yields significant improvements compared to MT and performance comparable to the state of the art in semi-supervised learning. The code is available at: https://github.com/jay1204/local_clustering_with_mt_for_ssl.}, journal={2020 25TH INTERNATIONAL CONFERENCE ON PATTERN RECOGNITION (ICPR)}, author={Chen, Zexi and Dutton, Benjamin and Ramachandra, Bharathkumar and Wu, Tianfu and Vatsavai, Ranga Raju}, year={2021}, pages={6243–6250} } @article{ramachandra_jones_vatsavai_2021, title={Perceptual metric learning for video anomaly detection}, volume={32}, ISSN={["1432-1769"]}, DOI={10.1007/s00138-021-01187-5}, number={3}, journal={MACHINE VISION AND APPLICATIONS}, author={Ramachandra, Bharathkumar and Jones, Michael and Vatsavai, Ranga Raju}, year={2021}, month={May} } @article{ramachandra_dutton_vatsavai_2019, title={Anomalous cluster detection in spatiotemporal meteorological fields}, volume={12}, ISSN={["1932-1872"]}, DOI={10.1002/sam.11398}, abstractNote={Finding anomalous regions in spatiotemporal climate data is an important problem with a need for greater accuracy. The collective and contextual nature of anomalies (e.g., heat waves) coupled with the real-valued, seasonal, multimodal, highly correlated, and gridded nature of climate variable observations poses a multitude of challenges. Existing anomaly detection methods have limitations in the specific setting of real-valued areal spatiotemporal data. In this paper, we develop a method for extreme event detection in meteorological datasets that follows from well-known distribution-based anomaly detection approaches. The method models spatial and temporal correlations explicitly through a piecewise parametric assumption and generalizes the Mahalanobis distance across distributions of different dimensionalities. The result is an effective method to mine contiguous spatiotemporal anomalous regions from meteorological fields, which improves upon the current standard approach in climatology. The proposed method has been evaluated on a real global surface temperature dataset and validated using historical records of extreme events.}, number={2}, journal={STATISTICAL ANALYSIS AND DATA MINING}, author={Ramachandra, Bharathkumar and Dutton, Benjamin and Vatsavai, Ranga Raju}, year={2019}, month={Apr}, pages={88–100} } @article{pool_vatsavai_2018, title={Deformable Part Models for Complex Object Detection in Remote Sensing Imagery}, DOI={10.1145/3282834.3282843}, abstractNote={Image understanding is a difficult problem even in its simplest form because objects, based on a variety of factors, can have a wide range of intra-class variability. Consider people standing, sitting, facing backward, etc. Beyond recognition, detection requires the localization of the object, which can become a costly search problem of the image if not given any heuristics.
The problem of detecting objects within an image has been vastly investigated, but if we consider entities of an even higher, more complex level, such as nuclear plants, how do we begin to approach a solution? In this paper, we explore the state of the art, deformable part models (DPMs), and their applicability for complex object detection in very high-resolution satellite images. A deformable part model, or DPM, is a method used for object detection in images that leverages the fact that objects are inherently made up of a collection of parts. Each part of an object is connected to one or more other parts in a treelike structure. These parts can vary in distance, orientation, or pose with respect to one another but, within some reasonable range, still be considered the skeleton of the same object. DPMs compensate for this property of various objects by utilizing histogram of oriented gradients (HOG) features for object representation at coarse and fine scales, pictorial structures [8], and application of a deformation cost on that pictorial structure. As such, these models can allow for variations in object pose, shape, and viewpoints while still remaining a very specific representation of that object describing not only the object as a whole, but also each of its distinct parts and their spatial relationships. In this paper, we investigate the landscape of research regarding DPMs, how this class of methods for object detection has evolved, and what remains to be explored to make the method more suitable for high-level, complex geospatial object understanding.}, journal={BIGSPATIAL 2018: PROCEEDINGS OF THE 7TH ACM SIGSPATIAL INTERNATIONAL WORKSHOP ON ANALYTICS FOR BIG GEOSPATIAL DATA (BIGSPATIAL-2018)}, author={Pool, Nathan and Vatsavai, Ranga Raju}, year={2018}, pages={57–62} } @article{shashidharan_vatsavai_meentemeyer_2018, title={FUTURES-DPE: Towards Dynamic Provisioning and Execution of Geosimulations in HPC environments}, DOI={10.1145/3274895.3274948}, abstractNote={Geosimulations using computer simulation models provide GI scientists an effective way to study complex geographic phenomena and predict future outcomes. Typically, geosimulations are developed to execute in an HPC environment with parallel and distributed execution capabilities. However, traditional HPC environments limit these simulations to a static runtime environment, where resources for execution must be decided before execution. Traditional simulation approaches such as the data parallel approach assign fixed computing resources to every unit of data (e.g., a tile or a county). However, in many practical situations, a user may want to assign additional computing resources to speed up or perform more computation in a specific region. For example, in an urban growth model (UGM) simulation, to explore the outcomes of changes due to urban policy in a tile or a group of tiles at a given time-step, an urban geographer may want to assign more computing resources to that group of tiles to quickly determine impacts of policy on urbanization. In the absence of a dynamic resource allocation mechanism, the utility of a geosimulation to explore what-if scenarios on-the-fly is limited to pre-allocated computing resources. Thus, to effectively leverage existing resources, we first design a co-scheduling approach for geosimulations in a resource-constrained HPC environment.
We then present a second design for a geosimulation which allows dynamic provisioning of resources in an HPC environment based on run-time users' demands. Finally, to demonstrate the utility of the two approaches, we modify the FUTURES geosimulation to support computationally expensive high-resolution simulation in regions of interest (ROIs) as specified by a user using the FUTURES-DPE framework.}, journal={26TH ACM SIGSPATIAL INTERNATIONAL CONFERENCE ON ADVANCES IN GEOGRAPHIC INFORMATION SYSTEMS (ACM SIGSPATIAL GIS 2018)}, author={Shashidharan, Ashwin and Vatsavai, Ranga Raju and Meentemeyer, Ross K.}, year={2018}, pages={464–467} } @article{gadiraju_vatsavai_kaza_wibbels_krishna_2018, title={Machine Learning Approaches for Slum Detection Using Very High Resolution Satellite Images}, ISSN={["2375-9232"]}, DOI={10.1109/ICDMW.2018.00198}, abstractNote={Detecting informal settlements has become an important area of research in the past decade, owing to the availability of high resolution satellite imagery. Traditional per-pixel based classification methods provide a high degree of accuracy in distinguishing primitive instances such as buildings, roads, forests and water. However, these methods fail to capture the complex relationships between neighboring pixels that are necessary for distinguishing complex objects such as informal and formal settlements. In this paper, we perform several experiments to compare and contrast how various per-pixel based classification methods, when combined with various features, perform in detecting slums. In addition, we also explored a deep neural network, which showed better accuracy than the pixel based methods.}, journal={2018 18TH IEEE INTERNATIONAL CONFERENCE ON DATA MINING WORKSHOPS (ICDMW)}, author={Gadiraju, Krishna Karthik and Vatsavai, Ranga Raju and Kaza, Nikhil and Wibbels, Erik and Krishna, Anirudh}, year={2018}, pages={1397–1404} } @article{ramachandra_nawathe_monroe_han_ham_vatsavai_2018, title={Real-Time Energy Audit of Built Environments: Simultaneous Localization and Thermal Mapping}, volume={24}, ISSN={["1943-555X"]}, DOI={10.1061/(ASCE)IS.1943-555X.0000431}, abstractNote={Leveraging thermography for managing built environments has become prevalent as a robust tool for detecting, analyzing, and reporting their performance in a nondestructive manner. Despite m...}, number={3}, journal={JOURNAL OF INFRASTRUCTURE SYSTEMS}, author={Ramachandra, Bharathkumar and Nawathe, Pranav and Monroe, Jacob and Han, Kevin and Ham, Youngjib and Vatsavai, Ranga Raju}, year={2018}, month={Sep} } @inproceedings{chen_ramachandra_vatsavai_2017, title={Hierarchical change detection framework for biomass monitoring}, DOI={10.1109/igarss.2017.8127030}, abstractNote={In this paper, we present a nearest neighbor based hierarchical change detection methodology for analyzing multi-temporal remote sensing imagery. A key contribution of this work is to define change as hierarchical rather than boolean. Based on this definition of change pattern, we developed a novel time series similarity based change detection framework for identifying inter-annual changes by exploiting phenological properties of growing crops from satellite time series imagery. The proposed framework consists of four components: hierarchical clustering tree construction, nearest neighbor based classification, relaxation labeling, and change detection using similarity hierarchy.
Though the proposed approach is unsupervised, we present evaluation using manually induced change regions embedded in the real dataset. We compare our method with the widely used K-Means clustering and evaluation shows that K-Means over-detects changes in comparison to our proposed method.}, booktitle={2017 ieee international geoscience and remote sensing symposium (igarss)}, author={Chen, Z. and Ramachandra, B. and Vatsavai, Ranga Raju}, year={2017}, pages={620–623} } @article{bhangale_durbha_king_younan_vatsavai_2017, title={High performance GPU computing based approaches for oil spill detection from multi-temporal remote sensing data}, volume={202}, ISSN={["1879-0704"]}, DOI={10.1016/j.rse.2017.03.024}, abstractNote={Oil spills have adverse effects on the environment and economy. Near real time detection and response activities enable to better manage the required resources at the incident area for clean-up and control operations. Multi-temporal remote sensing (RS) technologies are widely used to detect and monitor oil spills on the Ocean surfaces. However, current techniques using RS data for oil spill detection are time consuming and expensive in terms of computational cost and related infrastructure. The main focus of this work is oil spill detection from voluminous multi-temporal LANDSAT-7 imagery using high performance computing technologies such as graphics processing units (GPUs) and Message Passing Interface (MPI) to speed up the detection process and provide rapid response. Kepler compute architecture based GPU (Tesla K40) with Compute Unified Device Architecture (CUDA), which is a parallel programming mechanism for GPU is used in the development of the detection algorithms. Oil spill detection techniques that were adapted to GPU based processing include band-ratio and Morphological attribute profile (MAP) based on six structural and shape description attributes namely, Gray mean, standard deviation, elongation, shape complexity, solidity and orientation. Experimental results show the significant gains in the computational speed of these techniques when implemented on a GPU and MPI. A GPU vs. CPU comparison shows that the proposed approach achieves a speedup of around 10 × for MAP and 14 × for band ratio approaches, which includes the data transfer cost. However, the MPI implementation using 64 cores outperforms the GPU, and executes the time intensive task of computing the above said attributes in only 18 min, whereas a GPU consumes around an hour.}, journal={REMOTE SENSING OF ENVIRONMENT}, author={Bhangale, Ujwala and Durbha, Surya S. and King, Roger L. and Younan, Nicolas H. and Vatsavai, Rangaraju}, year={2017}, month={Dec}, pages={28–44} } @article{prasad_aghajarian_mcdermott_shah_mokbel_puri_rey_shekhar_xe_vatsavai_et al._2017, title={Parallel Processing over Spatial-Temporal Datasets from Geo, Bio, Climate and Social Science Communities: A Research Roadmap}, ISSN={["2379-7703"]}, DOI={10.1109/bigdatacongress.2017.39}, abstractNote={This vision paper reviews the current state-of-the-art and lays out emerging research challenges in parallel processing of spatial-temporal large datasets relevant to a variety of scientific communities.
The spatio-temporal data, whether captured through remote sensors (global earth observations), ground and ocean sensors (e.g., soil moisture sensors, buoys), social media and hand-held, traffic-related sensors and cameras, medical imaging (e.g., MRI), or large scale simulations (e.g., climate) have always been “big.” A common thread among all these big collections of datasets is that they are spatial and temporal. Processing and analyzing these datasets requires high-performance computing (HPC) infrastructures. Various agencies, scientific communities and increasingly the society at large rely on spatial data management, analysis, and spatial data mining to gain insights and produce actionable plans. Therefore, an ecosystem of integrated and reliable software infrastructure is required for spatial-temporal big data management and analysis that will serve as crucial tools for solving a wide set of research problems from different scientific and engineering areas and to empower users with next-generation tools. This vision requires a multidisciplinary effort to significantly advance domain research and have a broad impact on the society. The areas of research discussed in this paper include (i) spatial data mining, (ii) data analytics over remote sensing data, (iii) processing medical images, (iv) spatial econometrics analyses, (v) Map-Reduce-based systems for spatial computation and visualization, (vi) CyberGIS systems, and (vii) foundational parallel algorithms and data structures for polygonal datasets, and why HPC infrastructures, including harnessing graphics accelerators, are needed for time-critical applications.}, journal={2017 IEEE 6TH INTERNATIONAL CONGRESS ON BIG DATA (BIGDATA CONGRESS 2017)}, author={Prasad, Sushil K. and Aghajarian, Danial and McDermott, Michael and Shah, Dhara and Mokbel, Mohamed and Puri, Satish and Rey, Sergio J. and Shekhar, Shashi and Xe, Yiqun and Vatsavai, Ranga Raju and et al.}, year={2017}, pages={232–250} } @article{kurte_durbha_king_younan_vatsavai_2017, title={Semantics-Enabled Framework for Spatial Image Information Mining of Linked Earth Observation Data}, volume={10}, ISSN={["2151-1535"]}, DOI={10.1109/jstars.2016.2547992}, abstractNote={Recent developments in sensor technology are contributing toward the tremendous growth of remote sensing (RS) archives (currently, at the petabyte scale). However, this data largely remains unexploited due to the current limitations in the data discovery, querying, and retrieval capabilities. This issue becomes exacerbated in disaster situations, where there is a need for rapid processing and retrieval of the affected areas. Furthermore, the retrieval of images based on the spatial configurations of affected regions [land use/cover (LULC) classes] in an image is important in disaster situations such as floods and earthquakes. The majority of existing Earth observation (EO) image information mining (IIM) systems does not consider the spatial relations among image regions during image retrieval (aka spatial semantic gap). In this work, we have specifically addressed two issues, i.e., explicit modeling of topological and directional relationships between image regions, and development of a resource description framework (RDF)-based spatial semantic graphs (SSGs). This enables more intuitive querying and reasoning on the archived data.
A spatial IIM (SIIM) framework is proposed, which integrates a logic-based reasoning mechanism to extract the hidden spatial relationships (both topological and directional) and enables image retrieval based on spatial relationships. The system is tested using several spatial relations-based queries on the RS image repository of flood-affected areas to check its applicability in post flood scenario. Precision, recall, and F-measure metrics were used to evaluate the performance of the SIIM system, which showed good potential for spatial relations-based image retrieval.}, number={1}, journal={IEEE JOURNAL OF SELECTED TOPICS IN APPLIED EARTH OBSERVATIONS AND REMOTE SENSING}, author={Kurte, Kuldeep R. and Durbha, Surya S. and King, Roger L. and Younan, Nicolas H. and Vatsavai, Rangaraju}, year={2017}, month={Jan}, pages={29–44} } @inproceedings{connors_vatsavai_2017, title={Semi-supervised deep generative models for change detection in very high resolution imagery}, DOI={10.1109/igarss.2017.8127139}, abstractNote={Increasing population, rapid urbanization, quest for biofuels, pollution, diseases, and adverse climate changes are some of the major drivers behind the changing surface of our planet. Timely monitoring and assessment of these changes, along with dissemination of accurate information, is important for policy makers, city planners, and humanitarian relief workers. Advances in remote sensing technologies have led to acquisition of very high resolution remote sensing imagery in the past decade. This data is highly useful for the aforementioned applications, and machine learning technology can be used to identify and quantify the changed regions. In this study we explore a semi-supervised deep generative model for change detection in very high resolution multispectral and bitemporal imagery. We constructed an auxiliary variational autoencoder that infers class labels without incurring high sample complexity costs. The resulting classifier was able to produce accurate predictions of real changes over images that appear significantly different due to environmental conditions (not real changes) while utilizing only a small set of labeled samples.}, booktitle={2017 ieee international geoscience and remote sensing symposium (igarss)}, author={Connors, C. and Vatsavai, Ranga Raju}, year={2017}, pages={1063–1066} } @article{hong_vatsavai_2016, title={A Scalable Probabilistic Change Detection Algorithm for Very High Resolution (VHR) Satellite Imagery}, ISSN={["2379-7703"]}, DOI={10.1109/bigdatacongress.2016.42}, abstractNote={Detecting landscape changes using very high-resolution multispectral imagery demands an accurate and scalable algorithm that is robust to geometric and atmospheric errors. Existing pixel-based change detection approaches, however, have several drawbacks, which render them ineffective for VHR imagery analysis. A recent probabilistic change detection framework provides more accurate assessment of changes than traditional approaches by analyzing image patches than pixels. However, this patch (grid)-based approach produces coarse-resolution (patch size) changes. In this work we present a sliding window based approach that produces changes at the native image resolution. The increased computational demand of the sliding window based approach is addressed through thread-level parallelization on shared memory architectures. Our experimental evaluation showed a 91% performance improvement compared to its sequential counterpart on a sq. 
KM aerial image with varying window sizes on a 16-core (32 virtual threads) Intel Xeon processor.}, journal={2016 IEEE INTERNATIONAL CONGRESS ON BIG DATA - BIGDATA CONGRESS 2016}, author={Hong, Seokyong and Vatsavai, Ranga Raju}, year={2016}, pages={275–282} } @article{ramachandra_gadiraju_vatsavai_kaiser_karnowski_2016, title={Detecting Extreme Events in Gridded Climate Data}, volume={80}, ISSN={1877-0509}, url={http://dx.doi.org/10.1016/J.PROCS.2016.05.537}, DOI={10.1016/J.PROCS.2016.05.537}, abstractNote={Detecting and tracking extreme events in gridded climatological data is a challenging problem on several fronts: algorithms, scalability, and I/O. Successful detection of these events will give climate scientists an alternate view of the behavior of different climatological variables, leading to enhanced scientific understanding of the impacts of events such as heat and cold waves, and on a larger scale, the El Niño Southern Oscillation. Recent advances in computing power and research in data sciences enabled us to look at this problem with a different perspective from what was previously possible. In this paper we present our computationally efficient algorithms for anomalous cluster detection on climate change big data. We provide results on detection and tracking of surface temperature and geopotential height anomalies, a trend analysis, and a study of relationships between the variables. We also identify the limitations of our approaches, future directions for research and alternate approaches.}, journal={Procedia Computer Science}, publisher={Elsevier BV}, author={Ramachandra, Bharathkumar and Gadiraju, Krishna Karthik and Vatsavai, Ranga Raju and Kaiser, Dale P. and Karnowski, Thomas P.}, year={2016}, pages={2397–2401} } @article{vatsavai_chandola_2016, title={Guest editorial: big spatial data}, volume={20}, ISSN={["1573-7624"]}, DOI={10.1007/s10707-016-0269-7}, number={4}, journal={GEOINFORMATICA}, author={Vatsavai, Raju and Chandola, Varun}, year={2016}, month={Oct}, pages={797–799} } @article{idrobo_rusz_spiegelberg_mcguire_symons_vatsavai_cantoni_lupini_2016, title={Mapping Magnetic Ordering With Aberrated Electron Probes in STEM}, volume={22}, ISSN={1431-9276 1435-8115}, url={http://dx.doi.org/10.1017/S1431927616009223}, DOI={10.1017/S1431927616009223}, abstractNote={Although magnetism originates at the atomic scale, existing spectroscopic techniques sensitive to magnetic signals only produce spectra with spatial resolutions on a larger scale. However, recently it has been theoretically argued that atomic-size electron probes with customized phase distributions can detect electron magnetic circular dichroism (EMCD) [1]. Based on this prediction we have recently shown that deliberately aberrated electron probes in scanning transmission electron microscopy (STEM) can be utilized to obtain chiral dichroic signals in materials via electron energy-loss spectroscopy (EELS) with high spatial resolution [2].}, number={S3}, journal={Microscopy and Microanalysis}, publisher={Cambridge University Press (CUP)}, author={Idrobo, Juan Carlos and Rusz, Ján and Spiegelberg, Jakob and McGuire, Michael A. and Symons, Christopher T. 
and Vatsavai, Ranga Raju and Cantoni, Claudia and Lupini, Andrew R.}, year={2016}, month={Jul}, pages={1676–1677} } @article{karpatne_jiang_vatsavai_shekhar_kumar_2016, title={Monitoring Land-Cover Changes A machine-learning perspective}, volume={4}, ISSN={["2168-6831"]}, DOI={10.1109/mgrs.2016.2528038}, abstractNote={Monitoring land-cover changes is of prime importance for the effective planning and management of critical, natural and man-made resources. The growing availability of remote sensing data provides ample opportunities for monitoring land-cover changes on a global scale using machine-learning techniques. However, remote sensing data sets exhibit unique domain-specific properties that limit the usefulness of traditional machine-learning methods. This article presents a brief overview of these challenges from the perspective of machine learning and discusses some of the recent advances in machine learning that are relevant for addressing them. These approaches show promise for future research in the detection of land-cover change using machine-learning algorithms.}, number={2}, journal={IEEE GEOSCIENCE AND REMOTE SENSING MAGAZINE}, author={Karpatne, Anuj and Jiang, Zhe and Vatsavai, Ranga Raju and Shekhar, Shashi and Kumar, Vipin}, year={2016}, month={Jun}, pages={8–21} } @inproceedings{chen_vatsavai_ramachandra_zhang_singh_sukumar_2016, title={Scalable nearest neighbor based hierarchical change detection framework for crop monitoring}, DOI={10.1109/bigdata.2016.7840735}, abstractNote={Monitoring biomass over large geographic regions for changes in vegetation and cropping patterns is important for many applications. Changes in vegetation happen due to reasons ranging from climate change and damages to new government policies and regulations. Remote sensing imagery (multi-spectral and multi-temporal) is widely used in change pattern mapping studies. Existing bi-temporal change detection techniques are better suited for multi-spectral images and time series based techniques are more suited for analyzing multi-temporal images. A key contribution of this work is to define change as hierarchical rather than boolean. Based on this definition of change pattern, we developed a novel time series similarity based change detection framework for identifying inter-annual changes by exploiting phenological properties of growing crops from satellite time series imagery. The proposed framework consists of three components: hierarchical clustering tree construction, nearest neighbor based classification, and change detection using similarity hierarchy. Though the proposed approach is unsupervised, we present evaluation using manually induced change regions embedded in the real dataset. We compare our method with the widely used K-Means clustering and evaluation shows that K-Means over-detects changes in comparison to our proposed method.}, booktitle={2016 IEEE International Conference on Big Data (Big Data)}, author={Chen, Z. X. and Vatsavai, Ranga Raju and Ramachandra, B. and Zhang, Q. and Singh, N. and Sukumar, S.}, year={2016}, pages={1309–1314} } @article{hong_vatsavai_2016, title={Sliding Window-based Probabilistic Change Detection for Remote-sensed Images}, volume={80}, ISSN={1877-0509}, url={http://dx.doi.org/10.1016/J.PROCS.2016.05.438}, DOI={10.1016/J.PROCS.2016.05.438}, abstractNote={A recent probabilistic change detection algorithm provides a way for assessing changes on remote-sensed images which is more robust to geometric and atmospheric errors than existing pixel-based methods. 
However, its grid (patch)-based change detection results in coarse-resolution change maps and often discretizes continuous changes that occur across grid boundaries. In this study, we propose a sliding window-based extension of the probabilistic change detection approach to overcome such artificial limitations.}, journal={Procedia Computer Science}, publisher={Elsevier BV}, author={Hong, Seokyong and Vatsavai, Ranga R.}, year={2016}, pages={2348–2352} } @article{shashidharan_berkel_vatsavai_meentemeyer_2016, title={pFUTURES: A Parallel Framework for Cellular Automaton Based Urban Growth Models}, volume={9927}, ISBN={["978-3-319-45737-6"]}, ISSN={["1611-3349"]}, DOI={10.1007/978-3-319-45738-3_11}, abstractNote={Simulating structural changes in landscape is a routine task in computational geography. Owing to advances in sensing and data collection technologies, geospatial data is becoming available at finer spatial and temporal resolutions. However, in practice, these large datasets impede land simulation based studies over large geographic regions due to computational and I/O challenges. The memory overhead of sequential implementations and long execution times further limit the possibilities of simulating future urban scenarios. In this paper, we present a generic framework for co-ordinating I/O and computation for geospatial simulations in a distributed computing environment. We present three parallel approaches and demonstrate the performance and scalability benefits of our parallel implementation pFUTURES, an extension of the FUTURES open-source multi-level urban growth model. Our analysis shows that although a time synchronous parallel approach obtains the same results as a sequential model, an asynchronous parallel approach provides better scaling due to reduced disk I/O and communication overheads.}, journal={GEOGRAPHIC INFORMATION SCIENCE, (GISCIENCE 2016)}, author={Shashidharan, Ashwin and Berkel, Derek B. and Vatsavai, Ranga Raju and Meentemeyer, Ross K.}, year={2016}, pages={163–177} } @article{vatsavai_2015, title={A Scalable Complex Pattern Mining Framework for Global Settlement Mapping}, ISSN={["2379-7703"]}, DOI={10.1109/bigdatacongress.2015.81}, abstractNote={Human settlements manifest as complex spatial patterns in very high-resolution (VHR) satellite remote sensing images. Widely used pixel and object-based methods are incapable of capturing these complex patterns. Recently developed multiple instance learning algorithms showed to be very effective in mapping different types of human settlements. However, multiple instance learning approaches are computationally expensive and do not scale for global scale problems using big VHR imagery data. In this paper, we extend the Gaussian Multiple Instance (GMIL) learning by simplifying the model assumptions. Experimental evaluation shows that this method is computationally more efficient while maintaining similar accuracy as the GMIL algorithm.}, journal={2015 IEEE INTERNATIONAL CONGRESS ON BIG DATA - BIGDATA CONGRESS 2015}, author={Vatsavai, Ranga Raju}, year={2015}, pages={514–521} } @inproceedings{vatsavai_2015, title={Multitemporal data mining: From biomass monitoring to nuclear proliferation detection}, DOI={10.1109/multi-temp.2015.7245751}, abstractNote={We are living in an era of unprecedented population growth and migration, expanding urban and agriculture lands, depleting forests and portable water resources, and natural hazards and climate changes that are changing the face of the planet Earth. 
Multitemporal remote sensing observations provide a powerful means to monitor the Earth to identify and characterize these changes in near-real time. Data mining is proven to be highly useful in analyzing multi-resolution, multi-spectral, multisensor, and multi-temporal remote sensing data. In this paper we describe the state-of-the-art data mining approaches with applications in biomass and critical infrastructure monitoring.}, booktitle={2015 8th International Workshop on the Analysis of Multitemporal Remote Sensing Images (Multi-Temp)}, author={Vatsavai, Ranga Raju}, year={2015} } @article{vatsavai_graesser_2012, title={Probabilistic Change Detection Framework for Analyzing Settlement Dynamics Using Very High-resolution Satellite Imagery}, volume={9}, ISSN={1877-0509}, url={http://dx.doi.org/10.1016/j.procs.2012.04.097}, DOI={10.1016/j.procs.2012.04.097}, abstractNote={Global human population growth and an increasingly urbanizing world have led to rapid changes in human settlement landscapes and patterns. Timely monitoring and assessment of these changes and dissemination of accurate information is important for policy makers, city planners, and humanitarian relief workers. Satellite imagery provides useful data for the aforementioned applications, and remote sensing can be used to identify and quantify change areas. We explore a probabilistic framework to identify changes in human settlements using very high-resolution satellite imagery. As compared to predominantly pixel-based change detection systems which are highly sensitive to image registration errors, our grid (block) based approach is more robust to registration errors. The presented framework is an automated change detection system applicable to both panchromatic and multi-spectral imagery. The detection system provides comprehensible information about change areas, and minimizes the post-detection thresholding procedure often needed in traditional change detection algorithms.}, journal={Procedia Computer Science}, publisher={Elsevier BV}, author={Vatsavai, Ranga R. and Graesser, Jordan}, year={2012}, pages={907–916} } @article{vatsavai_bhaduri_2011, title={A hybrid classification scheme for mining multisource geospatial data}, volume={15}, ISSN={1384-6175 1573-7624}, url={http://dx.doi.org/10.1007/S10707-010-0113-4}, DOI={10.1007/S10707-010-0113-4}, number={1}, journal={GeoInformatica}, publisher={Springer Science and Business Media LLC}, author={Vatsavai, Ranga Raju and Bhaduri, Budhendra}, year={2011}, month={Jan}, pages={29–47} } @article{chandola_vatsavai_2011, title={A scalable gaussian process analysis algorithm for biomass monitoring}, volume={4}, ISSN={1932-1864}, url={http://dx.doi.org/10.1002/sam.10129}, DOI={10.1002/sam.10129}, abstractNote={Abstract}, number={4}, journal={Statistical Analysis and Data Mining}, publisher={Wiley}, author={Chandola, Varun and Vatsavai, Ranga Raju}, year={2011}, month={Jul}, pages={430–445} } @article{hoffman_larson_mills_brooks_ganguly_hargrove_huang_kumar_vatsavai_2011, title={Data Mining in Earth System Science (DMESS 2011)}, volume={4}, ISSN={1877-0509}, url={http://dx.doi.org/10.1016/j.procs.2011.04.157}, DOI={10.1016/j.procs.2011.04.157}, abstractNote={From field-scale measurements to global climate simulations and remote sensing, the growing body of very large and long time series Earth science data are increasingly difficult to analyze, visualize, and interpret. 
Data mining, information-theoretic, and machine learning techniques—such as cluster analysis, singular value decomposition, block entropy, Fourier and wavelet analysis, phase-space reconstruction, and artificial neural networks—are being applied to problems of segmentation, feature extraction, change detection, model-data comparison, and model validation. The size and complexity of Earth science data exceed the limits of most analysis tools and the capacities of desktop computers. New scalable analysis and visualization tools, running on parallel cluster computers and supercomputers, are required to analyze data of this magnitude. This workshop will demonstrate how data mining techniques are applied in the Earth sciences and describe innovative computer science methods that support analysis and discovery in the Earth sciences.}, journal={Procedia Computer Science}, publisher={Elsevier BV}, author={Hoffman, Forrest M. and Larson, J. Walter and Mills, Richard Tran and Brooks, Bjørn-Gustaf J. and Ganguly, Auroop R. and Hargrove, William W. and Huang, Jian and Kumar, Jitendra and Vatsavai, Ranga R.}, year={2011}, pages={1450–1455} } @article{vatsavai_symons_chandola_jun_2011, title={GX-Means: A model-based divide and merge algorithm for geospatial image clustering}, volume={4}, ISSN={1877-0509}, url={http://dx.doi.org/10.1016/j.procs.2011.04.020}, DOI={10.1016/j.procs.2011.04.020}, abstractNote={One of the practical issues in clustering is the specification of the appropriate number of clusters, which is not obvious when analyzing geospatial datasets, partly because they are huge (both in size and spatial extent) and high dimensional. In this paper we present a computationally efficient model-based split and merge clustering algorithm that incrementally finds model parameters and the number of clusters. Additionally, we attempt to provide insights into this problem and other data mining challenges that are encountered when clustering geospatial data. The basic algorithm we present is similar to the G-means and X-means algorithms; however, our proposed approach avoids certain limitations of these well-known clustering algorithms that are pertinent when dealing with geospatial data. We compare the performance of our approach with the G-means and X-means algorithms. Experimental evaluation on simulated data and on multispectral and hyperspectral remotely sensed image data demonstrates the effectiveness of our algorithm.}, journal={Procedia Computer Science}, publisher={Elsevier BV}, author={Vatsavai, Ranga R. and Symons, Christopher T. and Chandola, Varun and Jun, Goo}, year={2011}, pages={186–195} } @inbook{vatsavai_2009, series={Lecture Notes in Computer Science}, title={Incremental Clustering Algorithm for Earth Science Data Mining}, ISBN={9783642019722 9783642019739}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-642-01973-9_42}, DOI={10.1007/978-3-642-01973-9_42}, abstractNote={Remote sensing data plays a key role in understanding complex geographic phenomena. Clustering is a useful tool in discovering interesting patterns and structures within multivariate geospatial data. One of the key issues in clustering is the specification of the appropriate number of clusters, which is not obvious in many practical situations. In this paper we provide an extension of the G-means algorithm that automatically learns the number of clusters present in the data and avoids overestimation of the number of clusters.
Experimental evaluation on simulated and remotely sensed image data shows the effectiveness of our algorithm.}, booktitle={Computational Science – ICCS 2009}, publisher={Springer Berlin Heidelberg}, author={Vatsavai, Ranga Raju}, editor={Allen, G. and Nabrzyski, J. and Seidel, E. and van Albada, G. D. and Dongarra, J. and Sloot, P. M. A.}, year={2009}, pages={375–384}, collection={Lecture Notes in Computer Science} } @inbook{vatsavai_shekhar_bhaduri_2008, title={A Learning Scheme for Recognizing Sub-classes from Model Trained on Aggregate Classes}, ISBN={9783540896883 9783540896890}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-540-89689-0_100}, DOI={10.1007/978-3-540-89689-0_100}, abstractNote={In many practical situations it is not feasible to collect labeled samples for all available classes in a domain. Especially in supervised classification of remotely sensed images, it is impossible to collect ground truth information over large geographic regions for all thematic classes. As a result, analysts often collect labels for aggregate classes. In this paper we present a novel learning scheme that automatically learns sub-classes from the user-given aggregate classes. We model each aggregate class as a finite Gaussian mixture instead of the classical assumption of a unimodal Gaussian per class. The number of components in each finite Gaussian mixture is automatically estimated. Experimental results on real remotely sensed image classification showed not only improved accuracy in aggregate class classification but also that the proposed method recognized sub-classes.}, booktitle={Lecture Notes in Computer Science}, publisher={Springer Berlin Heidelberg}, author={Vatsavai, Ranga Raju and Shekhar, Shashi and Bhaduri, Budhendra}, year={2008}, pages={967–976} } @article{vatsavai_shekhar_burk_2007, title={An efficient spatial semi-supervised learning algorithm}, volume={22}, ISSN={1744-5760 1744-5779}, url={http://dx.doi.org/10.1080/17445760701207546}, DOI={10.1080/17445760701207546}, abstractNote={We began by developing a semi-supervised learning method based on the expectation-maximization (EM) algorithm, and maximum likelihood and maximum a posteriori classifiers (MLC and MAP). This scheme utilizes a small set of labeled and a large number of unlabeled training samples. We conducted several experiments on multi-spectral images to understand the impact of unlabeled samples on the classification performance. Our study shows that although, in general, classification accuracy improves with the addition of unlabeled training samples, it is not guaranteed to achieve consistently higher accuracies unless sufficient care is exercised when designing a semi-supervised classifier. We also extended this semi-supervised framework to model spatial context through Markov random fields (MRF). Initial experiments showed an improved accuracy of the spatial semi-supervised learning algorithm (SSSL) over the MLC, semi-supervised, and MRF classifiers. An efficient implementation is provided so that the SSSL can be applied in production environments.
We also discuss some open research problems.}, number={6}, journal={International Journal of Parallel, Emergent and Distributed Systems}, publisher={Informa UK Limited}, author={Vatsavai, Ranga Raju and Shekhar, Shashi and Burk, Thomas E.}, year={2007}, month={Dec}, pages={427–437} } @inbook{mignet_basak_bhide_roy_roy_sengar_vatsavai_reichert_steinbach_ravikant_et al._2006, title={Improving DB2 Performance Expert – A Generic Analysis Framework}, ISBN={9783540329602 9783540329619}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/11687238_68}, DOI={10.1007/11687238_68}, abstractNote={The complexity of software has been dramatically increasing over the years. Database management systems have not escaped this complexity. On the contrary, this problem is aggravated in database systems because they try to integrate multiple paradigms (object, relational, XML) in one box and are supposed to perform well in every scenario, unlike OLAP or OLTP. As a result, it is very difficult to fine-tune the performance of a DBMS. Hence, there is a need for an external tool that can monitor and fine-tune the DBMS. In this extended abstract, we describe a few techniques to improve DB2 Performance Expert, which helps in monitoring DB2. Specifically, we describe a component that is capable of early performance problem detection by analyzing sensor values over a long period of time. We also showcase a trends plotter and workload characterizer, which allow a DBA to better understand resource usage. A prototype of these tools has been demonstrated to a few select customers, and based on their feedback this paper outlines the various issues that still need to be addressed in the next versions of the tool.}, booktitle={Lecture Notes in Computer Science}, publisher={Springer Berlin Heidelberg}, author={Mignet, Laurent and Basak, Jayanta and Bhide, Manish and Roy, Prasan and Roy, Sourashis and Sengar, Vibhuti S. and Vatsavai, Ranga R. and Reichert, Michael and Steinbach, Torsten and Ravikant, D. V. S. and et al.}, year={2006}, pages={1097–1101} } @inbook{vatsavai_shekhar_burk_lime_2006, title={UMN-MapServer: A High-Performance, Interoperable, and Open Source Web Mapping and Geo-spatial Analysis System}, ISBN={9783540445265 9783540445289}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/11863939_26}, DOI={10.1007/11863939_26}, abstractNote={Recent advances in Internet technologies, coupled with wide adoption of the web services paradigm and interoperability standards, make the World Wide Web a popular vehicle for geo-spatial information distribution and online geo-processing. Web GIS is rapidly evolving and adapting to advances in Internet technologies. Web GISes are predominantly designed under a “thin-client / fat-server” paradigm. This approach has several disadvantages. For example, as the number of users increases, the load on the server increases and system performance decreases. Recently the focus has shifted towards client-side Web GISes, which are heavy-duty, stand-alone systems. We take an opposing approach and present a load-balancing client/server Web-based spatial analysis system, UMN-MapServer, and evaluate its performance in a regional natural resource mapping and analysis (NRAMS) application which utilizes biweekly AVHRR imagery and several other raster and vector geo-spatial datasets. We also evaluate alternative approaches and assess the pros and cons of our design and implementation.
UMN-MapServer also implements several open standards, such as WMS, WCS, GML, and WFS. In this paper, we also describe in detail the WMS, WCS, and GML extensions from the interoperability point of view, and discuss issues related to the adoption of such standards.}, booktitle={Geographic Information Science}, publisher={Springer Berlin Heidelberg}, author={Vatsavai, Ranga Raju and Shekhar, Shashi and Burk, Thomas E. and Lime, Stephen}, year={2006}, pages={400–417} } @inbook{kazar_shekhar_lilja_vatsavai_pace_2004, title={Comparing Exact and Approximate Spatial Auto-regression Model Solutions for Spatial Data Analysis}, ISBN={9783540235583 9783540302315}, ISSN={0302-9743 1611-3349}, url={http://dx.doi.org/10.1007/978-3-540-30231-5_10}, DOI={10.1007/978-3-540-30231-5_10}, abstractNote={The spatial auto-regression (SAR) model is a popular spatial data analysis technique, which has been used in many applications with geo-spatial datasets. However, exact solutions for estimating SAR parameters are computationally expensive due to the need to compute all the eigenvalues of a very large matrix. Recently we developed a dense-exact parallel formulation of the SAR parameter estimation procedure using data parallelism and a hybrid programming technique. Though this parallel implementation showed scalability up to eight processors, the exact solution still suffers from high computational complexity and memory requirements. These limitations have led us to investigate approximate solutions for SAR model parameter estimation with the main objective of scaling the SAR model for large spatial data analysis problems. In this paper we present two candidate approximate semi-sparse solutions of the SAR model based on Taylor series expansion and Chebyshev polynomials. Our initial experiments showed that these new techniques scale well for very large data sets, such as remote sensing images having millions of pixels. The results also show that the differences between exact and approximate SAR parameter estimates are within 0.7% and 8.2% for Chebyshev polynomials and Taylor series expansion, respectively, and have no significant effect on the prediction accuracy.}, booktitle={Geographic Information Science}, publisher={Springer Berlin Heidelberg}, author={Kazar, Baris M. and Shekhar, Shashi and Lilja, David J. and Vatsavai, Ranga R. and Pace, R. Kelley}, year={2004}, pages={140–161} }