@article{kim_chi_2023,
  title={Time-aware deep reinforcement learning with multi-temporal abstraction},
  ISSN={1573-7497},
  DOI={10.1007/s10489-022-04392-5},
  abstractNote={Deep reinforcement learning (DRL) has shown great promise, yet it rarely performs well on real-world decision-making tasks, particularly those involving irregular time series with sparse actions. Although temporal abstractions allow an agent to grasp high-level states from irregular time series with sparse actions, they aggravate temporal irregularity by widening the range of time intervals needed to represent a state and estimate expected returns. In this work, we propose a general Time-aware DRL framework with Multi-Temporal Abstraction (T-MTA) that incorporates awareness of time intervals from two aspects: temporal discounting and temporal abstraction. For the former, we propose a Time-aware DRL method; for the latter, a Multi-Temporal Abstraction mechanism. T-MTA was tested on three standard RL testbeds and two real-life tasks (control of nuclear reactors and prevention of septic shock), which together represent four common learning contexts: online and offline, as well as fully and partially observable environments. As T-MTA is a general framework, it can be combined with any model-free DRL method; in this work, we examined two in particular: the Deep Q-Network approach and its variants, and Truly Proximal Policy Optimization. Our results show that T-MTA significantly outperforms competing baseline frameworks, including a standalone Time-aware DRL framework, MTAs, and the original DRL methods that consider neither temporal aspect, especially when partially observable environments are involved and the range of time intervals is large.},
  journal={Applied Intelligence},
  author={Kim, Yeo Jin and Chi, Min},
  year={2023},
  month={Mar}
}

@article{ausin_azizsoltani_ju_kim_chi_2021,
  title={InferNet for Delayed Reinforcement Tasks: Addressing the Temporal Credit Assignment Problem},
  ISSN={2639-1589},
  DOI={10.1109/BigData52589.2021.9671827},
  abstractNote={Rewards are the critical signals by which Reinforcement Learning (RL) algorithms learn the desired behavior in a sequential, multi-step learning task. However, when these rewards are delayed and noisy, the learning process becomes far more challenging. The temporal Credit Assignment Problem (CAP) is a well-known and long-standing challenge in AI: RL, and especially Deep RL, often works well with immediate rewards but may fail when rewards are delayed, noisy, or both. In this work, we propose delegating the CAP to a neural-network-based algorithm named InferNet that explicitly learns to infer the immediate rewards from the delayed and noisy rewards. The effectiveness of InferNet was evaluated on three online RL tasks: a GridWorld, CartPole, and 40 Atari games; and two offline RL tasks: GridWorld and a real-life sepsis treatment task. The effectiveness of InferNet rewards is compared to that of immediate and delayed rewards in two settings: with and without noise. For the offline RL tasks, it is also compared to a strong baseline, InferGP [7].
  Overall, our results show that InferNet is robust to delayed or noisy reward functions and can be used effectively to solve the temporal CAP in a wide range of RL tasks where immediate rewards are unavailable or noisy.},
  journal={2021 IEEE International Conference on Big Data (Big Data)},
  author={Ausin, Markel Sanz and Azizsoltani, Hamoon and Ju, Song and Kim, Yeo Jin and Chi, Min},
  year={2021},
  pages={1337–1348}
}

@article{kim_ausin_chi_2021,
  title={Multi-Temporal Abstraction with Time-Aware Deep Q-Learning for Septic Shock Prevention},
  ISSN={2639-1589},
  DOI={10.1109/BigData52589.2021.9671662},
  abstractNote={Sepsis is a life-threatening organ dysfunction that imposes an enormous burden, and septic shock, its most severe complication, carries a mortality rate as high as 50%. Septic shock prevention is extremely challenging because individual patients often show very different disease progressions, so the timing of medical interventions can play a key role in their effectiveness. Recently, reinforcement learning (RL) methods such as deep Q-learning networks (DQN) have shown great promise for developing effective treatments to prevent septic shock. In this work, we propose MTA-TQN, a Multi-view Temporal Abstraction mechanism within a Time-aware deep Q-learning Network framework for this task. More specifically, 1) MTA-TQN leverages irregular time intervals to discount expected returns, preventing the systematic overestimation caused by temporal discounting errors; 2) it learns both short- and long-range dependencies with multi-view temporal abstractions, reducing bias toward a specific series of observations for a single state. The effectiveness of MTA-TQN is validated on two hard-exploration Atari games and the septic shock prevention task using real-world EHRs. Our results demonstrate that both time-awareness and multi-view temporal abstraction are essential to induce effective policies, particularly with irregular time-series data. In the septic shock prevention task, while the top 10% of patients whose treatments agreed with the DQN-induced policy experienced a 17% septic shock rate, our MTA-TQN policies achieved a 5.7% septic shock rate.},
  journal={2021 IEEE International Conference on Big Data (Big Data)},
  author={Kim, Yeo Jin and Ausin, Markel Sanz and Chi, Min},
  year={2021},
  pages={1657–1663}
}

@article{ju_kim_ausin_mayorga_chi_2021,
  title={To Reduce Healthcare Workload: Identify Critical Sepsis Progression Moments through Deep Reinforcement Learning},
  ISSN={2639-1589},
  DOI={10.1109/BigData52589.2021.9671407},
  abstractNote={Healthcare systems are struggling with increasing workloads that adversely affect quality of care and patient outcomes. When clinical practitioners must make countless medical decisions, they may not always be able to make them consistently or devote sufficient time to each. In this work, we formulate clinical decision making as a reinforcement learning (RL) problem and propose a human-controlled, machine-assisted (HC-MA) decision-making framework that gives clinical practitioners (the humans) control over the decision-making process while supporting effective decision-making. In our HC-MA framework, the role of the RL agent is to nudge clinicians only if they make suboptimal decisions at critical moments. This framework is supported by a general Critical Deep RL (Critical-DRL) approach, which uses Long-Short Term Rewards (LSTRs) and Critical Deep Q-learning Networks (CriQNs).
  Critical-DRL’s effectiveness was evaluated on both a GridWorld game and real-world septic-treatment datasets from two medical systems: a large health system in the northeastern USA, referred to as NEMed, and the Mayo Clinic in Rochester, Minnesota, USA. We found that our Critical-DRL approach, in which decisions are made only at critical junctures, is as effective as a fully executed DRL policy; moreover, it enables us to identify the critical moments in the septic treatment process, greatly reducing the burden on medical decision-makers by allowing them to make critical clinical decisions without negatively impacting outcomes.},
  journal={2021 IEEE International Conference on Big Data (Big Data)},
  author={Ju, Song and Kim, Yeo Jin and Ausin, Markel Sanz and Mayorga, Maria E. and Chi, Min},
  year={2021},
  pages={1640–1646}
}

@article{yang_kim_taub_azevedo_chi_2020,
  title={PRIME: Block-Wise Missingness Handling for Multi-modalities in Intelligent Tutoring Systems},
  volume={11962},
  ISBN={978-3-030-37733-5},
  ISSN={1611-3349},
  DOI={10.1007/978-3-030-37734-2_6},
  abstractNote={Block-wise missingness in multimodal data, which is common in practical scenarios such as multimedia intelligent tutoring systems (ITSs), poses a challenging barrier to analysis. In this work, we collected data from 194 undergraduates via a biology ITS involving three modalities: student-system logfiles, facial expressions, and eye tracking. However, only 32 of the 194 students had all three modalities; 83% were missing the facial expression data, the eye-tracking data, or both. To handle such block-wise missingness, we propose Progressively Refined Imputation for Multi-modalities by auto-Encoder (PRIME), which trains the imputation model on single, pairwise, and complete modality combinations in a progressive manner, and therefore enables us to maximally utilize all the available data. We evaluated PRIME against a single-modality, log-only baseline (without missingness handling) and five state-of-the-art missing-data handling methods on an important yet challenging student-modeling task: predicting students’ learning gains. Our results show that using multimodal data, made possible by missing-data handling, yields better prediction performance than using logfiles only, and that PRIME outperforms the other baseline methods on both the learning-gain prediction and data reconstruction tasks.},
  journal={Multimedia Modeling (MMM 2020), Part II},
  author={Yang, Xi and Kim, Yeo-Jin and Taub, Michelle and Azevedo, Roger and Chi, Min},
  year={2020},
  pages={63–75}
}