@article{ausin_maniktala_barnes_chi_2022,
  title={The Impact of Batch Deep Reinforcement Learning on Student Performance: A Simple Act of Explanation Can Go A Long Way},
  ISSN={1560-4306},
  url={https://doi.org/10.1007/s40593-022-00312-3},
  DOI={10.1007/s40593-022-00312-3},
  journal={International Journal of Artificial Intelligence in Education},
  author={Ausin, Markel Sanz and Maniktala, Mehak and Barnes, Tiffany and Chi, Min},
  year={2022},
  month={Nov}
}

@article{zhou_azizsoltani_ausin_barnes_chi_2021,
  title={Leveraging Granularity: Hierarchical Reinforcement Learning for Pedagogical Policy Induction},
  volume={8},
  ISSN={1560-4306},
  DOI={10.1007/s40593-021-00269-9},
  journal={International Journal of Artificial Intelligence in Education},
  author={Zhou, Guojing and Azizsoltani, Hamoon and Ausin, Markel Sanz and Barnes, Tiffany and Chi, Min},
  year={2021},
  month={Aug}
}

@article{ausin_maniktala_barnes_chi_2021,
  title={Tackling the Credit Assignment Problem in Reinforcement Learning-Induced Pedagogical Policies with Neural Networks},
  volume={12748},
  ISBN={978-3-030-78291-7},
  ISSN={1611-3349},
  url={https://doi.org/10.1007/978-3-030-78292-4_29},
  DOI={10.1007/978-3-030-78292-4_29},
  abstractNote={Intelligent Tutoring Systems (ITS) provide a powerful tool for students to learn in an adaptive, personalized, and goal-oriented manner. In recent years, Reinforcement Learning (RL) has been shown to be capable of leveraging previous student data to induce effective pedagogical policies for future students. One of the most desirable goals of these policies is to maximize student learning gains while minimizing the training time. However, this metric is often not available until a student has completed the entire tutor. For this reason, the reinforcement signal of the effectiveness of the tutor is delayed. Assigning credit for each intermediate action based on a delayed reward is a challenging problem known as the temporal Credit Assignment Problem (CAP). The CAP makes it difficult for most RL algorithms to assign credit to each action. In this work, we develop a general Neural Network-based algorithm that tackles the CAP by inferring immediate rewards from delayed rewards. We perform two empirical classroom studies, and the results show that this algorithm, in combination with a Deep RL agent, can improve student learning performance while reducing training time.},
  journal={Artificial Intelligence in Education (AIED 2021), Part I},
  author={Ausin, Markel Sanz and Maniktala, Mehak and Barnes, Tiffany and Chi, Min},
  year={2021},
  pages={356–368}
}

@inbook{ausin_maniktala_barnes_chi_2020,
  title={Exploring the Impact of Simple Explanations and Agency on Batch Deep Reinforcement Learning Induced Pedagogical Policies},
  url={https://doi.org/10.1007/978-3-030-52237-7_38},
  DOI={10.1007/978-3-030-52237-7_38},
  abstractNote={In recent years, Reinforcement Learning (RL), especially Deep RL (DRL), has shown outstanding performance in video games from Atari and Mario to StarCraft. However, little evidence has shown that DRL can be successfully applied to real-life human-centric tasks such as education or healthcare. Unlike classic game playing, where the RL goal is to make an agent smart, in human-centric tasks the ultimate RL goal is to make the human-agent interactions productive and fruitful. Additionally, in many real-life human-centric tasks, data can be noisy and limited. As a sub-field of RL, batch RL is designed for handling situations where data is limited yet noisy, and building simulations is challenging. In two consecutive classroom studies, we investigated applying batch DRL to the task of pedagogical policy induction for an Intelligent Tutoring System (ITS), and empirically evaluated the effectiveness of the induced pedagogical policies. In Fall 2018 (F18), the DRL policy was compared against an expert-designed baseline policy, and in Spring 2019 (S19), we examined the impact of explaining the batch DRL-induced policy, compared against student decision making and the expert baseline policy. Our results showed that 1) while no significant difference was found between the batch RL-induced policy and the expert policy in F18, the batch RL-induced policy with simple explanations significantly improved students’ learning performance more than the expert policy alone in S19; and 2) no significant differences were found between student decision making and the expert policy. Overall, our results suggest that pairing simple explanations with induced RL policies can be an important and effective technique for applying RL to real-life human-centric tasks.},
  booktitle={Lecture Notes in Computer Science},
  publisher={Springer International Publishing},
  author={Ausin, Markel Sanz and Maniktala, Mehak and Barnes, Tiffany and Chi, Min},
  year={2020},
  pages={472–485}
}

@article{zhou_azizsoltani_ausin_barnes_chi_2019,
  title={Hierarchical Reinforcement Learning for Pedagogical Policy Induction},
  volume={11625},
  ISBN={978-3-030-23203-0},
  ISSN={1611-3349},
  DOI={10.1007/978-3-030-23204-7_45},
  abstractNote={In interactive e-learning environments such as Intelligent Tutoring Systems, there are pedagogical decisions to make at two main levels of granularity: whole problems and single steps. Recent years have seen growing interest in data-driven techniques for such pedagogical decision making, which can dynamically tailor students’ learning experiences. Most existing data-driven approaches, however, treat these pedagogical decisions equally, or independently, disregarding the long-term impact that tutor decisions may have across these two levels of granularity. In this paper, we propose and apply an offline, off-policy Gaussian Process-based Hierarchical Reinforcement Learning (HRL) framework to induce a hierarchical pedagogical policy that makes decisions at both the problem and step levels. In an empirical classroom study with 180 students, our results show that the HRL policy is significantly more effective than a Deep Q-Network (DQN)-induced policy and a random yet reasonable baseline policy.},
  journal={Artificial Intelligence in Education (AIED 2019), Part I},
  author={Zhou, Guojing and Azizsoltani, Hamoon and Ausin, Markel Sanz and Barnes, Tiffany and Chi, Min},
  year={2019},
  pages={544–556}
}