% Bibliography database: one entry per work, one field per line.
% Conventions used throughout this file:
%   - citation keys are lowercase ASCII in the form authors_year (title_year when no author is recorded)
%   - page ranges use a double hyphen, months use the unquoted three-letter macros
%   - acronyms and proper nouns in titles are brace-protected against style recasing
%   - LaTeX specials (percent sign, ampersand, underscore) are escaped in text fields; url and doi are kept raw

@article{shi_chi_barnes_price_2022,
  author        = {Shi, Y. and Chi, M. and Barnes, T. and Price, T. W.},
  title         = {{Code-DKT}: A Code-based Knowledge Tracing Model for Programming Tasks},
  journal       = {arXiv},
  year          = {2022},
  eprint        = {2206.03545},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2206.03545},
  url           = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85132259475&partnerID=MN8TOARS},
  abstract      = {Knowledge tracing (KT) models are a popular approach for predicting students' future performance at practice problems using their prior attempts. Though many innovations have been made in KT, most models including the state-of-the-art Deep KT (DKT) mainly leverage each student's response either as correct or incorrect, ignoring its content. In this work, we propose Code-based Deep Knowledge Tracing (Code-DKT), a model that uses an attention mechanism to automatically extract and select domain-specific code features to extend DKT. We compared the effectiveness of Code-DKT against Bayesian and Deep Knowledge Tracing (BKT and DKT) on a dataset from a class of 50 students attempting to solve 5 introductory programming assignments. Our results show that Code-DKT consistently outperforms DKT by 3.07-4.00\% AUC across the 5 assignments, a comparable improvement to other state-of-the-art domain-general KT models over DKT. Finally, we analyze problem-specific performance through a set of case studies for one assignment to demonstrate when and how code features improve Code-DKT's predictions.},
}

% NOTE(review): the next entry has no booktitle in the source record; confirm the venue from the DOI landing page and add it.
@inproceedings{castro_suh_e_naowaprateep_shi_2022,
  author   = {Castro, Francisco and Suh, Sangho and E, Jane L. and Naowaprateep, Weena and Shi, Yang},
  title    = {Developing Comic-based Learning Toolkits for Teaching Computing to Elementary School Learners},
  year     = {2022},
  month    = mar,
  doi      = {10.1145/3545947.3576272},
  url      = {https://doi.org/10.1145/3545947.3576272},
  abstract = {We describe the use of comics to teach computing by having learners create, design, and arrange comic panels. We designed comic-based learning toolkits, guided by the following research question: How do we support the informal learning of computing concepts for elementary school learners through a physical comic-based learning toolkit? This question emerged as a result of our partnership with a community organization that teaches art to elementary school learners through the production and distribution of art subscription boxes. Subscription boxes contain art materials and instruction manuals that learners can use to create artistic artifacts at home. Partnering with the organization, we explored how to teach computing through art activities and designed a subscription box for comic creation activities that used materials such as paper comic panels, coloring pens, magnets, and activity manuals. Our learning toolkits guide learners to use computing concepts in the story-crafting process, for example: decomposing narratives with comic panels, sequencing comic panels to create a narrative flow, using conditionals (e.g., if-else) for character decision-making within the story, using loops to repeat comic story events, and iterating on or refining the comic to create and develop a cohesive narrative flow.},
}

% Conference paper: the proceedings title belongs in booktitle, so the entry type is inproceedings.
@inproceedings{skripchuk_shi_price_2022,
  author    = {Skripchuk, James and Shi, Yang and Price, Thomas},
  title     = {Identifying Common Errors in Open-Ended Machine Learning Projects},
  booktitle = {Proceedings of the 53rd {ACM} Technical Symposium on Computer Science Education ({SIGCSE} 2022)},
  volume    = {1},
  publisher = {ACM},
  year      = {2022},
  pages     = {216--222},
  doi       = {10.1145/3478431.3499397},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85126130120&partnerID=MN8TOARS},
}

@inproceedings{penmetsa_shi_price_2021,
  author    = {Penmetsa, P. and Shi, Y. and Price, T.},
  title     = {Investigate Effectiveness of Code Features in Knowledge Tracing Task on Novice Programming Course},
  booktitle = {{CEUR} Workshop Proceedings},
  volume    = {3051},
  year      = {2021},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85122896934&partnerID=MN8TOARS},
}

% The three Educational Data Mining 2021 entries below originally used their full titles
% (with spaces and colons) as citation keys, which BibTeX cannot parse. The keys are now
% ASCII-only and the title text has been moved into a title field.
% NOTE(review): the source records carry no author field; fill it in from the ERIC pages.
@inproceedings{just_a_few_expert_constraints_can_help_2021,
  title     = {Just a Few Expert Constraints Can Help: Humanizing Data-Driven Subgoal Detection for Novice Programming},
  booktitle = {Educational Data Mining 2021},
  year      = {2021},
  url       = {https://eric.ed.gov/?id=ED615599},
}

@inproceedings{knowing_both_when_and_where_2021,
  title     = {Knowing Both When and Where: Temporal-{ASTNN} for Early Prediction of Student Success in Novice Programming Tasks},
  booktitle = {Educational Data Mining 2021},
  year      = {2021},
  url       = {https://eric.ed.gov/?id=ED615543},
}

@inproceedings{more_with_less_2021,
  title     = {More with Less: Exploring How to Use Deep Learning Effectively through Semi-Supervised Learning for Automatic Bug Detection in Student Code},
  booktitle = {Educational Data Mining 2021},
  year      = {2021},
  url       = {https://eric.ed.gov/?id=ED615586},
}

% Conference paper (LAK21): stored as inproceedings with the proceedings title in booktitle.
@inproceedings{shi_shah_wang_marwan_penmetsa_price_2021,
  author    = {Shi, Yang and Shah, Krupal and Wang, Wengran and Marwan, Samiha and Penmetsa, Poorvaja and Price, Thomas W.},
  title     = {Toward Semi-Automatic Misconception Discovery Using Code Embeddings},
  booktitle = {{LAK21} Conference Proceedings: The Eleventh International Conference on Learning Analytics \& Knowledge},
  publisher = {ACM},
  year      = {2021},
  pages     = {606--612},
  doi       = {10.1145/3448139.3448205},
  url       = {http://dx.doi.org/10.1145/3448139.3448205},
  abstract  = {Understanding students' misconceptions is important for effective teaching and assessment. However, discovering such misconceptions manually can be time-consuming and laborious. Automated misconception discovery can address these challenges by highlighting patterns in student data, which domain experts can then inspect to identify misconceptions. In this work, we present a novel method for the semi-automated discovery of problem-specific misconceptions from students' program code in computing courses, using a state-of-the-art code classification model. We trained the model on a block-based programming dataset and used the learned embedding to cluster incorrect student submissions. We found these clusters correspond to specific misconceptions about the problem and would not have been easily discovered with existing approaches. We also discuss potential applications of our approach and how these misconceptions inform domain-specific insights into students' learning processes.},
}

% arXiv record with the same title and author list as the LAK21 paper above. It previously
% reused that paper's citation key, so BibTeX reported a repeated entry and discarded it;
% the key now carries an _arxiv suffix.
@article{shi_shah_wang_marwan_penmetsa_price_2021_arxiv,
  author  = {Shi, Y. and Shah, K. and Wang, W. and Marwan, S. and Penmetsa, P. and Price, T. W.},
  title   = {Toward Semi-Automatic Misconception Discovery Using Code Embeddings},
  journal = {arXiv},
  year    = {2021},
  url     = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85102809405&partnerID=MN8TOARS},
}

@inproceedings{wang_rao_shi_milliken_martens_barnes_price_2020,
  author    = {Wang, W. and Rao, Y. and Shi, Y. and Milliken, A. and Martens, C. and Barnes, T. and Price, T. W.},
  title     = {Comparing Feature Engineering Approaches to Predict Complex Programming Behaviors},
  booktitle = {{CEUR} Workshop Proceedings},
  volume    = {2734},
  year      = {2020},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85096164837&partnerID=MN8TOARS},
}

% The underscore in the team name is escaped so the title typesets outside math mode.
@article{chen_huang_hou_shi_dai_wang_2020,
  author  = {Chen, C. and Huang, C.-Y. and Hou, Y. and Shi, Y. and Dai, E. and Wang, J.},
  title   = {{Test\_positive} at {W-NUT} 2020 Shared Task-3: Joint Event Multi-Task Learning for Slot Filling in Noisy Text},
  journal = {arXiv},
  year    = {2020},
  url     = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85098406978&partnerID=MN8TOARS},
}

@inproceedings{shi_li_song_li_ye_2019,
  author    = {Shi, Yang and Li, Fangyu and Song, WenZhan and Li, Xiang-Yang and Ye, Jin},
  title     = {Energy Audition Based Cyber-Physical Attack Detection System in {IoT}},
  booktitle = {Proceedings of the {ACM} Turing Celebration Conference - China},
  publisher = {ACM},
  year      = {2019},
  month     = may,
  doi       = {10.1145/3321408.3321588},
  url       = {http://dx.doi.org/10.1145/3321408.3321588},
  abstract  = {In this paper, we propose an attack detection framework in the Internet of Things (IoT) devices. The framework applies a data-centric method to process the energy consumption data and classify the attack status of the monitored device. We implement the framework in real hardware, and emulate common types of attacks to evaluate the performance of the attack detection framework. Due to the characteristic of the energy data, not only cyber attacks but also physical attacks such as heating are also emulated and tested. To shorten the detection time, a two-stage strategy is also proposed to first apply a short time window for a rough detection, then a long time window to the fine detection of anomalies. The accuracy of short-term detection is 90\%, while in the long-term detections the accuracy reaches 99.5\%. Due to the nature of information from energy consumption data, the framework is more secure in cases the kernel of the device is already compromised.},
}

@article{li_shi_shinde_ye_song_2019,
  author    = {Li, Fangyu and Shi, Yang and Shinde, Aditya and Ye, Jin and Song, Wenzhan},
  title     = {Enhanced Cyber-Physical Security in {Internet of Things} Through Energy Auditing},
  journal   = {{IEEE} Internet of Things Journal},
  volume    = {6},
  number    = {3},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  year      = {2019},
  month     = jun,
  pages     = {5224--5231},
  doi       = {10.1109/JIOT.2019.2899492},
  url       = {https://doi.org/10.1109/JIOT.2019.2899492},
  abstract  = {Internet of Things (IoT) are vulnerable to both cyber and physical attacks. Therefore, a cyber-physical security system against different kinds of attacks is in high demand. Traditionally, attacks are detected via monitoring system logs. However, the system logs, such as network statistics and file access records, can be forged. Furthermore, existing solutions mainly target cyber attacks. This paper proposes the first energy auditing and analytics-based IoT monitoring mechanism. To our best knowledge, this is the first attempt to detect and identify IoT cyber and physical attacks based on energy auditing. Using the energy meter readings, we develop a dual deep learning (DL) model system, which adaptively learns the system behaviors in a normal condition. Unlike the previous single DL models for energy disaggregation, we propose a disaggregation-aggregation architecture. The innovative design makes it possible to detect both cyber and physical attacks. The disaggregation model analyzes the energy consumptions of system subcomponents, e.g., CPU, network, disk, etc., to identify cyber attacks, while the aggregation model detects the physical attacks by characterizing the difference between the measured power consumption and prediction results. Using energy consumption data only, the proposed system identifies both cyber and physical attacks. The system and algorithm designs are described in detail. In the hardware simulation experiments, the proposed system exhibits promising performances.},
}

@article{li_shinde_shi_ye_li_song_2019,
  author    = {Li, Fangyu and Shinde, Aditya and Shi, Yang and Ye, Jin and Li, Xiang-Yang and Song, Wenzhan},
  title     = {System Statistics Learning-Based {IoT} Security: Feasibility and Suitability},
  journal   = {{IEEE} Internet of Things Journal},
  volume    = {6},
  number    = {4},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  year      = {2019},
  pages     = {6396--6403},
  doi       = {10.1109/jiot.2019.2897063},
  url       = {http://dx.doi.org/10.1109/jiot.2019.2897063},
  abstract  = {Cyber attacks and malfunctions challenge the wide applications of Internet of Things (IoT). Since they are generally designed as embedded systems, typical auto-sustainable IoT devices usually have a limited capacity and a low processing power. Because of the limited computation resources, it is difficult to apply the traditional techniques designed for personal computers or super computers, like traffic analyzers and antivirus software. In this paper, we propose to leverage statistical learning methods to characterize the device behavior and flag deviations as anomalies. Because the system statistics, such as CPU usage cycles, disk usage, etc., can be obtained by IoT application program interfaces, the proposed framework is platform and device-independent. Considering IoT applications, we train multiple machine learning models to evaluate their feasibility and suitability. For the target auto-sustainable IoT devices, which operate well-planned processes, the normal system performances can be modeled accurately. Based on time series analysis methods, such as local outlier factor, cumulative sum, and the proposed adaptive online thresholding, the anomalous behaviors can be effectively detected. Comparing their performances on detecting anomalies as well as the computation sources required, we conclude that relatively simple machine learning models are more suitable for IoT security, and a data-driven anomaly detection method is preferred.},
}

% NOTE(review): the volume value below is copied verbatim from the source record; confirm it against the publisher page.
@inproceedings{shi_li_liu_beyette_song_2018,
  author    = {Shi, Y. and Li, F. and Liu, T. and Beyette, F. R. and Song, W.},
  title     = {Dynamic Time-frequency Feature Extraction for Brain Activity Recognition},
  booktitle = {2018 40th Annual International Conference of the {IEEE} Engineering in Medicine and Biology Society ({EMBC})},
  volume    = {2018-July},
  year      = {2018},
  month     = jul,
  pages     = {3104--3107},
  doi       = {10.1109/embc.2018.8512914},
  url       = {http://dx.doi.org/10.1109/embc.2018.8512914},
  abstract  = {The biomedical signal classification accuracy on motor imagery is not always satisfactory, partially because not all the important features have been effectively extracted. This paper proposes an improved dynamic feature extraction approach based on a time-frequency representation and an optimal sequence similarity measurement. Since the wavelet packet decomposition (WPD) generates more detailed signal variation information and the dynamic time warping (DTW) helps optimally measure the sequence similarity, more important features are kept for classification. We apply the extracted features from our proposed method to Electroencephalogram (EEG) based motor imagery through the OpenBCI device and obtain higher classification accuracy. Compared with traditional feature extraction methods, there is a significant classification accuracy improvement from 83.53\% to 90.89\%. Our work demonstrates the importance of the advanced feature extraction in time series data analysis, e.g. biomedical signal.},
}

% The source record used its title as the citation key and carried only year and month.
% It is stored as misc because no journal is recorded.
% NOTE(review): add the author list and venue or eprint identifier once confirmed.
@misc{potrojan_2018,
  title = {{PoTrojan}: Powerful Neural-Level Trojan Designs in Deep Learning Models},
  year  = {2018},
  month = feb,
}