@comment{
  Conference and workshop papers by Nan, Shen and co-authors (2021-2022).
  All three entries are proceedings papers, so they use the inproceedings
  type with booktitle. Page ranges use the double-hyphen form, and acronyms
  in titles are brace-protected against style recasing.
}

@comment{Tool demo paper: the HISyn extension for the Visual Studio Code IDE.}
@inproceedings{young_nan_shen_2022,
  author       = {Young, Mitchell and Nan, Zifan and Shen, Xipeng},
  title        = {{IDE} Augmented with Human-Learning Inspired Natural Language Programming},
  booktitle    = {2022 {ACM/IEEE} 44th International Conference on Software Engineering: Companion Proceedings ({ICSE-Companion} 2022)},
  year         = {2022},
  pages        = {110--114},
  issn         = {2574-1926},
  doi          = {10.1145/3510454.3516832},
  abstractNote = {Natural Language (NL) programming, the concept of synthesizing code from natural language inputs, has garnered growing interest among the software community in recent years. Unfortunately, current solutions in the space all suffer from the same problem, they require many labeled training examples due to their data-driven nature. To address this issue, this paper proposes an NLU-driven approach that forgoes the need for large numbers of labeled training examples. Inspired by how humans learn programming, this solution centers around Natural Language Understanding and draws on a novel graph-based mapping algorithm. The resulting NL programming framework, HISyn, uses no training examples, but gives synthesis accuracies comparable to data-driven methods trained on hundreds of samples. HISyn meanwhile demonstrates advantages in terms of interpretability, error diagnosis support, and cross-domain extensibility. To encourage adoption of HISyn among developers, the tool is made available as an extension for the Visual Studio Code IDE, thereby allowing users to easily submit inputs to HISyn and insert the generated code expressions into their active programs. A demo of the HISyn Extension can be found at https://youtu.be/KKOqJS24FNo.},
}

@comment{Workshop paper introducing INPOWS, an NLU-powered workflow synthesis approach.}
@inproceedings{nan_dave_shen_liao_vanderbruggen_lin_emani_2022,
  author       = {Nan, Zifan and Dave, Mithil and Shen, Xipeng and Liao, Chunhua and Vanderbruggen, Tristan and Lin, Pei-Hung and Emani, Murali},
  title        = {Interactive {NLU}-Powered Ontology-Based Workflow Synthesis for {FAIR} Support of {HPC}},
  booktitle    = {2022 {IEEE/ACM} International Workshop on {HPC} User Support Tools ({HUST})},
  year         = {2022},
  pages        = {29--40},
  doi          = {10.1109/HUST56722.2022.00009},
  abstractNote = {Workflow synthesis is important for automatically creating the data processing workflow in a FAIR data management system for HPC. Previous methods are table-based, rigid and not scalable. This paper addresses these limitations by developing a new approach to workflow synthesis, interactive NLU-powered ontology-based workflow synthesis (INPOWS). INPOWS allows the use of Natural Language for queries, maximizes the robustness in handling concepts and language ambiguities through an interactive ontology-based design, and achieves superior extensibility by adopting a synthesis algorithm powered by Natural Language Understanding. In our experiments, INPOWS shows the efficacy in enabling flexible, robust, and extensible workflow synthesis.},
}

@comment{Workshop paper presenting the HPC ontology for FAIR training datasets and AI models.}
@inproceedings{liao_lin_verma_vanderbruggen_emani_nan_shen_2021,
  author       = {Liao, Chunhua and Lin, Pei-Hung and Verma, Gaurav and Vanderbruggen, Tristan and Emani, Murali and Nan, Zifan and Shen, Xipeng},
  title        = {{HPC} Ontology: Towards a Unified Ontology for Managing Training Datasets and {AI} Models for High-Performance Computing},
  booktitle    = {Proceedings of the Workshop on Machine Learning in High Performance Computing Environments ({MLHPC} 2021)},
  year         = {2021},
  pages        = {69--80},
  doi          = {10.1109/MLHPC54614.2021.00012},
  abstractNote = {Machine learning (ML) techniques have been widely studied to address various challenges of productively and efficiently running large-scale scientific applications on heterogeneous supercomputers. However, it is extremely difficult to generate, access, and maintain training datasets and AI models to accelerate ML-based research. The Future of Research Communications and e-Scholarship has proposed the FAIR data principles describing Findability, Accessibility, Interoperability, and Reusability. In this paper, we present our ongoing work of designing an ontology for high-performance computing (named HPC ontology) in order to make training datasets and AI models FAIR. Our ontology provides controlled vocabularies, explicit semantics, and formal knowledge representations. Our design uses an extensible two-level pattern, capturing both high-level meta information and low-level data content for software, hardware, experiments, workflows, training datasets, AI models, and so on. Preliminary evaluation shows that HPC ontology is effective to annotate selected data and support a set of SPARQL queries.},
}