% Journal article (2023): construction domain-specific language model used to
% search the NCDOT CLEAR lessons-learned/best-practices database.
@article{banerjee_potts_jhala_jaselskis_2023,
  author   = {Banerjee, Siddharth and Potts, Colin M. and Jhala, Arnav H. and Jaselskis, Edward J.},
  title    = {Developing a Construction Domain-Specific Artificial Intelligence Language Model for {NCDOT's} {CLEAR} Program to Promote Organizational Innovation and Institutional Knowledge},
  journal  = {Journal of Computing in Civil Engineering},
  year     = {2023},
  month    = may,
  volume   = {37},
  number   = {3},
  issn     = {1943-5487},
  doi      = {10.1061/JCCEE5.CPENG-4868},
  abstract = {Transportation agency personnel gain valuable knowledge through their work, but such knowledge is lost if it is not documented properly after the worker leaves the organization.
    The risk of losing institutional knowledge is a current problem at state departments of transportation, including the North Carolina Department of Transportation (NCDOT), due to high personnel turnover.
    State transportation agencies have implemented knowledge repositories in the form of lessons learned/best practices databases to address this problem.
    However, motivating end-users to use such databases is challenging.
    This paper addresses this challenge through novel artificial intelligence technology whereby a neural network--based language model is implemented as part of the NCDOT's new knowledge management program: Communicate Lessons, Exchange Advice, Record (CLEAR).
    The CLEAR program encompasses a database of lessons learned/best practices and a website to access and search the database.
    The developed methodology involves training a language model on transportation construction texts and using that trained model in a novel algorithm enabling users to search the CLEAR database easily.
    The developed language-processing model provides an easily accessible interface to suggest the most relevant CLEAR data based on the end-user's searched keywords.
    The model learns an inference model of construction domain--specific vocabulary extracted from various sources, such as contract documents, textbooks, and specifications, to make meaningful connections between lessons learned/best practices in the CLEAR database and project-specific knowledge.
    The developed model has been validated by project managers for projects at various life cycle stages.
    The automation of information retrieval is intended to encourage NCDOT personnel to use and embrace the CLEAR program as part of their routine work to improve project workflow.
    In the long run, the NCDOT will benefit from consistent usage of the CLEAR program and its high quality content, thereby leading to enhanced institutional knowledge and organizational innovation.
    Practical Applications: The construction industry, with a particular emphasis on transportation construction, currently faces tremendous challenges in retaining and retraining existing personnel to ensure business continuity on projects.
    Knowledge gained on projects by project personnel can be lost forever if not properly documented.
    While knowledge repositories are effective toward ensuring the storing and retrieving of past knowledge, extant literature underlines the need to ensure continued participation by the end-users for the success of such repositories.
    This research effort uses natural language processing, a subfield of artificial intelligence that deals specifically with text sources, as a means to quickly and accurately enhance the quality of search results being displayed to the end-users within the North Carolina Department of Transportation's recently commissioned knowledge management program called CLEAR.
    As a result, end-users can stay motivated and embrace the CLEAR program, thereby ensuring its long-term success.
    In the long run, the consistent usage of the CLEAR program and the high quality content that is input to the CLEAR database by the NCDOT end-users will lead to enhanced institutional knowledge and internal organizational innovation.},
}

% Journal article (2022): combining LDA and doc2vec topic-model outputs,
% analysed with NPMI, for knowledge discovery.
@article{potts_savaliya_jhala_2022,
  author   = {Potts, Colin M. and Savaliya, Akshat and Jhala, Arnav},
  title    = {Leveraging Multiple Representations of Topic Models for Knowledge Discovery},
  journal  = {{IEEE} Access},
  year     = {2022},
  volume   = {10},
  pages    = {104696--104705},
  issn     = {2169-3536},
  doi      = {10.1109/ACCESS.2022.3210529},
  url      = {https://doi.org/10.1109/ACCESS.2022.3210529},
  abstract = {Topic models are often useful in categorization of related documents in information retrieval and knowledge discovery systems, especially for large datasets.
    Interpreting the output of these models remains an ongoing challenge for the research community.
    The typical practice in the application of topic models is to tune the parameters of a chosen model for a target dataset and select the model with the best output based on a given metric.
    We present a novel perspective on topic analysis by presenting a process for combining output from multiple models with different theoretical underpinnings.
    We show that this results in our ability to tackle novel tasks such as semantic characterization of content that cannot be carried out by using single models.
    One example task is to characterize the differences between topics or documents in terms of their purpose and also importance with respect to the underlying output of the discovery algorithm.
    To show the potential benefit of leveraging multiple models we present an algorithm to map the term-space of Latent Dirichlet Allocation (LDA) to the neural document-embedding space of doc2vec.
    We also show that by utilizing both models in parallel and analyzing the resulting document distributions using the Normalized Pointwise Mutual Information (NPMI) metric we can gain insight into the purpose and importance of topics across models.
    This approach moves beyond topic identification to a richer characterization of the information and provides a better understanding of the complex relationships between these typically competing techniques.},
}