% Conference paper in the SIGMOD '22 proceedings. Typed as inproceedings
% (proceedings title in booktitle) rather than as a journal article.
% abstractNote is a non-standard field kept for tools that read it;
% standard bibliography styles ignore it.
@inproceedings{hou_korn_melo-filho_wright_tropsha_chirkova_2022,
  author       = {Hou, Pei-Yu and Korn, Daniel R. and Melo-Filho, Cleber C. and Wright, David R. and Tropsha, Alexander and Chirkova, Rada},
  title        = {Compact Walks: Taming Knowledge-Graph Embeddings With Domain- and Task-Specific Pathways},
  booktitle    = {Proceedings of the 2022 International Conference on Management of Data ({SIGMOD} '22)},
  year         = {2022},
  pages        = {458--469},
  doi          = {10.1145/3514221.3517903},
  issn         = {0730-8078},
  abstractNote = {Knowledge-graph (KG) embeddings have emerged as a promise in addressing challenges faced by modern biomedical research, including the growing gap between therapeutic needs and available treatments. The popularity of KG embeddings in graph analytics is on the rise, due at least partially to the presumed semanticity of the learned embeddings. Unfortunately, the ability of a node neighborhood picked up by an embedding to capture the node's semantics may depend on the characteristics of the data. One of the reasons for this problem is that KG nodes can be promiscuous, that is, associated with a number of different relationships that are not unique or indicative of the properties of the nodes. To address the promiscuity challenge and the documented runtime-performance challenge in real-life KG embedding tools, we propose to use domain- and task-specific information to specify regular-expression pathways that define neighborhoods of KG nodes of interest. Our proposed CompactWalks framework uses these semantic subgraphs to enable meaningful compact walks in random-walk based KG embedding methods. We report the results of case studies for the task of determining which pharmaceutical drugs could treat the same diseases. The findings suggest that our CompactWalks approach has the potential to address the promiscuity and runtime-performance challenges in applying embedding tools to large-scale KGs in real life, in the biomedical domain and possibly beyond.},
}

% Conference paper in the 2021 IEEE Big Data proceedings. Typed as
% inproceedings for the same reason as the entry above.
@inproceedings{ao_dinakaran_yang_wright_chirkova_2021,
  author       = {Ao, Jing and Dinakaran, Swathi and Yang, Hungjian and Wright, David and Chirkova, Rada},
  title        = {Trustworthy Knowledge Graph Population From Texts for Domain Query Answering},
  booktitle    = {2021 {IEEE} International Conference on Big Data (Big Data)},
  year         = {2021},
  pages        = {4590--4599},
  doi          = {10.1109/BigData52589.2021.9671514},
  issn         = {2639-1589},
  abstractNote = {Obtaining answers to domain-specific questions over large-scale unstructured (text) data is an important component of data analytics in many application domains. As manual question answering does not scale to large text corpora, it is common to use information extraction (IE) to preprocess the texts of interest prior to posing the questions. This is often done by transforming text corpora into the knowledge-graph (KG) triple format that is suitable for efficient processing of the user questions in graph-oriented data-intensive systems. In a number of real-life scenarios, trustworthiness of the answers obtained from domain-specific texts is vital for downstream decision making. In this paper we focus on one critical aspect of trustworthiness, which concerns aligning with the given domain vocabularies (ontologies) those KG triples that are obtained from the source texts via IE solutions. To address this problem, we introduce a scalable domain-independent text-to-KG approach that adapts to specific domains by using domain ontologies, without having to consult external triple repositories. Our IE solution builds on the power of neural-based learning models and leverages feature engineering to distinguish ontology-aligned data from generic data in the source texts. Our experimental results indicate that the proposed approach could be more dependable than a state-of-the-art IE baseline in constructing KGs that are suitable for trustworthy domain question answering on text data.},
}