@comment{
  Conference paper proposing MeSH2Vec (word embeddings augmented with an
  external biomedical knowledge base), published in the proceedings of the
  2017 IEEE International Conference on Big Data.
  NOTE(review): the month/day fields below (1 Jul) do not match the
  conference dates recorded in the note field (11-14 Dec 2017); this looks
  like an exporter placeholder date -- confirm against the publisher record.
}
@inproceedings{d2d2f4097bc941ce9dee9b36f74de69f,
  author    = {Jha, Kishlay and Xun, Guangxu and Gopalakrishnan, Vishrawas and Zhang, Aidong},
  title     = {Augmenting word embeddings through external knowledge-base for biomedical application},
  booktitle = {Proceedings - 2017 {IEEE} International Conference on Big Data, Big Data 2017},
  editor    = {Nie, Jian-Yun and Obradovic, Zoran and Suzumura, Toyotaro and Ghosh, Rumi and Nambiar, Raghunath and Wang, Chonggang and Zang, Hui and Baeza-Yates, Ricardo and Hu, Xiaohua and Kepner, Jeremy and Cuzzocrea, Alfredo and Tang, Jian and Toyoda, Masashi},
  pages     = {1965--1974},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2017},
  month     = jul,
  day       = {1},
  doi       = {10.1109/BigData.2017.8258142},
  language  = {English},
  keywords  = {biomedical domain, semantic knowledge, word embedding},
  abstract  = {The technological advancements in biomedical domain has led to a tremendous growth of unstructured data; primarily a result of increased publication of findings. At the same time, a corresponding interest in the Natural Language Processing (NLP) community to develop scalable methodologies to exploit such massive unlabeled corpora for unsupervised language processing has resulted in new opportunities towards developing semantic sensitive models. Amongst them, the field of word embeddings has garnered significant attention due to its capability to understand implicit semantics. However such data driven models are largely agnostic of the rich explicit semantic knowledge available in the biomedical domain in the form of vocabularies and ontologies. This is problematic because it leads to a poor representation of words with little local context and its effect is acute in biomedical domain. In this paper, we propose a novel model (MeSH2Vec) that jointly exploits both contextual information and available explicit semantic knowledge to learn externally augmented word embeddings. Unlike existing approaches, the proposed methodology is more dexterous in its ability to handle relationships between indirectly related concepts. The 13\% improvement in the correlation to experts, shown on experiments involving biomedical concept similarity and relatedness task validates the effectiveness of the proposed approach and demonstrates the importance of incorporating human curated knowledge in the process of generating word embeddings.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 5th IEEE International Conference on Big Data, Big Data 2017 ; Conference date: 11-12-2017 Through 14-12-2017},
}