% Conference paper, ICMLC 2018 (ACM International Conference Proceeding Series), pp. 134--139.
@inproceedings{ccc5c9855eb54ea4b3d122dcd27a1fc2,
  author    = {Zhu, Liang and Du, Xu and Ma, Qin and Meng, Weiyi and Liu, Haibo},
  title     = {Keyword search with real-time entity resolution in relational databases},
  booktitle = {Proceedings of 2018 10th International Conference on Machine Learning and Computing, {ICMLC} 2018},
  series    = {ACM International Conference Proceeding Series},
  publisher = {Association for Computing Machinery},
  pages     = {134--139},
  year      = {2018},
  month     = feb,
  day       = {26},
  doi       = {10.1145/3195106.3195171},
  language  = {English},
  keywords  = {Entity resolution, Relational database, Similarity, Top-N keyword query},
  abstract  = {Traditional methods of IR-style keyword search/query in relational databases are based on clean data without entity resolution (ER), and as a result, their answers to a query may contain duplicates for dirty datasets with duplicate tuples that have different identifiers and refer to the same real-world entity. In this paper, we propose a method for processing top-N keyword queries with real-time ER. This method creates an index to obtain candidate tuples for a keyword query, defines a function to compute the similarities between the query and its candidate tuples, and designs a clustering algorithm with the Divide and Conquer mechanism to deduplicate the query results. Extensive experiments are conducted to confirm the effectiveness and efficiency of the method for both dirty and (almost) clean datasets.},
  note      = {Publisher Copyright: {\textcopyright} 2018 Association for Computing Machinery.; 10th International Conference on Machine Learning and Computing, ICMLC 2018 ; Conference date: 26-02-2018 Through 28-02-2018},
}