@inproceedings{1c9e2a0427a24928a3dd028b6fa540da,
  author    = {Zhang, Hong and Yuan, Junsong and Gao, Xingyu and Chen, Zhenyu},
  title     = {Boosting Cross-Media Retrieval via Visual-Auditory Feature Analysis and Relevance Feedback},
  abstract  = {Different types of multimedia data express high-level semantics from different aspects. How to learn comprehensive high-level semantics from different types of data and enable efficient cross-media retrieval becomes an emerging hot issue. There are abundant correlations among heterogeneous low-level media content, which makes it challenging to query cross-media data effectively. In this paper, we propose a new cross-media retrieval method based on short-term and long-term relevance feedback. Our method mainly focuses on two typical types of media data, i.e. image and audio. First, we build multimodal representation via statistical canonical correlation between image and audio feature matrices, and define cross-media distance metric for similarity measure; then we propose optimization strategy based on relevance feedback, which fuses short-term learning results and long-term accumulated knowledge into the objective function. Experiments on image-audio dataset have demonstrated the superiority of our method over several existing algorithms.},
  keywords  = {Cross-media retrieval, Feature analysis, Relevance feedback},
  year      = {2014},
  month     = nov,
  day       = {3},
  booktitle = {MM 2014 - Proceedings of the 2014 {ACM} Conference on Multimedia},
  pages     = {953--956},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/2647868.2654975},
  language  = {English},
  note      = {2014 ACM Conference on Multimedia, MM 2014 ; Conference date: 03-11-2014 Through 07-11-2014},
}