% Conference paper: "Guiding audio source separation by video object information" (WASPAA 2017).
% Authors are stored in "Last, First" form with spaced initials so every style parses them the same way.
% The given name of Duong previously carried escaped braces, which typeset as literal brace characters.
% NOTE(review): "Perez" and "Gael" may carry diacritics in the published byline -- confirm against the paper.
% NOTE(review): the note field is kept verbatim from the export; it repeats the venue and may be worth trimming.
@inproceedings{a2b5d6f5a6c44e98b2e9c9385752781e,
  author    = {Parekh, Sanjeel and Essid, Slim and Ozerov, Alexey and Duong, Ngoc Q. K. and Perez, Patrick and Richard, Gael},
  title     = {Guiding audio source separation by video object information},
  booktitle = {2017 {IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics, {WASPAA} 2017},
  series    = {{IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {61--65},
  year      = {2017},
  month     = dec,
  day       = {7},
  doi       = {10.1109/WASPAA.2017.8169995},
  language  = {English},
  keywords  = {Audio source separation, Audio-visual objects, Motion, Multimodal analysis, Nonnegative matrix factorization},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 2017 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics, WASPAA 2017 ; Conference date: 15-10-2017 Through 18-10-2017},
  abstract  = {In this work we propose novel joint and sequential multimodal approaches for the task of single channel audio source separation in videos. This is done within the popular non-negative matrix factorization framework using information about the sounding object's motion. Specifically, we present methods that utilize non-negative least squares formulation to couple motion and audio information. The proposed techniques generalize recent work carried out on NMF-based motion-informed source separation and easily extend to video data. Experiments with two distinct multimodal datasets of string instrument performance recordings illustrate their advantages over the existing methods.},
}