@inproceedings{410d639759554715ba61c98dae370e47,
  title     = {Main Melody Extraction with Source-Filter {NMF} and {CRNN}},
  abstract  = {Estimating the main melody of a polyphonic audio recording remains a challenging task. We approach the task from a classification perspective and adopt a convolutional recurrent neural network (CRNN) architecture that relies on a particular form of pretraining by source-filter nonnegative matrix factorisation (NMF). The source-filter NMF decomposition is chosen for its ability to capture the pitch and timbre content of the leading voice/instrument, providing a better initial pitch salience than standard time-frequency representations. Starting from such a musically motivated representation, we propose to further enhance the NMF-based salience representations with CNN layers, then to model the temporal structure by an RNN network and to estimate the dominant melody with a final classification layer. The results show that such a system achieves state-of-the-art performance on the MedleyDB dataset without any augmentation methods or large training sets.},
  author    = {Basaran, Dogac and Essid, Slim and Peeters, Geoffroy},
  note      = {Publisher Copyright: {\textcopyright} Dogac Basaran, Slim Essid, Geoffroy Peeters.; 19th International Society for Music Information Retrieval Conference, ISMIR 2018 ; Conference date: 23-09-2018 Through 27-09-2018},
  year      = {2018},
  month     = jan,
  day       = {1},
  language  = {English},
  publisher = {International Society for Music Information Retrieval},
  pages     = {82--89},
  editor    = {Gomez, Emilia and Hu, Xiao and Humphrey, Eric and Benetos, Emmanouil},
  booktitle = {Proceedings of the 19th International Society for Music Information Retrieval Conference, {ISMIR} 2018},
}