% Conference paper (EUSIPCO 2015); the citation key is the exporter's opaque id and is kept as-is.
% Cleanup relative to the raw export:
%   - the acronym in the title is braced so sentence-casing styles do not turn it into "Hog";
%   - the former "series" field was an exact copy of "booktitle" and made styles that print
%     both fields repeat the proceedings name, so it was removed;
%   - authors are written in "Last, First" form (same names, unambiguous parsing).
% The fields "abstract", "keywords", "day" and "language" are not used by the classic styles
% and are kept only as metadata.
% NOTE(review): the third author's given name may carry a diaeresis in the published paper
% (Ga{\"e}l) -- confirm against the DOI record before changing it.
@inproceedings{4b47689c19cc4fc8909b004ee44d428a,
  author    = {Bisot, Victor and Essid, Slim and Richard, Gael},
  title     = {{HOG} and subband power distribution image features for acoustic scene classification},
  booktitle = {2015 23rd European Signal Processing Conference, EUSIPCO 2015},
  pages     = {719--723},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2015},
  month     = dec,
  day       = {22},
  doi       = {10.1109/EUSIPCO.2015.7362477},
  language  = {English},
  keywords  = {Acoustic scene classification, Sinkhorn distance, subband power distribution image, support vector machine},
  abstract  = {Acoustic scene classification is a difficult problem mostly due to the high density of events concurrently occurring in audio scenes. In order to capture the occurrences of these events we propose to use the Subband Power Distribution (SPD) as a feature. We extract it by computing the histogram of amplitude values in each frequency band of a spectrogram image. The SPD allows us to model the density of events in each frequency band. Our method is evaluated on a large acoustic scene dataset using support vector machines. We outperform the previous methods when using the SPD in conjunction with the histogram of gradients. To reach further improvement, we also consider the use of an approximation of the earth mover's distance kernel to compare histograms in a more suitable way. Using the so-called Sinkhorn kernel improves the results on most of the feature configurations. Best performances reach a 92.8\% F1 score.},
  note      = {Publisher Copyright: {\textcopyright} 2015 EURASIP.; 23rd European Signal Processing Conference, EUSIPCO 2015 ; Conference date: 31-08-2015 Through 04-09-2015},
}