@inproceedings{2b3b2f41c2c44f68b775bb3b4b168a3d,
  author        = {Laclau, Charlotte and Nadif, Mohamed},
  title         = {Fast simultaneous clustering and feature selection for binary data},
  booktitle     = {Advances in Intelligent Data Analysis XIII - 13th International Symposium, {IDA} 2014, Proceedings},
  editor        = {Blockeel, Hendrik and van Leeuwen, Matthijs and Vinciotti, Veronica},
  series        = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher     = {Springer Verlag},
  pages         = {192--202},
  year          = {2014},
  month         = jan,
  day           = {1},
  doi           = {10.1007/978-3-319-12571-8_17},
  language      = {English},
  note          = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2014},
  internal-note = {Review: the exported note also carried the event metadata: PAKDD 2006 International Workshop on Knowledge Discovery in Life Science Literature, KDLL 2006 ; Conference date: 09-04-2006 Through 09-04-2006. This contradicts booktitle and year (IDA 2014) and looks like an exporter artefact, so it is kept here (unprinted) instead of in note. Verify against the publisher record. The month/day values may likewise be exporter defaults.},
  abstract      = {This paper addresses the problem of clustering binary data with feature selection within the context of maximum likelihood (ML) and classification maximum likelihood (CML) approaches. In order to efficiently perform the clustering with feature selection, we propose the use of an appropriate Bernoulli model. We derive two algorithms: Expectation-Maximization (EM) and Classification EM (CEM) with feature selection. Without requiring a knowledge of the number of clusters, both algorithms optimize two approximations of the minimum message length (MML) criterion. To exploit the advantages of EM for clustering and of CEM for fast convergence, we combine the two algorithms. With Monte Carlo simulations and by varying parameters of the model, we rigorously validate the approach. We also illustrate our contribution using real datasets commonly used in document clustering.},
}