% Conference paper: C-SMOTE, in the proceedings of the 2020 IEEE International Conference on Big Data.
% Names use the "Last, First" form; words that must keep their case in the title are brace-protected.
@inproceedings{3299333157274790ac56c19cb923f78c,
  author    = {Bernardo, Alessio and Gomes, Heitor Murilo and Montiel, Jacob and Pfahringer, Bernhard and Bifet, Albert and Della Valle, Emanuele},
  title     = {{C-SMOTE}: Continuous Synthetic Minority Oversampling for Evolving Data Streams},
  booktitle = {Proceedings - 2020 {IEEE} International Conference on Big Data, Big Data 2020},
  editor    = {Wu, Xintao and Jermaine, Chris and Xiong, Li and Hu, Xiaohua Tony and Kotevska, Olivera and Lu, Siyuan and Xu, Weijia and Aluru, Srinivas and Zhai, Chengxiang and Al-Masri, Eyhab and Chen, Zhiyuan and Saltz, Jeff},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {483--492},
  year      = {2020},
  month     = dec,
  day       = {10},
  doi       = {10.1109/BigData50022.2020.9377768},
  language  = {English},
  keywords  = {Balancing, Binary Classification, Concept Drift, Streaming data},
  abstract  = {Streaming Machine Learning (SML) studies single-pass learning algorithms that update their models one data item at a time given an unbounded and often non-stationary flow of data (a.k.a., in presence of concept drift). Online class imbalance learning is a branch of SML that combines the challenges of both class imbalance and concept drift. In this paper, we investigate the binary classification problem of rebalancing an imbalanced stream of data in the presence of concept drift, accessing one sample at a time. We propose Continuous Synthetic Minority Oversampling Technique (C-SMOTE), a novel rebalancing meta-strategy to pipeline with SML classification algorithms. C-SMOTE is inspired by the popular SMOTE algorithm but operates continuously. We benchmark C-SMOTE pipelines on ten different groups of data streams. We bring empirical evidence that models learnt with C-SMOTE pipelines outperform models trained on imbalanced data stream without losing the ability to deal with concept drifts. Moreover, we show that they outperform other stream balancing techniques from the literature.},
  note      = {Publisher Copyright: {\textcopyright} 2020 IEEE.; 8th IEEE International Conference on Big Data, Big Data 2020 ; Conference date: 10-12-2020 Through 13-12-2020},
}