% Conference paper by Rebele, Tzompanaki and Suchanek in the PAKDD 2018 proceedings.
% The hash-style citation key is kept unchanged so existing citations keep resolving.
% All names use the "Last, First" form; given names carry no escaped braces, which
% would otherwise be printed literally. The month follows the conference date that is
% recorded in the note field (3-6 June 2018). The DOI is stored bare, without escapes.
@inproceedings{2748ad5127ab4e8290347151ccfc1425,
  author    = {Rebele, Thomas and Tzompanaki, Katerina and Suchanek, Fabian M.},
  title     = {Adding missing words to regular expressions},
  editor    = {Ho, Bao and Phung, Dinh and Webb, Geoffrey I. and Tseng, Vincent S. and Ganji, Mohadeseh and Rashidi, Lida},
  booktitle = {Advances in Knowledge Discovery and Data Mining - 22nd Pacific-Asia Conference, PAKDD 2018, Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  pages     = {67--79},
  year      = {2018},
  month     = jun,
  doi       = {10.1007/978-3-319-93037-4_6},
  isbn      = {9783319930367},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2018, Springer International Publishing AG, part of Springer Nature.; 22nd Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining, PAKDD 2018 ; Conference date: 03-06-2018 Through 06-06-2018},
  abstract  = {Regular expressions (regexes) are patterns that are used in many applications to extract words or tokens from text. However, even hand-crafted regexes may fail to match all the intended words. In this paper, we propose a novel way to generalize a given regex so that it matches also a set of missing (previously non-matched) words. Our method finds an approximate match between the missing words and the regex, and adds disjunctions for the unmatched parts appropriately. We show that this method can not just improve the precision and recall of the regex, but also generate much shorter regexes than baselines and competitors on various datasets.},
}