@inproceedings{3a1700c6e30c41888f4c0a7920fd901a,
title = "Sequence Graphs Realizations and Ambiguity in Language Models",
abstract = "Several language models rely on an assumption modeling each local context as a (potentially oriented) bag of words, and have proven to be very efficient baselines. Sequence graphs are the natural structures encoding their information. However, a sequence graph may have several realizations as a sequence, leading to a degree of ambiguity. In this paper, we study such degree of ambiguity from a combinatorial and computational point of view. In particular, we present theoretical properties of sequence graphs. Several combinatorial problems are presented, depending on three levels of generalisation (window size, graph orientation, and weights), that we characterize with new complexity results. We establish different algorithms, including an integer program and a dynamic programming formulation to respectively recognize a sequence graph and to count the number of its distinct realizations.",
keywords = "Combinatorics, Complexity class, Graphs, Inverse problem, Sequences",
author = "Sammy Khalife and Yann Ponty and Laurent Bulteau",
note = "Publisher Copyright: {\textcopyright} 2021, Springer Nature Switzerland AG.; 27th International Conference on Computing and Combinatorics, COCOON 2021 ; Conference date: 24-10-2021 Through 26-10-2021",
year = "2021",
month = jan,
day = "1",
doi = "10.1007/978-3-030-89543-3\_13",
language = "English",
isbn = "9783030895426",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Science and Business Media Deutschland GmbH",
pages = "153--163",
editor = "Chi-Yeh Chen and Wing-Kai Hon and Ling-Ju Hung and Chia-Wei Lee",
booktitle = "Computing and Combinatorics - 27th International Conference, COCOON 2021, Proceedings",
}