% Conference paper on BatchQueue (SBAC-PAD 2010 proceedings, pp. 215--222).
% The citation key is the exporter-generated hash; it is kept unchanged so
% existing citations continue to resolve.
% Cleanup notes: the exported "series" field was a verbatim copy of
% "booktitle" and has been dropped; month/day are kept as exported and
% presumably give the publication date, not the conference date (see note).
@inproceedings{67325b401ffa4a7cbb6cee363b3f0a94,
  author    = {Preud'homme, Thomas and Sopena, Julien and Thomas, Ga{\"e}l and Folliot, Bertil},
  title     = {{BatchQueue}: Fast and Memory-Thrifty Core to Core Communication},
  booktitle = {Proceedings - 22nd International Symposium on Computer Architecture and High Performance Computing, {SBAC-PAD} 2010},
  pages     = {215--222},
  year      = {2010},
  month     = dec,
  day       = {1},
  doi       = {10.1109/SBAC-PAD.2010.34},
  isbn      = {9780769542164},
  language  = {English},
  note      = {22nd International Symposium on Computer Architecture and High Performance Computing, SBAC-PAD 2010 ; Conference date: 27-10-2010 Through 30-10-2010},
  abstract  = {Sequential applications can take advantage of multi-core systems by way of pipeline parallelism to improve their performance. In such parallelism, core to core communication overhead is the main limit of speedup. This paper presents BatchQueue, a fast and memory-thrifty core to core communication system based on batch processing of whole cache line. BatchQueue is able to send a 32bit word of data in just 12.5 ns on a Xeon X5472 and only needs 2 full cache lines plus 3 byte-sized variables - each on a different cache line for optimal performance - to work. The characteristics of BatchQueue - high throughput and increased latency resulting from its batch processing - makes it well suited for highly communicative tasks with no real time requirements such as monitoring.},
}