% Valeo4Cast workshop paper, published in the ECCV 2024 Workshops proceedings (LNCS series).
% Names are stored in "Last, First" form; the DOI is stored bare (no LaTeX escaping) for doi/url-aware styles.
@inproceedings{ab721a632913470791d21fe81d5637e2,
  author    = {Xu, Yihong and Zablocki, {\'E}loi and Boulch, Alexandre and Puy, Gilles and Chen, Mickael and Bartoccioni, Florent and Samet, Nermin and Sim{\'e}oni, Oriane and Gidaris, Spyros and Vu, Tuan Hung and Bursuc, Andrei and Valle, Eduardo and Marlet, Renaud and Cord, Matthieu},
  title     = {{Valeo4Cast}: A Modular Approach to End-to-End Forecasting},
  booktitle = {Computer Vision -- {ECCV} 2024 Workshops, Proceedings},
  editor    = {Del Bue, Alessio and Canton, Cristian and Pont-Tuset, Jordi and Tommasi, Tatiana},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2025},
  month     = jan,
  day       = {1},
  pages     = {1--14},
  doi       = {10.1007/978-3-031-91767-7_1},
  isbn      = {9783031917660},
  language  = {English},
  keywords  = {End-to-end motion forecasting, Finetuning, Modular approach},
  abstract  = {Motion forecasting is crucial in autonomous driving systems to anticipate the future trajectories of surrounding agents such as pedestrians, vehicles, and traffic signals. In end-to-end forecasting, the model must jointly detect and track from sensor data (cameras or LiDARs) the past trajectories of the different elements of the scene and predict their future locations. We depart from the current trend of tackling this task via end-to-end training from perception to forecasting, and instead use a modular approach. We individually build and train detection, tracking and forecasting modules. We then only use consecutive finetuning steps to integrate the modules better and alleviate compounding errors. We conduct an in-depth study on the finetuning strategies and it reveals that our simple yet effective approach significantly improves performance on the end-to-end forecasting benchmark. Consequently, our solution ranks first in the Argoverse 2 End-to-end Forecasting Challenge, with 63.82 mAPf. We surpass forecasting results by +17.1 points over last year{\textquoteright}s winner and by +13.3 points over this year{\textquoteright}s runner-up. This remarkable performance in forecasting can be explained by our modular paradigm, which integrates finetuning strategies and significantly outperforms the end-to-end-trained counterparts.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.; Workshops that were held in conjunction with the 18th European Conference on Computer Vision, ECCV 2024 ; Conference date: 29-09-2024 Through 04-10-2024},
}