% Conference paper in the DAS 2012 proceedings (see booktitle/note below).
% Cleanup relative to the raw export:
%   - author: removed escaped braces around given names (they typeset as
%     literal braces) and put every name in "Last, First" form.
%   - title/booktitle: brace-protected words that must keep their capitals
%     under styles that recase titles.
%   - series: dropped, it only repeated the booktitle verbatim.
% The citation key is left as exported so existing citations keep resolving.
@inproceedings{17567e0c4279484294b6593e0c513b7a,
  author    = {Nguyen, Cong Kinh and Likforman-Sulem, Laurence and Moissinac, Jean Claude and Faure, Claudie and Lardon, Jeremy},
  title     = {{Web} document analysis based on visual segmentation and page rendering},
  booktitle = {Proceedings - 10th {IAPR} International Workshop on Document Analysis Systems, {DAS} 2012},
  pages     = {354--358},
  year      = {2012},
  month     = may,
  day       = {24},
  doi       = {10.1109/DAS.2012.95},
  isbn      = {9780769546612},
  language  = {English},
  keywords  = {Internet document, Web page segmentation, block segmentation, semantic block},
  abstract  = {This paper proposes an approach for segmenting a Web page into its semantic parts. Such analysis may be useful for adapting blog or other pages on small devices. In this approach, we take advantage of both dynamic layout after rendering and textual information. Our method segments the page into blocks and then classifies the blocks. A classification in semantic parts is performed thanks to a SVM-based machine learning approach using a set of 30 textual and visual-based features. Evaluation is conducted on a Web blog database. Results are provided for both block classification and blog segmentation into articles.},
  note      = {10th IAPR International Workshop on Document Analysis Systems, DAS 2012 ; Conference date: 27-03-2012 Through 29-03-2012},
}