@article{3880,
  author        = {Tao, F. and Hao, W. and Yueyan, L. and Sanhong, D.},
  title         = {Classifying Images of {Intangible Cultural Heritages} with Multimodal Fusion},
  journal       = {Data Analysis and Knowledge Discovery},
  year          = {2022},
  volume        = {6},
  number        = {2-3},
  pages         = {329--337},
  issn          = {2096-3467},
  doi           = {10.11925/infotech.2096-3467.2021.0911},
  abstract      = {[Objective] This paper proposes a new method combining images and textual descriptions, aiming to improve the classification of Intangible Cultural Heritage (ICH) images. [Methods] We built the new model with multimodal fusion, which includes a fine-tuned deep pre-trained model for extracting visual semantic features, a BERT model for extracting textual features, a fusion layer for concatenating visual and textual features, and an output layer for predicting labels. [Results] We examined the proposed model with the national ICH project-New Year Prints to classify the Mianzu Prints, Taohuawu Prints, Yangjiabu Prints, and Yangliuqing Prints. We found that fine-tuning the convolutional layer strengthened the visual semantics features of the ICH images, and the F1 value for classification reached 72.028\%. Compared with the baseline models, our method yielded the best results, with a F1 value of 77.574\%. [Limitations] The proposed model was only tested on New Year Prints, which needs to be expanded to more ICH projects in the future. [Conclusions] Adding textual description features can improve the performance of ICH image classification. Fine-tuning convolutional layers in image deep pre-trained model can improve extraction of visual semantics features.},
  note          = {Publisher: Chinese Academy of Sciences},
  language      = {Chinese},
  keywords      = {Digital humanities, Image classification, Multimodal Classification},
  internal-note = {NOTE(review): author names look auto-exported in abbreviated surname-first form (likely Fan Tao, Wang Hao, Li Yueyan, Deng Sanhong); verify full names against the publisher record. Scopus redirect URL dropped in favour of the DOI.},
}