<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2025.1738444</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Painting authentication using CNNs and sliding window feature extraction</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ruiz de Miras</surname><given-names>Juan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1569692"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>V&#x00ED;lchez</surname><given-names>Jos&#x00E9; Luis</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3335547"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Gacto</surname><given-names>Mar&#x00ED;a Jos&#x00E9;</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3266510"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Mart&#x00ED;n</surname><given-names>Domingo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3334648"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Software Engineering Department, University of Granada</institution>, <city>Granada</city>, <country country="es">Spain</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Analytical Chemistry, University of Granada</institution>, <city>Granada</city>, <country country="es">Spain</country></aff>
<aff id="aff3"><label>3</label><institution>Biosanitary Research Institute (ibs.GRANADA)</institution>, <city>Granada</city>, <country country="es">Spain</country></aff>
<aff id="aff4"><label>4</label><institution>Andalusian Research Institute in Data Science and Computational Intelligence (DaSCI), University of Granada</institution>, <city>Granada</city>, <country country="es">Spain</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Juan Ruiz de Miras, <email xlink:href="mailto:demiras@ugr.es">demiras@ugr.es</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-13">
<day>13</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>8</volume>
<elocation-id>1738444</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>16</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Ruiz de Miras, V&#x00ED;lchez, Gacto and Mart&#x00ED;n.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Ruiz de Miras, V&#x00ED;lchez, Gacto and Mart&#x00ED;n</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-13">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Painting authentication is an inherently complex task, often relying on a combination of connoisseurship and technical analysis. This study focuses on the authentication of a single painting attributed to Paolo Veronese, using a convolutional neural network approach tailored to severe data scarcity. To ensure that stylistic comparisons were based on artistic execution rather than iconographic differences, the dataset was restricted to paintings depicting the Holy Family, the same subject as the work under authentication. A custom shallow convolutional network was developed to process multichannel inputs (RGB, grayscale, and edge maps) extracted from overlapping patches via a sliding-window strategy. This patch-based design expanded the dataset from a small number of paintings to thousands of localized patches, enabling the model to learn microtextural and brushstroke features. Regularization techniques were employed to enhance generalization, while a painting-level cross-validation strategy was used to prevent data leakage. The model achieved high classification performance (accuracy of 94.51%, Area under the Curve 0.99) and generated probability heatmaps that revealed stylistic coherence in authentic Veronese works and fragmentation in non-Veronese paintings. The work under examination yielded an intermediate global mean Veronese probability (61%) with extensive high-probability regions over stylistically salient passages, suggesting partial stylistic affinity. The results support the use of patch-based models for stylistic analysis in art authentication, especially under domain-specific data constraints. While the network provides strong probabilistic evidence of stylistic affinity, definitive attribution requires further integration with historical, technical, and provenance-based analyses.</p>
</abstract>
<kwd-group>
<kwd>authentication</kwd>
<kwd>convolutional neural networks</kwd>
<kwd>painting classification</kwd>
<kwd>Paolo Veronese</kwd>
<kwd>sliding-window patch extraction</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research was partially funded by the Spanish Ministry of Science, Innovation and University (grant no. PID2024-161348OB-I00), and the Carlos III Health Institute, co-funded by the European Union and ERDF A way of making Europe (grant no. PI23/00129).</funding-statement>
</funding-group>
<counts>
<fig-count count="8"/>
<table-count count="5"/>
<equation-count count="0"/>
<ref-count count="34"/>
<page-count count="15"/>
<word-count count="9141"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Pattern Recognition</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Art authentication remains a complex, multidisciplinary challenge that requires the integration of historical expertise, scientific analysis, and, increasingly, computational techniques (<xref ref-type="bibr" rid="ref15">King, 2024</xref>). In recent years, deep learning has emerged as a powerful tool for image-based analysis, offering scalable, objective approaches that complement traditional expert judgment. Convolutional neural networks (CNNs) and other advanced architectures have demonstrated high levels of accuracy in art authentication by learning complex visual patterns directly from images.</p>
<p>However, despite the diversity of architectures and methodologies employed in prior work (see Section 2), deep-learning approaches share a fundamental requirement: access to large, labeled datasets for model training. Previous studies rely on hundreds or thousands of high-resolution images of authenticated artworks to enable neural networks to learn meaningful stylistic and compositional patterns. This dependence on extensive training data remains a key limitation, particularly when authenticating a single artwork, where the training set of verified examples may consist of only a handful of paintings. This data-scarce scenario renders standard deep, pretrained models, which contain millions of parameters, highly prone to overfitting (<xref ref-type="bibr" rid="ref3">Bejani and Ghatee, 2021</xref>). Moreover, transfer learning approaches often rely on features learned from natural images (e.g., ImageNet), which emphasize high-level semantic content over low-level microtextural nuances required to distinguish a master from a workshop. A significant gap therefore exists for tailored methodologies capable of operating effectively under these severe data constraints, with emphasis on execution technique rather than semantic composition.</p>
<p>To address this gap, an approach based on a custom, shallow CNN and sliding-window-based feature extraction is introduced and applied to the authentication of a single painting with a disputed attribution. This patch-based approach mitigates data scarcity by substantially expanding the training set, shifting the model&#x2019;s focus from global composition to learning localized, microstylistic features such as brushwork and texture (<xref ref-type="bibr" rid="ref27">Sabha et al., 2024</xref>). Furthermore, the proposed model integrates multichannel inputs (RGB, grayscale, and edge maps), providing complementary visual cues; this technique has been shown to enhance the capture of subtle stylistic features (<xref ref-type="bibr" rid="ref32">Ugail et al., 2023</xref>). The artwork, a Holy Family (see <xref ref-type="fig" rid="fig1">Figure 1A</xref>), has been linked to Paolo Veronese, although its authorship remains uncertain, and may originate from the master, Veronese&#x2019;s workshop, or his disciples (<xref ref-type="bibr" rid="ref4">Blanc et al., 2023</xref>; <xref ref-type="bibr" rid="ref20">L&#x00F3;pez-Baldomero et al., 2023</xref>). Training on a restricted dataset of Holy Family paintings attributed to Veronese and his circle is intended to identify stylistic features that may clarify the painting&#x2019;s origin, offering a reproducible and data-driven complement to expert judgment.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Holy Family painting under examination: <bold>(A)</bold> Original artwork, reproduced by the authors from &#x201C;Holy Family: The Virgin&#x201D;; <bold>(B)</bold> reverse with inscription; <bold>(C)</bold> reverse with highlighted inscription; and <bold>(D)</bold> cropped, squared image prepared for CNN analysis.</p>
</caption>
<graphic xlink:href="frai-08-1738444-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">A painting depicting a woman in a red robe holding a baby with a bearded man nearby. Two sections show labels on the back, one in faded script, and one with clearer text, mentioning "Boceto di Pablo Veronese" and a tribute to Giovanni de Rugiera from Tomasso Ferro di Genova.</alt-text>
</graphic>
</fig>
<p>The main contributions of this study are as follows:</p><list list-type="bullet">
<list-item>
<p>A tailored deep-learning framework for severe data scarcity: a sliding-window patch-generation strategy combined with a custom, shallow CNN architecture that effectively mitigates overfitting, enabling robust training with a dataset of only six paintings.</p>
</list-item>
<list-item>
<p>Multichannel stylistic feature extraction: integrating RGB, grayscale, and edge maps enables the model to capture complementary stylistic cues, such as brushwork texture and structural contours, that are critical for distinguishing a master from a workshop or circle.</p>
</list-item>
<list-item>
<p>Painting-level validation: the implementation of a painting-level leave-one-out cross-validation strategy (rather than at the patch level) ensures unbiased performance estimation and prevents data leakage.</p>
</list-item>
<list-item>
<p>Quantitative evidence for a disputed attribution: a probabilistic assessment of the Holy Family painting under examination is provided, offering objective data that support the hypothesis of partial stylistic affinity.</p>
</list-item>
</list>
<p>The remainder of this paper is organized as follows: Section 2 reviews related work; Section 3 describes the dataset and the proposed methodology; Section 4 presents the experimental results; and Section 5 discusses the results and presents the conclusions.</p>
</sec>
<sec id="sec2">
<label>2</label>
<title>Related work</title>
<p>Deep learning has emerged as a powerful approach for image-based analysis in the cultural-heritage domain. CNNs and advanced architectures, such as vision transformers, have demonstrated high levels of accuracy in art authentication by learning complex visual patterns directly from images (<xref ref-type="bibr" rid="ref10">Elgammal et al., 2018</xref>; <xref ref-type="bibr" rid="ref9">Dobbs et al., 2023</xref>; <xref ref-type="bibr" rid="ref32">Ugail et al., 2023</xref>; <xref ref-type="bibr" rid="ref7">Chen et al., 2024</xref>; <xref ref-type="bibr" rid="ref30">Schaerf et al., 2024</xref>).</p>
<p>Specifically, in <xref ref-type="bibr" rid="ref30">Schaerf et al. (2024)</xref>, the use of vision transformers for the authentication of Van Gogh paintings was explored, showing that deep-learning models can outperform handcrafted-feature approaches in both precision and interpretability. Similarly, Dobbs applied large-scale CNN-based classification framework to contemporary artworks, achieving more than 91% accuracy and highlighting deep learning&#x2019;s potential for scalable authentication across diverse artistic styles (<xref ref-type="bibr" rid="ref9">Dobbs et al., 2023</xref>).</p>
<p>In <xref ref-type="bibr" rid="ref32">Ugail et al. (2023)</xref>, the effectiveness of transfer learning in attributing Renaissance paintings, specifically works by Raphael, using pretrained CNNs fine-tuned on curated datasets was demonstrated. Their approach provided valuable insights into stylistic attribution and supported expert judgment in complex cases. The study in <xref ref-type="bibr" rid="ref7">Chen et al. (2024)</xref> further reinforced the utility of CNNs in fine-art recognition, emphasizing their role in extracting stylistic and compositional features for authentication tasks. Earlier work in <xref ref-type="bibr" rid="ref10">Elgammal et al. (2018)</xref> introduced a deep-learning framework for analyzing stylistic evolution in art history, laying the groundwork for subsequent applications in authentication and attribution.</p>
<p>Beyond the cultural-heritage domain, the efficacy of shallow and custom-designed CNNs has been demonstrated in other fields requiring high levels of precision under specific constraints. Recent advances show that compact models, when properly optimized, can rival deeper architectures. For instance, Radojcic proposed a two-layer TinyML approach for plant disease classification (<xref ref-type="bibr" rid="ref24">Radojcic et al., 2026</xref>), highlighting the computational efficiency of shallow networks. Similarly, the studies in <xref ref-type="bibr" rid="ref34">Zivkovic et al. (2025)</xref> and <xref ref-type="bibr" rid="ref2">Basha et al. (2021)</xref> employed metaheuristic optimization strategies to enhance CNN design for ocular-disease diagnosis and general-purpose applications, respectively. These studies reinforce the premise that tailored, shallow architectures constitute a robust solution for specialized tasks where massive datasets for pretraining are unavailable or unsuitable.</p>
</sec>
<sec sec-type="materials|methods" id="sec3">
<label>3</label>
<title>Materials and methods</title>
<sec id="sec4">
<label>3.1</label>
<title>Materials</title>
<p>The original painting under examination is shown in <xref ref-type="fig" rid="fig1">Figure 1A</xref>, while the cropped image used for analysis appears in <xref ref-type="fig" rid="fig1">Figure 1D</xref>. The crop was performed primarily to fit the elliptical composition into a rectangular frame, without introducing artificial background pixels that could bias the analysis. The resulting image was processed into square patches, a common input format for CNN analysis.</p>
<p>The painting depicts the Holy Family: The Virgin holding the Baby Jesus while lifting a veil, observed from behind by a figure identified as St. Joseph. An inscription is preserved on the reverse. The original text appears in <xref ref-type="fig" rid="fig1">Figure 1B</xref>, and the highlighted version in <xref ref-type="fig" rid="fig1">Figure 1C</xref>. This inscription may indicate authorship by P. Veronese, a copy after Paolo Veronese, or production by his workshop or followers. The painting is currently located in Spain and is part of a private collection.</p>
<p>The painting was previously analyzed using several analytical techniques (<xref ref-type="bibr" rid="ref4">Blanc et al., 2023</xref>; <xref ref-type="bibr" rid="ref20">L&#x00F3;pez-Baldomero et al., 2023</xref>): X-ray fluorescence spectroscopy to identify chemical elements in the paint, X-ray diffraction to determine crystalline components of the pigments, and spectral image analysis with endmember extraction to identify pigments. These studies concluded that the materials and artistic techniques are consistent with those of Italian Renaissance artists. However, the painting&#x2019;s precise authorship was not established.</p>
<p>A set of Holy Family paintings (see <xref ref-type="fig" rid="fig2">Figure 2</xref>) was used to train the CNN. The training dataset was restricted to this subject to ensure thematic consistency with the work under examination. By maintaining thematic consistency, the CNN can focus on analyzing color palettes, visual patterns, and artistic techniques within a controlled, homogeneous context, thereby reducing variability unrelated to authorship and improving the reliability of stylistic analysis. The training dataset was also limited to works attributed to Paolo Veronese, his workshop, or followers. This restriction ensures stylistic coherence within the dataset, as including works by other Renaissance painters could introduce significant variability in composition, color schemes, and brushwork. By focusing exclusively on Veronese&#x2019;s circle, the CNN can learn discriminative features specific to this artistic environment, thus reducing noise from unrelated stylistic traits and improving the model&#x2019;s ability to capture subtle patterns relevant to authorship attribution. Details for each painting are listed in <xref ref-type="table" rid="tab1">Table 1</xref>. Four paintings (1&#x2013;4) are confirmed works by Paolo Veronese (the Veronese class), and two paintings (5&#x2013;6) are attributed to his disciples or followers (the non-Veronese class).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Training dataset of Holy Family paintings: (1&#x2013;4) attributed to Paolo Veronese; (5, 6) attributed to his workshop or followers.</p>
</caption>
<graphic xlink:href="frai-08-1738444-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">A collection of religious paintings is divided into two categories: Veronese Paintings and Non-Veronese Paintings. The Veronese Paintings section includes four images, each depicting the Madonna with Child and other figures. The Non-Veronese Paintings section features two similar compositions, also portraying religious scenes with Madonna, Child, and additional figures, characterized by rich colors and intricate details typical of the Renaissance period.</alt-text>
</graphic>
</fig>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Details for the paintings in the training dataset (1&#x2013;6) and the test dataset (7&#x2013;10).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Painting</th>
<th align="left" valign="top">Title</th>
<th align="left" valign="top">Author</th>
<th align="left" valign="top">Date</th>
<th align="left" valign="top">Additional Information</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">1</td>
<td align="left" valign="middle">The Mystic Marriage of Saint Catherine of Alexandria</td>
<td align="left" valign="middle">P. Veronese</td>
<td align="left" valign="middle">1,547&#x2013;50</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://artgallery.yale.edu/collections/objects/63825" ext-link-type="uri">https://artgallery.yale.edu/collections/objects/63825</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">2</td>
<td align="left" valign="middle">Holy Family with Sts. Anthony Abbot, Catherine and the Infant John the Baptist</td>
<td align="left" valign="middle">P. Veronese</td>
<td align="left" valign="middle">1,551</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://www.wikiart.org/en/paolo-veronese/holy-family-with-sts-anthony-abbot-catherine-and-the-infant-john-the-baptist-1551" ext-link-type="uri">https://www.wikiart.org/en/paolo-veronese/holy-family-with-sts-anthony-abbot-catherine-and-the-infant-john-the-baptist-1551</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">3</td>
<td align="left" valign="middle">The Mystical Marriage of Saint Catherine</td>
<td align="left" valign="middle">P. Veronese</td>
<td align="left" valign="middle">1,557&#x2013;65</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://www.museefabre.fr/recherche/musee%3AMUS_BIEN%3A3408?is_search_page=1&#x0026;search=veronese&#x0026;currentPage=1" ext-link-type="uri">https://www.museefabre.fr/recherche/musee%3AMUS_BIEN%3A3408?is_search_page=1&#x0026;search=veronese&#x0026;currentPage=1</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">4</td>
<td align="left" valign="middle">Holy Family with Young St. John and St. Catherine</td>
<td align="left" valign="middle">P. Veronese</td>
<td align="left" valign="middle">1,565</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://www.uffizi.it/en/artworks/veronese-holy-family" ext-link-type="uri">https://www.uffizi.it/en/artworks/veronese-holy-family</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">5</td>
<td align="left" valign="middle">The Holy Family with the Infant St. John the Baptist</td>
<td align="left" valign="middle">Workshop of P. Veronese</td>
<td align="left" valign="middle">1,550&#x2013;75</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://id.rijksmuseum.nl/20026863" ext-link-type="uri">https://id.rijksmuseum.nl/20026863</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">6</td>
<td align="left" valign="middle">The Mystic Marriage of St. Catherine of Alexandria</td>
<td align="left" valign="middle">Benedetto Caliari</td>
<td align="left" valign="middle">1,562&#x2013;9</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://www.rct.uk/collection/407216/the-mystic-marriage-of-st-catherine-of-alexandria" ext-link-type="uri">https://www.rct.uk/collection/407216/the-mystic-marriage-of-st-catherine-of-alexandria</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">7</td>
<td align="left" valign="middle">The Holy Family</td>
<td align="left" valign="middle">Caravaggisti School</td>
<td align="left" valign="middle">17th century</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://www.mutualart.com/Artwork/The-Holy-Family/AA108F3F26B11479DB06718BC7A2EB24" ext-link-type="uri">https://www.mutualart.com/Artwork/The-Holy-Family/AA108F3F26B11479DB06718BC7A2EB24</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">8</td>
<td align="left" valign="middle">The Holy Family</td>
<td align="left" valign="middle">Italian anonymous</td>
<td align="left" valign="middle">17th century</td>
<td align="left" valign="middle">
<ext-link xlink:href="http://fpjuliovisconti.com/anonimo-italiano-sagrada-familia-s-xvii/" ext-link-type="uri">http://fpjuliovisconti.com/anonimo-italiano-sagrada-familia-s-xvii/</ext-link>
</td>
</tr>
<tr>
<td align="left" valign="middle">9</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="left" valign="middle">Art student</td>
<td align="left" valign="middle">21th century</td>
<td align="left" valign="middle">&#x2013;</td>
</tr>
<tr>
<td align="left" valign="middle">10</td>
<td align="left" valign="middle">Holy Family with Barbara</td>
<td align="left" valign="middle">
<ext-link xlink:href="http://Reprodart.com" ext-link-type="uri">Reprodart.com</ext-link>
</td>
<td align="left" valign="middle">21th century</td>
<td align="left" valign="middle">
<ext-link xlink:href="https://www.reprodart.com/a/veronese-paolo-eigentl-pa/pveroneseholyfamilywithba.html" ext-link-type="uri">https://www.reprodart.com/a/veronese-paolo-eigentl-pa/pveroneseholyfamilywithba.html</ext-link>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>A major challenge in this study is the limited size of the training dataset, which comprises only six paintings. Such a small sample poses a significant risk of overfitting and constrains a CNN&#x2019;s ability to generalize effectively. To mitigate this constraint, a sliding-window method was implemented and applied to three distinct input representations (RGB, grayscale, and edge maps) for each painting, as detailed below. This approach substantially increased the number of training samples by generating many overlapping patches from each image, enhancing data diversity and enabling the model to learn local stylistic and textural features while preserving the overall artistic context.</p>
<p>CNN performance was evaluated using two distinct test sets. First, a negative test set was defined (see <xref ref-type="fig" rid="fig3">Figure 3</xref>; <xref ref-type="table" rid="tab1">Table 1</xref>), comprising four Holy Family paintings with known non-Veronese authorship (Paintings 7, 8, 9, and 10). Paintings 7, 8, and 9 are copies of the work under examination: painting 7 is a 17th-century work by a Caravaggisti school artist; painting 8 is a work by an anonymous 17<sup>th</sup>-century Italian artist; and painting 9 is a modern copy created by an art student as a controlled experiment for this study. Painting 10 is a modern replica of Veronese&#x2019;s painting 4. Second, to evaluate classification performance on authentic Veronese works, a painting-level leave-one-out cross-validation on Veronese paintings (Paintings 1&#x2013;4) was conducted. Each Veronese painting (e.g., Painting 1) was held out as the test case, while training was performed on the remaining paintings (e.g., Paintings 2, 3, 4, 5, 6). This process was repeated, using each of paintings 1&#x2013;4 as the test case in turn.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Test dataset of Holy Family paintings (paintings 7&#x2013;10). Image 7 reproduced from &#x201C;The Holy Family&#x201D;, <ext-link xlink:href="https://www.mutualart.com/Artwork/The-Holy-Family/AA108F3F26B11479DB06718" ext-link-type="uri">https://www.mutualart.com/Artwork/The-Holy-Family/AA108F3F26B11479DB06718</ext-link>; Image 8 reproduced from the Fundaci&#x00F3;n Pintor Julio Visconti, &#x201C;An&#x00F3;nimo Italiano Sagrada Familia, s. XVII&#x201D;, <ext-link xlink:href="http://fpjuliovisconti.com/anonimo-italiano-sagrada-familia-s-xvii/" ext-link-type="uri">http://fpjuliovisconti.com/anonimo-italiano-sagrada-familia-s-xvii/</ext-link>; Image 9 adapted by the authors from &#x201C;The Holy Family&#x201D;, <ext-link xlink:href="https://www.mutualart.com/Artwork/The-Holy-Family/AA108F3F26B11479DB06718BC7A2EB24l" ext-link-type="uri">https://www.mutualart.com/Artwork/The-Holy-Family/AA108F3F26B11479DB06718BC7A2EB24l</ext-link>; Image 10 reproduced from &#x201C;Holy Family with St Barbara and the Infant St John&#x201D; [c.1570] by Paolo Veronese, Holy Family with St Barbara and the Infant St John, c.1570 - Paolo Veronese - WikiArt.org, licensed under CC0.</p>
</caption>
<graphic xlink:href="frai-08-1738444-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Top left artwork shows a serene woman holding a sleeping child, with an elderly man beside them. Top right image depicts a similar scene with slightly different postures. Bottom left artwork presents a softly painted version of the same theme. Bottom right image includes two women lovingly attending to the child, with an additional figure reaching towards them.</alt-text>
</graphic>
</fig>
<p>Images of all paintings by Veronese and his workshop, which are in the public domain, were downloaded from WikiArt - Visual Art Encyclopedia.<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> The image for test painting 9 originated from the authors&#x2019; laboratory, while the images of paintings 7, 8, and 10 were obtained from the sources listed in <xref ref-type="table" rid="tab1">Table 1</xref>. All images were cropped to square regions to include the entire Holy Family in each painting. These crops were downsampled to 640&#x202F;&#x00D7;&#x202F;640 pixels. This resolution was selected as the smallest square size capable of accommodating all figures, thus avoiding supersampling. The images used in the dataset can be accessed at <ext-link xlink:href="https://www.ugr.es/~demiras/PaintingAuthentication/" ext-link-type="uri">https://www.ugr.es/~demiras/PaintingAuthentication/</ext-link>.</p>
</sec>
<sec id="sec5">
<label>3.2</label>
<title>Methods</title>
<p>This section details the methodological pipeline: sliding-window based feature extraction (Section 3.2.1), the proposed CNN architecture (Section 3.2.2), and model validation and testing (Section 3.2.3).</p>
<sec id="sec6">
<label>3.2.1</label>
<title>Sliding-window-based feature extraction from images of paintings</title>
<p>CNN training typically requires large, diverse datasets to ensure robust generalization and to mitigate overfitting. However, in the highly specialized context of painting authentication, the availability of labeled data is often severely limited, especially for rare or historically significant works. The training dataset comprises only six paintings, which poses a substantial challenge for conventional deep-learning models. CNNs trained on such small datasets tend to memorize the training samples rather than learn generalizable features, resulting in poor performance on unseen test data. This limitation has been acknowledged in the literature with studies exploring various strategies to mitigate overfitting, such as data augmentation, transfer learning, and feature extraction techniques (<xref ref-type="bibr" rid="ref27">Sabha et al., 2024</xref>; <xref ref-type="bibr" rid="ref28">Safa aldin et al., 2024</xref>). To address this issue, a sliding-window-based feature-extraction method was implemented. This technique produces a substantially larger set of localized patches, increasing the number of training samples and enhancing the CNN&#x2019;s capacity to learn discriminative features.</p>
<p>The sliding-window technique processes the input image by dividing it into fixed-size patches (windows) (see <xref ref-type="fig" rid="fig4">Figure 4</xref>, Step 1). The patches are obtained by moving the window across the image with a predefined step size, known as the stride. The stride can equal the patch dimensions or be smaller. When the stride is less than the patch size, the resulting patches exhibit a specified overlap, which is crucial for capturing continuous, fine-grained details and textures and generating a greater number of samples. Each patch inherits the source painting&#x2019;s class label (&#x201C;Veronese&#x201D;/&#x201C;Non-Veronese&#x201D;; see <xref ref-type="fig" rid="fig4">Figure 4</xref>, Step 2).</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Proposed authentication methodology: (1) Original training dataset with application of the sliding-window technique; (2) Extraction of image patches via the sliding-window process to create an expanded training dataset; (3) CNN training on the expanded dataset; (4) Test image under authentication; (5) Extraction of image patches from the test image via the same sliding-window process; (6) CNN-based classification of all test patches; and (7) Generation of localized Veronese-probability maps (heatmap and overlay) and aggregation of patch-level classifications to estimate the painting&#x2019;s global Veronese probability.</p>
</caption>
<graphic xlink:href="frai-08-1738444-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart demonstrating a process for training a Convolutional Neural Network (CNN) to classify paintings by Veronese. It starts with an original training dataset of RGB, grayscale, and edge images in section 1. Section 2 shows patches extracted from these images using a sliding-window process. Section 3 indicates training with these patches on a CNN. Sections 4 and 5 display a test image and its patches. Section 6 involves classification, leading to section 7, which shows the output as a Veronese probability heatmap overlaid on the painting.</alt-text>
</graphic>
</fig>
<p>This process is applied independently to three distinct input representations for each painting to maximize feature diversity (see <xref ref-type="fig" rid="fig4">Figure 4</xref>, Steps 1 and 2): (1) the original RGB image, (2) its grayscale version, and (3) a binary edge map. Use of the grayscale version enables the model to focus on structural and textural features without the influence of color, which can be particularly beneficial when color is not a reliable discriminative factor or under varying lighting conditions (<xref ref-type="bibr" rid="ref14">Johnson et al., 2008</xref>). The edge map is generated with the Canny edge-detection algorithm (<xref ref-type="bibr" rid="ref6">Canny, 1986</xref>). The Canny algorithm is particularly suitable because it detects edges by identifying local maxima of the image gradient using a dual-threshold mechanism. This dual-threshold approach classifies edges as strong or weak, including weak edges only if connected to a strong edge. This property makes the Canny algorithm more robust to noise than many alternatives and better at detecting subtle edges, which are critical for characterizing artistic style. This final, large collection of localized patches constitutes the expanded training dataset for the CNN (see <xref ref-type="fig" rid="fig4">Figure 4</xref>, Step 3).</p>
<p>The sliding window process has two parameters: (1) the window size, which determines the dimensions of the square patches extracted from the image, and (2) the overlap percentage, which determines the stride, i.e., the pixel shift between consecutive windows. These parameters directly influence the granularity of feature extraction and the total number of patches produced for training and testing. In this study, the sliding window size was fixed at 64&#x202F;&#x00D7;&#x202F;64 pixels, corresponding to one-tenth of each original image dimension (640&#x202F;&#x00D7;&#x202F;640 pixels). This configuration strikes an effective balance between capturing local details, such as brushstroke patterns and texture, and maintaining sufficient contextual information within each patch. Smaller windows can lead to excessive fragmentation and loss of structural cues, while larger windows may dilute fine-grained features critical for authentication. The overlap percentage (65%) was chosen experimentally by testing multiple values between 0 and 95% and selecting the value that yielded the best validation performance for the CNN. This overlap not only increases the number of training patches but also reduces boundary inconsistencies, as reported in prior work on image tiling for CNN-based analysis (<xref ref-type="bibr" rid="ref1">An et al., 2020</xref>; <xref ref-type="bibr" rid="ref8">Cira et al., 2024</xref>). The process was implemented by stacking the three representations (color, grayscale, and edge maps) to create a single 5-channel input: 3 channels for RGB, 1 channel for grayscale, and 1 channel for the edge map. This yielded 729 patches per painting. Consequently, the training dataset expanded from six original paintings to a total of 4,374 patches (729 patches per painting &#x00D7; 6 paintings), each with dimensions of 64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;5 pixels.</p>
<p>For the test phase, patches were extracted from each painting using the same sliding-window procedure (see <xref ref-type="fig" rid="fig4">Figure 4</xref>, Steps 4 and 5). The trained CNN then classified each patch individually, yielding class probabilities (&#x201C;Veronese&#x201D; or &#x201C;Non-Veronese&#x201D;) (see <xref ref-type="fig" rid="fig4">Figure 4</xref>, Step 6). The final painting-level prediction was obtained by averaging the probabilities across all patches. Additionally, these patch probabilities were used to generate a Veronese probability heatmap, and overlaying this map onto the original painting yields a visual representation of the spatial probability distribution across the artwork (see <xref ref-type="fig" rid="fig4">Figure 4</xref>, Step 7). This approach ensures that the final decision is grounded in the painting&#x2019;s global stylistic consistency and mitigates the influence of localized or anomalous regions.</p>
</sec>
<sec id="sec7">
<label>3.2.2</label>
<title>CNN architecture and configuration</title>
<p>To classify the image patches as either &#x201C;Veronese&#x201D; or &#x201C;Non-Veronese,&#x201D; a custom CNN architecture was designed, tailored to accommodate the dataset constraints and the nature of the input data. The network input consists of 64&#x202F;&#x00D7;&#x202F;64-pixel patches with five channels. This multichannel input allows the model to leverage color, structural, and edge features simultaneously, enhancing its ability to capture stylistic nuances.</p>
<p>The CNN architecture comprises two convolutional blocks followed by a fully connected layer and a softmax output layer (see <xref ref-type="table" rid="tab2">Table 2</xref> for details). Each convolutional block includes a convolutional layer with a 3&#x202F;&#x00D7;&#x202F;3 kernel, batch normalization (<xref ref-type="bibr" rid="ref13">Ioffe and Szegedy, 2015</xref>), and a ReLU activation (<xref ref-type="bibr" rid="ref21">Nair and Hinton, 2010</xref>). Max-pooling layers with a stride of 2 are applied to progressively reduce spatial dimensions while retaining the most salient features (<xref ref-type="bibr" rid="ref17">Krizhevsky et al., 2017</xref>). A dropout layer with a rate of 0.5 is applied after the second convolutional block to mitigate overfitting, particularly given the high redundancy introduced by overlapping patches (<xref ref-type="bibr" rid="ref31">Srivastava et al., 2014</xref>). The final fully connected layer maps the extracted features to the two output classes, and the softmax layer yields class probabilities.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>CNN architecture and configuration: layers, their types, parameters, and output sizes.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Layer</th>
<th align="left" valign="top">Type</th>
<th align="center" valign="top">Parameters</th>
<th align="center" valign="top">Output size</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">1</td>
<td align="left" valign="top">Image Input</td>
<td align="center" valign="top">64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;5, Normalization: None</td>
<td align="center" valign="top">64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;5</td>
</tr>
<tr>
<td align="left" valign="top">2</td>
<td align="left" valign="top">Convolution 2D</td>
<td align="center" valign="top">3&#x202F;&#x00D7;&#x202F;3 kernel, 16 filters, Padding: same</td>
<td align="center" valign="top">64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;16</td>
</tr>
<tr>
<td align="left" valign="top">3</td>
<td align="left" valign="top">Batch Normalization</td>
<td align="center" valign="top">&#x2013;</td>
<td align="center" valign="top">64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;16</td>
</tr>
<tr>
<td align="left" valign="top">4</td>
<td align="left" valign="top">ReLU</td>
<td align="center" valign="top">&#x2013;</td>
<td align="center" valign="top">64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;16</td>
</tr>
<tr>
<td align="left" valign="top">5</td>
<td align="left" valign="top">Max Pooling 2D</td>
<td align="center" valign="top">2&#x202F;&#x00D7;&#x202F;2, Stride: 2</td>
<td align="center" valign="top">32&#x202F;&#x00D7;&#x202F;32&#x202F;&#x00D7;&#x202F;16</td>
</tr>
<tr>
<td align="left" valign="top">6</td>
<td align="left" valign="top">Convolution 2D</td>
<td align="center" valign="top">3&#x202F;&#x00D7;&#x202F;3 kernel, 32 filters, Padding: same</td>
<td align="center" valign="top">32&#x202F;&#x00D7;&#x202F;32&#x202F;&#x00D7;&#x202F;32</td>
</tr>
<tr>
<td align="left" valign="top">7</td>
<td align="left" valign="top">Batch Normalization</td>
<td align="center" valign="top">&#x2013;</td>
<td align="center" valign="top">32&#x202F;&#x00D7;&#x202F;32&#x202F;&#x00D7;&#x202F;32</td>
</tr>
<tr>
<td align="left" valign="top">8</td>
<td align="left" valign="top">ReLU</td>
<td align="center" valign="top">&#x2013;</td>
<td align="center" valign="top">32&#x202F;&#x00D7;&#x202F;32&#x202F;&#x00D7;&#x202F;32</td>
</tr>
<tr>
<td align="left" valign="top">9</td>
<td align="left" valign="top">Max Pooling</td>
<td align="center" valign="top">2&#x202F;&#x00D7;&#x202F;2, Stride: 2</td>
<td align="center" valign="top">16&#x202F;&#x00D7;&#x202F;16&#x202F;&#x00D7;&#x202F;32</td>
</tr>
<tr>
<td align="left" valign="top">10</td>
<td align="left" valign="top">Dropout</td>
<td align="center" valign="top">Rate: 0.5</td>
<td align="center" valign="top">16&#x202F;&#x00D7;&#x202F;16&#x202F;&#x00D7;&#x202F;32</td>
</tr>
<tr>
<td align="left" valign="top">11</td>
<td align="left" valign="top">Fully Connected</td>
<td align="center" valign="top">2 neurons</td>
<td align="center" valign="top">2</td>
</tr>
<tr>
<td align="left" valign="top">12</td>
<td align="left" valign="top">Softmax</td>
<td align="center" valign="top">&#x2013;</td>
<td align="center" valign="top">2</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The CNN was trained with the Adam optimizer (<xref ref-type="bibr" rid="ref16">Kingma and Ba, 2014</xref>), which combines adaptive learning-rate updates with momentum to accelerate convergence and improve training stability (<xref ref-type="bibr" rid="ref23">Qian, 1999</xref>). Training ran for up to 100 epochs, with a mini-batch size of 128 to balance computational efficiency and gradient estimation accuracy. To improve generalization, the training data were shuffled before each epoch, reducing the risk of learning spurious correlations. To mitigate overfitting caused by the high degree of patch overlap, L2 regularization (<xref ref-type="bibr" rid="ref18">Krogh and Hertz, 1991</xref>) with a factor of 0.0005 was applied to the network weights. This penalizes large weights, encouraging simpler models that generalize better. The CNN&#x2019;s performance was assessed on a separate validation set at the end of each epoch. Early stopping (patience&#x202F;=&#x202F;8 epochs) halted training if the validation loss failed to improve for 8 consecutive epochs (<xref ref-type="bibr" rid="ref22">Prechelt, 2012</xref>). Finally, the checkpoint that achieved the best validation performance was retained as the final model.</p>
</sec>
<sec id="sec8">
<label>3.2.3</label>
<title>CNN validation and testing</title>
<p>To ensure a rigorous evaluation of the CNN, a 6-fold cross-validation strategy was implemented at the painting level (leave-one-painting-out), rather than at the patch level, to avoid bias and data leakage caused by high patch overlap. In each fold, the patches of five paintings were used for training, and the remaining painting for validation. The model achieving the highest validation performance in each fold was retained as the fold&#x2019;s final model. After completing all folds, the results were aggregated, and performance metrics, including precision, sensitivity (recall), specificity, and F1 score, were computed from the aggregated confusion matrix. In addition to these standard metrics, the area under the ROC curve (AUC), the geometric mean (G-mean), which balances sensitivity and specificity and is particularly relevant for imbalanced datasets (<xref ref-type="bibr" rid="ref19">Kuncheva et al., 2019</xref>), and Cohen&#x2019;s kappa which accounts for agreement beyond chance and provides a more robust assessment of classification reliability (<xref ref-type="bibr" rid="ref33">Warrens, 2014</xref>) were also computed to assess the model&#x2019;s discriminative ability and reliability.</p>
<p>To evaluate the model&#x2019;s performance beyond cross-validation, a two-part test procedure was performed. First, the final CNN was trained with all patches from the six paintings in the training dataset, reserving 5% of these patches as a hold-out validation set to apply early stopping with the same criteria applied during cross-validation (patience&#x202F;=&#x202F;8 epochs). This model was then tested on the test dataset (see <xref ref-type="fig" rid="fig3">Figure 3</xref>) composed exclusively of non-Veronese paintings, allowing assessment of its ability to correctly reject non-authentic works. The second part of the procedure evaluated the model&#x2019;s capacity to recognize authentic Veronese paintings: each of the four Veronese paintings in the training dataset (see <xref ref-type="fig" rid="fig2">Figure 2</xref>) was used as a test case in turn, while the CNN was retrained after excluding the corresponding painting from the training set. The CNN produced class probabilities for each patch, and the final painting-level classification was obtained by averaging the probabilities across all the patches. These patch probabilities were used to generate Veronese probability heatmaps for visual analysis.</p>
<p>Finally, the painting under examination was evaluated with the final CNN model trained on all six paintings in the training dataset. The model classified its patches individually. The final painting-level score was obtained by averaging the Veronese probabilities across all patches, reflecting the likelihood that the work is an authentic Veronese. As with the test paintings, these patch probabilities were used to generate a Veronese probability heatmap for detailed visual inspection.</p>
<p>The complete MATLAB R2025b source code, which implements the end-to-end proposed methodology for CNN training and testing, along with the full training and test datasets, is publicly available at <ext-link xlink:href="https://www.ugr.es/~demiras/PaintingAuthentication/" ext-link-type="uri">https://www.ugr.es/~demiras/PaintingAuthentication/</ext-link>.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="sec9">
<label>4</label>
<title>Results</title>
<p>This section presents the experimental results, first detailing the CNN&#x2019;s cross-validation performance (Section 4.1), then the authentication results for the test paintings and the painting under investigation (Section 4.2), and finally the comparison with the baseline model MobileNetV2 (Section 4.3).</p>
<sec id="sec10">
<label>4.1</label>
<title>CNN validation results</title>
<p>The experimental setup described above is summarized as follows: the training dataset consisted of six paintings (four by Veronese and two from his workshop, see <xref ref-type="fig" rid="fig2">Figure 2</xref>), each cropped and resized to 640&#x202F;&#x00D7;&#x202F;640 pixels. From each painting, patches were extracted using a sliding window procedure (64&#x202F;&#x00D7;&#x202F;64 pixels, 65% overlap) applied to a 5-channel input stack created by combining the three representations: RGB (3 channels), grayscale (1 channel), and edge map (1 channel). This process generated 4,374 patches (729 per painting), with each patch having dimensions of 64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;5. Finally, the CNN was evaluated with 6-fold, painting-level cross-validation (leave-one-painting-out) to prevent data leakage and ensure a robust assessment of generalization.</p>
<p><xref ref-type="table" rid="tab3">Table 3</xref> reports the confusion matrix aggregated across all six painting-level folds. The key performance metrics derived from this matrix are summarized in <xref ref-type="table" rid="tab4">Table 4</xref>. Finally, <xref ref-type="fig" rid="fig5">Figure 5</xref> depicts the corresponding ROC curve and the resulting AUC.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Confusion matrix of the CNN model&#x2019;s performance across the validation folds.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>Actual / Predicted</th>
<th align="center" valign="top">Predicted Veronese</th>
<th align="center" valign="top">Predicted non-Veronese</th>
<th align="center" valign="top">Actual total</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Actual Veronese</td>
<td align="center" valign="middle">2,833 (TP)</td>
<td align="center" valign="middle">157 (FN)</td>
<td align="center" valign="middle">2,990</td>
</tr>
<tr>
<td align="left" valign="middle">Actual non-Veronese</td>
<td align="center" valign="middle">83 (FP)</td>
<td align="center" valign="middle">1,301 (TN)</td>
<td align="center" valign="middle">1,384</td>
</tr>
<tr>
<td align="left" valign="middle">Predicted total</td>
<td align="center" valign="middle">2,916</td>
<td align="center" valign="middle">1,458</td>
<td align="center" valign="middle">4,374</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Values represent the total count of patches aggregated across all six cross-validation folds. TP, True positive; FN, False negative; FP, False positive; TN, True negative.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>CNN performance metrics aggregated across all six painting-level validation folds.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Accuracy</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">F1-score</th>
<th align="center" valign="top">Sensitivity</th>
<th align="center" valign="top">Specificity</th>
<th align="center" valign="top">G-mean</th>
<th align="center" valign="top">Kappa</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">94.51%</td>
<td align="center" valign="middle">94.75%</td>
<td align="center" valign="middle">95.94%</td>
<td align="center" valign="middle">97.15%</td>
<td align="center" valign="middle">89.23%</td>
<td align="center" valign="middle">93.11%</td>
<td align="center" valign="middle">0.87</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>ROC curve and AUC for the CNN from the 6-fold, painting-level cross-validation.</p>
</caption>
<graphic xlink:href="frai-08-1738444-g005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">ROC curve showing the performance of a CNN model with an area under the curve (AUC) of 0.99. The x-axis represents the false positive rate, and the y-axis represents the true positive rate. A blue line represents the CNN model, and a red dashed line represents a random classifier.</alt-text>
</graphic>
</fig>
<p>As detailed in <xref ref-type="table" rid="tab3">Tables 3</xref>, <xref ref-type="table" rid="tab4">4</xref>, the CNN demonstrates high accuracy and strong agreement beyond chance. The slightly lower specificity indicates that classifying non-Veronese patches is more challenging. The model&#x2019;s overall discriminative power is exceptional, as evidenced by an AUC of 0.99 (see <xref ref-type="fig" rid="fig5">Figure 5</xref>).</p>
</sec>
<sec id="sec11">
<label>4.2</label>
<title>CNN testing and authentication results</title>
<p>The CNN was tested on the test dataset (see <xref ref-type="fig" rid="fig3">Figure 3</xref>) composed exclusively of non-Veronese paintings. Additionally, to evaluate its capacity to recognize authentic Veronese paintings, a leave-one-out approach was used: each of the four Veronese paintings in the training dataset (see <xref ref-type="fig" rid="fig2">Figure 2</xref>) was used as a test case in turn, while the CNN was retrained after excluding that painting from the training set. Because the goal is to authenticate a painting, the test phase prioritized analysis of the probability distribution of the Veronese class across each painting rather than computing standard performance metrics. For each test painting, the CNN produced a class probability for every patch. These scores were then aggregated by averaging to obtain a painting-level authenticity score for the entire painting. This approach also enabled a spatial visualization of the model&#x2019;s predictions in the form of probability heatmaps.</p>
<p><xref ref-type="fig" rid="fig6">Figure 6</xref> shows the results for the test dataset of non-Veronese paintings, displaying both the patch-level Veronese probability heatmap and its overlay on the original image. The heatmap was overlaid on the original painting using alpha blending, with transparency set directly proportional to the Veronese probability. As expected, these works exhibit low Veronese probabilities, with painting-level average Veronese probabilities ranging from 11.9 to 36.7%. This low score is particularly noteworthy for Painting 10, which is a direct copy of authentic Painting 4 (which was included in the training set). Despite sharing an identical composition, the model correctly assigned the copy the lowest Veronese probability (11.9%), demonstrating its ability to distinguish the master&#x2019;s original technique from a reproduction. Analysis of patch-level variance showed that the 95% confidence-interval upper bounds for all non-Veronese copies consistently remained below 40% (specifically, the highest upper bound was 39.93% for Painting 9), indicating a statistically significant separation from the authenticated painting (see results below). The Veronese probability heatmaps display sparse, fragmented regions of high probability concentrated around localized details, reflecting limited stylistic alignment.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Localized Veronese probabilities for patches in the test dataset of non-Veronese paintings. The columns display: Veronese probability maps (left), probability map overlays with global average probabilities (middle), and the original paintings (right).</p>
</caption>
<graphic xlink:href="frai-08-1738444-g006.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Four sets of images compare Veronese probability maps, overlaid maps, and original paintings. Each row shows a probability map with color gradients, indicating Veronese style likelihood; an overlaid version with the original painting partially visible; and the full original painting. Probabilities range from 11.9% to 36.7%, identifying varying stylistic resemblance to the artist Veronese in the paintings numbered 7 to 10.</alt-text>
</graphic>
</fig>
<p><xref ref-type="fig" rid="fig7">Figure 7</xref> reports the results for the four authentic Veronese paintings. All authentic paintings achieved high painting-level average probabilities (ranging from 80.3 to 99.9%), demonstrating the model&#x2019;s ability to generalize to unseen authentic paintings. Even accounting for patch-level variance, 95% confidence-interval lower bounds for all authentic paintings exceeded 77% (the lowest being 77.45% for Painting 2), maintaining a clear margin above the upper bounds for non-Veronese paintings. The Veronese probability heatmaps corroborate this finding, showing extensive high Veronese probability regions across all key compositional areas.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Localized Veronese probabilities for patches in authentic Veronese paintings. The columns show: Veronese probability maps (left), probability map overlays with global average probabilities (middle), and the original paintings (right).</p>
</caption>
<graphic xlink:href="frai-08-1738444-g007.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Four-panel image showing Veronese probability maps, overlays, and original paintings. Each row includes the probability map on the left, overlay in the middle, and original painting on the right. Panels are labeled one to four, showing various religious scenes with probability values ranging from eighty to nearly one hundred percent. Color gradients on probability maps indicate confidence levels, with yellow and red indicating higher probabilities.</alt-text>
</graphic>
</fig>
<p>Finally, <xref ref-type="fig" rid="fig8">Figure 8</xref> reports the results for the painting under authentication. Regions of high Veronese likelihood concentrate primarily in areas containing anatomical details and drapery folds. The painting-level average Veronese probability was 61.0%. The standard deviation across the patches was 0.41, resulting in a 95% confidence interval (CI) for the mean of [58.00, 63.96%]. This interval falls within a distinct intermediate range, separated from the upper bound of the non-Veronese copies (&#x003C; 40%) and the lower bound of the authentic works (&#x003E; 77%). This intermediate value suggests a moderate stylistic alignment with authentic Veronese paintings, indicating that although the painting shares relevant stylistic features with authentic Veronese works, the evidence remains insufficient for full attribution.</p>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Localized Veronese probabilities for patches in the painting under authentication: Veronese probability map (left), overlay of probability map with global average probability (middle), and original painting (right).</p>
</caption>
<graphic xlink:href="frai-08-1738444-g008.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three panels depict a Veronese probability map. The first panel shows a heat map in red, yellow, and black, indicating varying probabilities. The second panel overlays this map onto a painting of a woman and child, with a Veronese probability of 61.0 percent. The third panel features the original painting without the overlay.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec12">
<label>4.3</label>
<title>Comparison with baseline model (MobileNetV2)</title>
<p>To benchmark the proposed shallow architecture against a state-of-the-art model, a comparative evaluation was conducted using MobileNetV2 (<xref ref-type="bibr" rid="ref29">Sandler et al., 2018</xref>) via transfer learning. Since the pretrained architecture of MobileNetV2 expects 224&#x202F;&#x00D7;&#x202F;224-pixel RGB (3 channels) inputs, the experimental setup was adapted accordingly: patches were upsampled from 64&#x202F;&#x00D7;&#x202F;64 to 224&#x202F;&#x00D7;&#x202F;224 using bicubic interpolation, and the grayscale and edge channels were discarded. <xref ref-type="table" rid="tab5">Table 5</xref> summarizes comparative performance.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Performance comparison of the proposed shallow CNN and MobileNetV2.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>Data</th>
<th align="center" valign="top">Proposed shallow CNN</th>
<th align="center" valign="top">MobileNetV2 (transfer learning)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Input</td>
<td align="center" valign="middle">64&#x202F;&#x00D7;&#x202F;64&#x202F;&#x00D7;&#x202F;5</td>
<td align="center" valign="middle">224&#x202F;&#x00D7;&#x202F;224&#x202F;&#x00D7;&#x202F;3</td>
</tr>
<tr>
<td align="left" valign="middle">Parameters</td>
<td align="center" valign="middle">21,800</td>
<td align="center" valign="middle">2,200,000</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="3">Cross-validation</td>
</tr>
<tr>
<td align="left" valign="middle">Accuracy</td>
<td align="center" valign="middle">94.51%</td>
<td align="center" valign="middle">97.23%</td>
</tr>
<tr>
<td align="left" valign="middle">Precision</td>
<td align="center" valign="middle">94.75%</td>
<td align="center" valign="middle">98.61%</td>
</tr>
<tr>
<td align="left" valign="middle">F1-score</td>
<td align="center" valign="middle">95.94%</td>
<td align="center" valign="middle">97.91%</td>
</tr>
<tr>
<td align="left" valign="middle">Sensitivity</td>
<td align="center" valign="middle">97.15%</td>
<td align="center" valign="middle">97.22%</td>
</tr>
<tr>
<td align="left" valign="middle">Specificity</td>
<td align="center" valign="middle">89.23%</td>
<td align="center" valign="middle">97.26%</td>
</tr>
<tr>
<td align="left" valign="middle">G-mean</td>
<td align="center" valign="middle">93.11%</td>
<td align="center" valign="middle">97.24%</td>
</tr>
<tr>
<td align="left" valign="middle">Kappa</td>
<td align="center" valign="middle">0.87</td>
<td align="center" valign="middle">0.94</td>
</tr>
<tr>
<td align="left" valign="middle">AUC-ROC</td>
<td align="center" valign="middle">0.99</td>
<td align="center" valign="middle">0.99</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="3">Test (% Veronese probability)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 1 (Veronese)</td>
<td align="center" valign="middle">99.9 (99.7&#x2013;100.0)</td>
<td align="center" valign="middle">72.8 (72.6&#x2013;73.0)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 2 (Veronese)</td>
<td align="center" valign="middle">80.3 (77.5&#x2013;83.1)</td>
<td align="center" valign="middle">73.0 (72.9&#x2013;73.1)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 3 (Veronese)</td>
<td align="center" valign="middle">86.8 (84.9&#x2013;88.8)</td>
<td align="center" valign="middle">62.8 (61.6&#x2013;64.0)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 4 (Veronese)</td>
<td align="center" valign="middle">89.2 (87.2&#x2013;91.2)</td>
<td align="center" valign="middle">69.0 (68.4&#x2013;69.7)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 7 (non-Veronese)</td>
<td align="center" valign="middle">32.6 (29.6&#x2013;35.6)</td>
<td align="center" valign="middle">56.1 (54.7&#x2013;57.4)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 8 (non-Veronese)</td>
<td align="center" valign="middle">34.2 (31.2&#x2013;37.2)</td>
<td align="center" valign="middle">72.7 (72.5&#x2013;72.9)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 9 (non-Veronese)</td>
<td align="center" valign="middle">36.7 (33.6&#x2013;39.9)</td>
<td align="center" valign="middle">34.3 (33.2&#x2013;35.3)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting 10 (non-Veronese)</td>
<td align="center" valign="middle">11.9 (10.0&#x2013;13.8)</td>
<td align="center" valign="middle">71.7 (71.3&#x2013;72.1)</td>
</tr>
<tr>
<td align="left" valign="middle">Painting under authentication</td>
<td align="center" valign="middle">61.0 (58.0&#x2013;64.0)</td>
<td align="center" valign="middle">63.6 (62.4&#x2013;64.8)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Veronese probabilities are reported as mean (95% confidence interval).</p>
</table-wrap-foot>
</table-wrap>
<p>In 6-fold cross-validation, MobileNetV2 demonstrated strong performance, achieving an accuracy of 97.23%, precision of 98.61%, F1 score of 97.91%, sensitivity of 97.22%, specificity of 97.26%, G-mean of 97.24%, and a Kappa coefficient of 0.94. The AUC-ROC was 0.99. These results slightly surpassed those of the proposed shallow CNN during cross-validation.</p>
<p>However, despite strong cross-validation metrics, the model exhibited poor generalization on the test dataset. MobileNetV2 failed to establish a decision boundary between non-Veronese paintings and authentic Veronese works: (1) it assigned high Veronese probabilities to non-Veronese samples, such as 72.7% (Painting 8) and 71.7% (Painting 10), resulting in substantial false positives compared to the proposed model (&#x003C; 37%); and (2) conversely, probabilities for authentic Veronese paintings were lower than expected with MobileNetV2 (62.8&#x2013;73.0%), indicating reduced sensitivity relative to the proposed model (&#x003E; 80%). For the painting under authentication, MobileNetV2 yielded a Veronese probability of 63.6%.</p>
</sec>
</sec>
<sec id="sec13">
<label>5</label>
<title>Discussion and conclusions</title>
<p>This study addressed the challenge of authenticating paintings under conditions of data scarcity by developing a patch-based CNN pipeline tailored to stylistic analysis. The methodology combined sliding window feature extraction with multichannel inputs (RGB, grayscale, and edge maps) to capture complementary visual cues and implemented painting-level cross-validation to prevent data leakage. By expanding the dataset with localized patches and integrating regularization strategies, the approach aimed to enhance generalization while preserving stylistic fidelity. The CNN trained with these patches distinguished authentic Veronese works from non-Veronese paintings at both painting- and patch-levels. Authentic Veronese paintings show high painting-level probabilities (ranging from 80.3 to 99.9%) and Veronese probability heatmaps with broad, contiguous high-probability regions (see <xref ref-type="fig" rid="fig7">Figure 7</xref>), whereas non-Veronese paintings exhibit lower painting-level scores (ranging from 11.9 to 36.7%) and fragmented, localized regions of high probability (see <xref ref-type="fig" rid="fig6">Figure 6</xref>). The painting under authentication yields an intermediate painting-level score (61.0%) with extensive high-probability zones over stylistically salient passages, interspersed with some lower-probability areas (see <xref ref-type="fig" rid="fig8">Figure 8</xref>).</p>
<p>The sliding window design (64&#x202F;&#x00D7;&#x202F;64 patches, 65% overlap) expanded the training data from a small number of paintings to thousands of localized samples, encouraging the CNN to learn microtextural and brushwork signatures while controlling overfitting via dropout, L2 regularization (weight decay), batch normalization, the Adam optimizer, and early stopping. The high overlap mitigates boundary artifacts and stabilizes local predictions, an effect analogous to the overlap-tile inference widely used in biomedical image analysis (<xref ref-type="bibr" rid="ref25">Ronneberger et al., 2015</xref>). The multichannel design (RGB, grayscale, and edge map) leveraged complementary cues: chromatic information, luminance-texture structure, and gradient-defined contours. The Canny edge detector is well-suited here due to its double-threshold mechanism and robustness to noise, which supports capturing subtle, style-relevant edge patterns (<xref ref-type="bibr" rid="ref6">Canny, 1986</xref>).</p>
<p>The 6-fold painting-level cross-validation was essential to prevent data leakage from overlapping patches, an issue known to inflate performance in tile-based pipelines and subject-repeated imaging datasets (<xref ref-type="bibr" rid="ref5">Bussola et al., 2019</xref>; <xref ref-type="bibr" rid="ref26">Rumala, 2023</xref>). Cross-validation confirmed strong overall performance (see <xref ref-type="table" rid="tab4">Table 4</xref>; <xref ref-type="fig" rid="fig5">Figure 5</xref>): accuracy 94.51%; precision 94.75%; F1 score 95.94%; sensitivity 97.15%; specificity 89.23%; G-mean 93.11%; Cohen&#x2019;s kappa 0.87; and AUC-ROC 0.99. This set of metrics provides a balanced view: AUC-ROC summarizes rank discrimination (<xref ref-type="bibr" rid="ref11">Fawcett, 2006</xref>); G-mean emphasizes balanced sensitivity and specificity under class imbalance (<xref ref-type="bibr" rid="ref19">Kuncheva et al., 2019</xref>); and Cohen&#x2019;s kappa quantifies agreement beyond chance (<xref ref-type="bibr" rid="ref33">Warrens, 2014</xref>). The validation metrics confirmed the CNN model&#x2019;s high accuracy and exceptional discriminative power. The model was particularly effective at identifying authentic Veronese paintings (sensitivity&#x202F;=&#x202F;97.15%), with slightly lower performance for non-Veronese paintings (specificity&#x202F;=&#x202F;89.23%).</p>
<p>The heatmap visualization of Veronese probability provides critical insight into the CNN&#x2019;s decision-making process. In the painting under authentication (see <xref ref-type="fig" rid="fig8">Figure 8</xref>), high-probability regions cluster around stylistically salient elements such as anatomical contours and tonal transitions, whereas low-probability regions dominate less textured or peripheral areas. This uneven distribution suggests that the model detects Veronese-like features selectively rather than uniformly across the composition, helping to explain the intermediate global score of 61%. From a methodological perspective, this outcome reflects the patch-based classification strategy: aggregate probabilities capture local stylistic heterogeneity rather than enforce global uniformity. From an interpretive standpoint, the observed pattern may indicate (1) an authentic work with workshop participation; (2) an authentic work with later interventions or restoration that may alter original textures; or (3) a high-quality workshop production under close supervision. Consequently, while the CNN identifies meaningful stylistic affinities, the evidence remains insufficient to support definitive attribution.</p>
<p>The comparison between the painting under authentication and the non-Veronese replicas of the composition (paintings 7, 8, and 9) highlights the CNN&#x2019;s capacity to assess stylistic consistency at a global level while acknowledging local similarities. Although the non-Veronese paintings exhibit localized high-probability regions (see <xref ref-type="fig" rid="fig6">Figure 6</xref>), primarily around anatomical details and drapery folds, their overall scores remain low (ranging from 32 to 37%), indicating limited stylistic coherence. In contrast, the painting under authentication shows broader regions of high Veronese probability and a substantially higher global score (61%), suggesting a more pervasive presence of Veronese-like features. This pattern supports the hypothesis of partial authenticity or strong stylistic influence for the painting under authentication and confirms that the CNN does not rely solely on compositional similarity but captures nuanced textural and structural cues. The residual probabilities in non-Veronese paintings may arise from shared iconographic elements, underscoring the importance of interpreting probability heatmaps in conjunction with global metrics rather than in isolation. A key detail is that in all non-Veronese paintings, the area corresponding to St. Joseph in the upper-right corner consistently yields very low probabilities, whereas in the painting under authentication the corresponding region exhibits high probabilities (see <xref ref-type="fig" rid="fig8">Figure 8</xref>). This contrast indicates that the CNN captures subtle stylistic cues in localized passages that are absent or less pronounced in non-Veronese paintings despite compositional similarity.</p>
<p>The probability heatmaps for authentic Veronese paintings (see <xref ref-type="fig" rid="fig7">Figure 7</xref>) reveal broad, continuous regions of high probability. This spatially cohesive distribution contrasts sharply with the fragmented patterns observed in non-Veronese works and supports the CNN&#x2019;s capacity to capture stylistic coherence that extends beyond isolated details. The consistently high painting-level scores for Veronese paintings (ranging from 80.3 to 99.9%) indicate that the model generalizes effectively to unseen authentic works. These findings support the patch-based approach as a robust strategy for identifying nuanced textural and structural cues characteristic of Veronese&#x2019;s technique.</p>
<p>The comparison between paintings 4 (authentic Veronese) and 10 (copy) is particularly noteworthy. Despite compositional similarity, the authentic work yields a high painting-level probability (89.2%) and extensive high-probability regions (<xref ref-type="fig" rid="fig7">Figure 7</xref>). In contrast, the copy yields a low painting-level probability (11.9%) with sparse and fragmented high-probability regions (<xref ref-type="fig" rid="fig6">Figure 6</xref>). This supports the CNN&#x2019;s capacity to differentiate stylistic coherence from superficial compositional similarity.</p>
<p>Notably, one authentic Veronese painting (see painting 2 in <xref ref-type="fig" rid="fig7">Figure 7</xref>) exhibits a sharply defined low-probability region confined to a rectangular area in the upper-left corner. Two nonexclusive explanations are plausible: (1) the model may struggle to classify uniform, low-texture passages that provide weak class-specific signals; (2) such areas may indicate workshop participation in secondary passages, a documented practice in 16th-century Venice in which masters concentrated on principal figures while assistants executed peripheral and ornamental elements (<xref ref-type="bibr" rid="ref12">Gisolfi, 2017</xref>).</p>
<p>To comprehensively benchmark the proposed architecture, a comparative experiment with MobileNetV2 (<xref ref-type="bibr" rid="ref29">Sandler et al., 2018</xref>) via transfer learning using a 224&#x202F;&#x00D7;&#x202F;224&#x202F;&#x00D7;&#x202F;3 input was conducted. The results indicated critical limitations in applying standard deep learning models to this particular domain. Although MobileNetV2 achieved high metrics during cross-validation (accuracy: 97.23%), it failed to establish a discriminative boundary in the test dataset, yielding overlapping probability ranges for Veronese works (62.8&#x2013;73.0%) and non-Veronese paintings (up to 72.7%). Two technical factors account for this generalization failure when compared with the proposed architecture. First, the loss of resolution: the required upsampling from 64&#x202F;&#x00D7;&#x202F;64 to 224&#x202F;&#x00D7;&#x202F;224 pixels effectively acts as a low-pass filter, smoothing out high-frequency microtextures essential for distinguishing Veronese brushwork. Consequently, the pretrained model likely overfitted to macroscopic features, such as color palettes, semantic content, and composition, shared by both originals and workshop productions. Second, input rigidity: by restricting the input to RGB channels, MobileNetV2 excluded the explicit grayscale intensity and edge maps used in the proposed pipeline, thereby losing critical topological information about brushstroke dynamics that proved decisive in the shallow CNN.</p>
<p>Notably, for the Holy Family painting under authentication, MobileNetV2 predicted a Veronese probability of 63.6%, closely matching the 61.0% obtained by the proposed shallow CNN. This convergence suggests that the classification of the artwork as an intermediate or workshop production is a robust signal, persisting across different model architectures. Although MobileNetV2 lacks sufficient specificity to reject clearly non-Veronese works, its agreement with the proposed specialized shallow CNN for the disputed artwork supports the reliability of the assessment: the painting exhibits a hybrid visual structure that is neither accepted as a fully authentic Veronese work nor rejected as a mere Veronese-style replica.</p>
<sec id="sec14">
<label>5.1</label>
<title>Limitations</title>
<p>While the CNN pipeline developed in this study demonstrates strong performance in distinguishing authentic Veronese paintings from non-Veronese works, several limitations must be acknowledged, particularly for the authentication of a single, stylistically ambiguous painting. (1) The dataset is narrowly focused on a specific iconographic theme, representations of the Holy Family. While this restriction was deliberate to minimize semantic noise and encourage the model to focus on painterly execution rather than composition, it also limits the generalizability of the learned features. The model may have become particularly sensitive to iconography-specific details, such as poses, color schemes, or thematic composition common to religious scenes, rather than capturing a general Veronese style applicable across other genres like portraiture and mythological scenes. Consequently, this constraint indicates that the model is designed for a specific authentication task rather than for general classification across Veronese&#x2019;s broader body of work. (2) The patch-based approach, while effective for capturing localized stylistic features, inherently limits learning on higher-order stylistic structures such as overall composition, figure-scale relationships, and macro-level color organization. As a result, the authentication lacks the holistic structural analysis typically undertaken by art historians. Conversely, this constraint enables the model to detect execution differences in compositionally identical copies (e.g., Painting 10). (3) The binary classification framework (Veronese versus non-Veronese) reflects limited availability of comparable works and does not account for intermediate cases such as workshop productions or restorations. 
Future research should extend this framework by incorporating distinct classes such as &#x201C;Workshop of Veronese,&#x201D; &#x201C;In the style of,&#x201D; and &#x201C;Later additions,&#x201D; contingent on sufficient training data becoming available. A multiclass approach would better reflect the complex reality of Renaissance artistic production, enabling the model to distinguish between the master&#x2019;s autograph execution and collaborative production typical of his workshop. (4) Although the CNN offers interpretable probability heatmaps, these visualizations are primarily qualitative and can be affected by patch overlap and boundary artifacts. (5) Finally, while the model identifies stylistic affinities with high sensitivity, its output should be interpreted as probabilistic evidence rather than as definitive attribution.</p>
</sec>
<sec id="sec15">
<label>5.2</label>
<title>Conclusion</title>
<p>This study demonstrates that the challenge of extreme data scarcity in art authentication can be effectively addressed with tailored computational strategies. By reducing the network complexity to align with the limited historical record, the proposed, shallow, custom architecture achieved robust generalization (accuracy: 94.51%, AUC-ROC: 0.99) and higher specificity than standard solutions. The comparative experiment showed that large-scale pretrained models like MobileNetV2, while performing well during cross-validation, fail to reliably distinguish non-Veronese paintings from originals. These results support the premise that for specific heritage tasks, a tailored, texture-focused model is not only more resource-efficient but also diagnostically more reliable than general-purpose large-scale models.</p>
<p>For the Holy Family painting under examination, the proposed model provides strong quantitative evidence against a binary authentic-versus-inauthentic classification. The painting-level probability of 61.0%, supported by a tight confidence interval (58.0&#x2013;64.0%), places the artwork in a statistically distinct category, separated from the non-Veronese paintings (&#x003C; 40%) and authentic Veronese works (&#x003E; 77%). These results support the hypothesis of either high-quality workshop production or partial authorship, providing a level of nuance that subjective visual inspection alone cannot quantify. Therefore, conclusive authentication will require further integration of technical, historical, and provenance analyses beyond computational inference alone.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec16">
<title>Data availability statement</title>
<p>The datasets and source code in this study are publicly available at <ext-link xlink:href="https://www.ugr.es/~demiras/PaintingAuthentication/" ext-link-type="uri">https://www.ugr.es/~demiras/PaintingAuthentication/</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="sec17">
<title>Author contributions</title>
<p>JRdM: Software, Formal analysis, Conceptualization, Methodology, Writing &#x2013; original draft, Data curation, Validation, Visualization. JV: Supervision, Resources, Writing &#x2013; review &#x0026; editing, Formal analysis, Data curation, Validation. MG: Methodology, Formal analysis, Writing &#x2013; review &#x0026; editing, Supervision. DM: Writing &#x2013; review &#x0026; editing, Conceptualization, Funding acquisition, Supervision, Resources, Project administration, Formal analysis.</p>
</sec>
<sec sec-type="COI-statement" id="sec18">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec19">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was used in the creation of this manuscript. English language revision.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec20">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>An</surname><given-names>Y.</given-names></name> <name><surname>Ye</surname><given-names>Q.</given-names></name> <name><surname>Guo</surname><given-names>J.</given-names></name> <name><surname>Dong</surname><given-names>R.</given-names></name></person-group> (<year>2020</year>). &#x201C;<article-title>Overlap training to mitigate inconsistencies caused by image tiling in CNNs</article-title>&#x201D; in <source>Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics)</source>. eds. <person-group person-group-type="editor"><name><surname>Goos</surname><given-names>G.</given-names></name> <name><surname>Hartmanis</surname><given-names>J.</given-names></name></person-group> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer Science and Business Media Deutschland GmbH</publisher-name>), <fpage>35</fpage>&#x2013;<lpage>48</lpage>.</mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Basha</surname><given-names>J.</given-names></name> <name><surname>Bacanin</surname><given-names>N.</given-names></name> <name><surname>Vukobrat</surname><given-names>N.</given-names></name> <name><surname>Zivkovic</surname><given-names>M.</given-names></name> <name><surname>Venkatachalam</surname><given-names>K.</given-names></name> <name><surname>Hub&#x00E1;lovsk&#x00FD;</surname><given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Chaotic Harris hawks optimization with quasi-reflection-based learning: an application to enhance CNN design</article-title>. <source>Sensors</source> <volume>21</volume>:<fpage>6654</fpage>. doi: <pub-id pub-id-type="doi">10.3390/S21196654</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bejani</surname><given-names>M. M.</given-names></name> <name><surname>Ghatee</surname><given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>A systematic review on overfitting control in shallow and deep neural networks</article-title>. <source>Artif. Intell. Rev.</source> <volume>54</volume>, <fpage>6391</fpage>&#x2013;<lpage>6438</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10462-021-09975-1</pub-id></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Blanc</surname><given-names>R.</given-names></name> <name><surname>Manzano</surname><given-names>E.</given-names></name> <name><surname>L&#x00F3;pez-Montes</surname><given-names>A.</given-names></name> <name><surname>Dom&#x00ED;nguez-Gasca</surname><given-names>N.</given-names></name> <name><surname>V&#x00ED;lchez</surname><given-names>J. L.</given-names></name></person-group> (<year>2023</year>). <article-title>Non-invasive study of the pigments of a painting on copper with the inscription &#x201C;Boceto di Pablo Veronese&#x201D; on the back</article-title>. <source>Heritage</source> <volume>6</volume>, <fpage>4787</fpage>&#x2013;<lpage>4801</lpage>. doi: <pub-id pub-id-type="doi">10.3390/HERITAGE6060254</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Bussola</surname><given-names>N.</given-names></name> <name><surname>Marcolini</surname><given-names>A.</given-names></name> <name><surname>Maggio</surname><given-names>V.</given-names></name> <name><surname>Jurman</surname><given-names>G.</given-names></name> <name><surname>Furlanello</surname><given-names>C.</given-names></name></person-group> (<year>2019</year>). &#x201C;<article-title>AI slipping on tiles: data leakage in digital pathology</article-title>&#x201D; in <source>Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics)</source>. ed. <person-group person-group-type="editor"><name><surname>Bussola</surname><given-names>N.</given-names></name></person-group> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer Science and Business Media Deutschland GmbH</publisher-name>), <fpage>167</fpage>&#x2013;<lpage>182</lpage>.</mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Canny</surname><given-names>J.</given-names></name></person-group> (<year>1986</year>). <article-title>A computational approach to edge detection</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>PAMI-8</volume>, <fpage>679</fpage>&#x2013;<lpage>698</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.1986.4767851</pub-id>, <pub-id pub-id-type="pmid">21869365</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>R.</given-names></name> <name><surname>Ghavidel Aghdam</surname><given-names>M. R.</given-names></name> <name><surname>Khishe</surname><given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Utilization of artificial intelligence for the automated recognition of fine arts</article-title>. <source>PLoS One</source> <volume>19</volume>:<fpage>e0312739</fpage>. doi: <pub-id pub-id-type="doi">10.1371/JOURNAL.PONE.0312739</pub-id>, <pub-id pub-id-type="pmid">39585839</pub-id></mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cira</surname><given-names>C. I.</given-names></name> <name><surname>Manso-Callejo</surname><given-names>M. &#x00C1;.</given-names></name> <name><surname>Yokoya</surname><given-names>N.</given-names></name> <name><surname>S&#x0103;l&#x0103;gean</surname><given-names>T.</given-names></name> <name><surname>Badea</surname><given-names>A. C.</given-names></name></person-group> (<year>2024</year>). <article-title>Impact of tile size and tile overlap on the prediction performance of convolutional neural networks trained for road classification</article-title>. <source>Remote Sens</source> <volume>16</volume>:<fpage>2818</fpage>. doi: <pub-id pub-id-type="doi">10.3390/RS16152818</pub-id></mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dobbs</surname><given-names>T.</given-names></name> <name><surname>Nayeem</surname><given-names>A. A. R.</given-names></name> <name><surname>Cho</surname><given-names>I.</given-names></name> <name><surname>Ras</surname><given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>Contemporary art authentication with large-scale classification</article-title>. <source>Big Data Cogn. Comput.</source> <volume>7</volume>:<fpage>162</fpage>. doi: <pub-id pub-id-type="doi">10.3390/BDCC7040162</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Elgammal</surname><given-names>A.</given-names></name> <name><surname>Liu</surname><given-names>B.</given-names></name> <name><surname>Kim</surname><given-names>D.</given-names></name> <name><surname>Elhoseiny</surname><given-names>M.</given-names></name> <name><surname>Mazzone</surname><given-names>M.</given-names></name></person-group> (<year>2018</year>). <italic>The shape of art history in the eyes of the machine</italic>. In 32nd AAAI conference on artificial intelligence, AAAI 2018, AAAI Press, pp. 2183&#x2013;2191.</mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fawcett</surname><given-names>T.</given-names></name></person-group> (<year>2006</year>). <article-title>An introduction to ROC analysis</article-title>. <source>Pattern Recogn. Lett.</source> <volume>27</volume>, <fpage>861</fpage>&#x2013;<lpage>874</lpage>. doi: <pub-id pub-id-type="doi">10.1016/J.PATREC.2005.10.010</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Gisolfi</surname><given-names>D.</given-names></name></person-group> (<year>2017</year>). <source>Paolo Veronese and the practice of painting in late renaissance Venice</source>. <publisher-loc>New Haven, CT</publisher-loc>: <publisher-name>Yale University Press</publisher-name>.</mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Ioffe</surname><given-names>S.</given-names></name> <name><surname>Szegedy</surname><given-names>C.</given-names></name></person-group> (<year>2015</year>). <italic>Batch normalization: accelerating deep network training by reducing internal covariate shift</italic>. In: 32nd International Conference on Machine Learning, ICML 2015 (International Machine Learning Society (IMLS)), pp. 448&#x2013;456.</mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname><given-names>C. R.</given-names></name> <name><surname>Hendriks</surname><given-names>E.</given-names></name> <name><surname>Berezhnoy</surname><given-names>I. J.</given-names></name> <name><surname>Brevdo</surname><given-names>E.</given-names></name> <name><surname>Hughes</surname><given-names>S. M.</given-names></name> <name><surname>Daubechies</surname><given-names>I.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>Image processing for artist identification</article-title>. <source>IEEE Signal Process. Mag.</source> <volume>25</volume>, <fpage>37</fpage>&#x2013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.1109/MSP.2008.923513</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>King</surname><given-names>E.</given-names></name></person-group> (<year>2024</year>). <italic>Art authentication: Human expertise vs. emerging tech</italic>. MyArtBroker. Available online at: <ext-link xlink:href="https://www.myartbroker.com/collecting/articles/art-authentication-human-expertise-vs-emerging-tech" ext-link-type="uri">https://www.myartbroker.com/collecting/articles/art-authentication-human-expertise-vs-emerging-tech</ext-link> (Accessed July 27, 2025).</mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Kingma</surname><given-names>D. P.</given-names></name> <name><surname>Ba</surname><given-names>J. L.</given-names></name></person-group> (<year>2014</year>). <italic>Adam: a method for stochastic optimization</italic>. In: 3rd International Conference on Learning Representations, ICLR 2015 - Conference Track Proceedings (International Conference on Learning Representations, ICLR). Available online at: <ext-link xlink:href="https://arxiv.org/pdf/1412.6980" ext-link-type="uri">https://arxiv.org/pdf/1412.6980</ext-link> (Accessed October 9, 2025).</mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Krizhevsky</surname><given-names>A.</given-names></name> <name><surname>Sutskever</surname><given-names>I.</given-names></name> <name><surname>Hinton</surname><given-names>G. E.</given-names></name></person-group> (<year>2017</year>). <article-title>ImageNet classification with deep convolutional neural networks</article-title>. <source>Commun. ACM</source> <volume>60</volume>, <fpage>84</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3065386</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Krogh</surname><given-names>A.</given-names></name> <name><surname>Hertz</surname><given-names>J. A.</given-names></name></person-group> (<year>1991</year>). <italic>A simple weight decay can improve generalization</italic>. In proceedings of the 5th international conference on neural information processing systems NIPS&#x2019;91. San Francisco, CA: Morgan Kaufmann Publishers Inc., pp. 950&#x2013;957.</mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kuncheva</surname><given-names>L. I.</given-names></name> <name><surname>Arnaiz-Gonz&#x00E1;lez</surname><given-names>&#x00C1;.</given-names></name> <name><surname>D&#x00ED;ez-Pastor</surname><given-names>J. F.</given-names></name> <name><surname>Gunn</surname><given-names>I. A. D.</given-names></name></person-group> (<year>2019</year>). <article-title>Instance selection improves geometric mean accuracy: a study on imbalanced data classification</article-title>. <source>Prog. Artif. Intell.</source> <volume>8</volume>, <fpage>215</fpage>&#x2013;<lpage>228</lpage>. doi: <pub-id pub-id-type="doi">10.1007/S13748-019-00172-4</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>L&#x00F3;pez-Baldomero</surname><given-names>A. B.</given-names></name> <name><surname>Mart&#x00ED;nez-Domingo</surname><given-names>M. A.</given-names></name> <name><surname>Hern&#x00E1;ndez-Andr&#x00E9;s</surname><given-names>J.</given-names></name> <name><surname>Blanc</surname><given-names>R.</given-names></name> <name><surname>Vilchez-Quero</surname><given-names>J. L.</given-names></name> <name><surname>L&#x00F3;pez-Montes</surname><given-names>A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Endmember extraction for pigment identification pre- and post-intervention: a case study from a XVIth century copper plate painting</article-title>. <source>Arch. Conf.</source> <volume>20</volume>, <fpage>198</fpage>&#x2013;<lpage>203</lpage>. doi: <pub-id pub-id-type="doi">10.2352/ISSN.2168-3204.2023.20.1.40</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Nair</surname><given-names>V.</given-names></name> <name><surname>Hinton</surname><given-names>G. E.</given-names></name></person-group> (<year>2010</year>). <italic>Rectified linear units improve restricted Boltzmann machines</italic>. in Proceedings of the 27th international conference on international conference on machine learning ICML&#x2019;10, Madison, WI: Omnipress, pp. 807&#x2013;814.</mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Prechelt</surname><given-names>L.</given-names></name></person-group> (<year>2012</year>). &#x201C;<article-title>Early stopping&#x2013;but when?</article-title>&#x201D; in <source>Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics)</source>. ed. <person-group person-group-type="editor"><name><surname>Prechelt</surname><given-names>L.</given-names></name></person-group> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>53</fpage>&#x2013;<lpage>67</lpage>.</mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qian</surname><given-names>N.</given-names></name></person-group> (<year>1999</year>). <article-title>On the momentum term in gradient descent learning algorithms</article-title>. <source>Neural Netw.</source> <volume>12</volume>, <fpage>145</fpage>&#x2013;<lpage>151</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0893-6080(98)00116-6</pub-id>, <pub-id pub-id-type="pmid">12662723</pub-id></mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Radojcic</surname><given-names>V.</given-names></name> <name><surname>Bacanin</surname><given-names>N.</given-names></name> <name><surname>Jovanovic</surname><given-names>L.</given-names></name> <name><surname>Dobrojevic</surname><given-names>M.</given-names></name> <name><surname>Simic</surname><given-names>V.</given-names></name> <name><surname>Pamucar</surname><given-names>D.</given-names></name> <etal/></person-group>. (<year>2026</year>). <article-title>A two-layer TinyML approach aided by metaheuristics optimization for leveraging agriculture 4.0 and plant disease classification</article-title>. <source>Appl. Soft Comput.</source> <volume>186</volume>:<fpage>114179</fpage>. doi: <pub-id pub-id-type="doi">10.1016/J.ASOC.2025.114179</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ronneberger</surname><given-names>O.</given-names></name> <name><surname>Fischer</surname><given-names>P.</given-names></name> <name><surname>Brox</surname><given-names>T.</given-names></name></person-group> (<year>2015</year>). &#x201C;<article-title>U-net: convolutional networks for biomedical image segmentation</article-title>&#x201D; in <source>Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics)</source>. ed. <person-group person-group-type="editor"><name><surname>Ronneberger</surname><given-names>O.</given-names></name></person-group> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>234</fpage>&#x2013;<lpage>241</lpage>.</mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Rumala</surname><given-names>D. J.</given-names></name></person-group> (<year>2023</year>). &#x201C;<article-title>How you Split matters: data leakage and subject characteristics studies in longitudinal brain MRI analysis</article-title>&#x201D; in <source>Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics)</source>. ed. <person-group person-group-type="editor"><name><surname>Rumala</surname><given-names>D. J.</given-names></name></person-group> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer Science and Business Media Deutschland GmbH</publisher-name>), <fpage>235</fpage>&#x2013;<lpage>245</lpage>.</mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sabha</surname><given-names>S. u.</given-names></name> <name><surname>Assad</surname><given-names>A.</given-names></name> <name><surname>Shafi</surname><given-names>S.</given-names></name> <name><surname>Din</surname><given-names>N. M. U.</given-names></name> <name><surname>Dar</surname><given-names>R. A.</given-names></name> <name><surname>Bhat</surname><given-names>M. R.</given-names></name></person-group> (<year>2024</year>). <article-title>Imbalcbl: addressing deep learning challenges with small and imbalanced datasets</article-title>. <source>Int. J. Syst. Assur. Eng. Manag.</source> <volume>1</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1007/S13198-024-02346-3</pub-id></mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Safa aldin</surname><given-names>S.</given-names></name> <name><surname>Aldin</surname><given-names>N. B.</given-names></name> <name><surname>Ayka&#x00E7;</surname><given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Enhanced image classification using edge CNN (E-CNN)</article-title>. <source>Vis. Comput.</source> <volume>40</volume>, <fpage>319</fpage>&#x2013;<lpage>332</lpage>. doi: <pub-id pub-id-type="doi">10.1007/S00371-023-02784-3</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Sandler</surname><given-names>M.</given-names></name> <name><surname>Howard</surname><given-names>A.</given-names></name> <name><surname>Zhu</surname><given-names>M.</given-names></name> <name><surname>Zhmoginov</surname><given-names>A.</given-names></name> <name><surname>Chen</surname><given-names>L. C.</given-names></name></person-group> (<year>2018</year>). <italic>MobileNetV2: inverted residuals and linear bottlenecks</italic>. in Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (IEEE Computer Society), pp. 4510&#x2013;4520.</mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schaerf</surname><given-names>L.</given-names></name> <name><surname>Postma</surname><given-names>E.</given-names></name> <name><surname>Popovici</surname><given-names>C.</given-names></name></person-group> (<year>2024</year>). <article-title>Art authentication with vision transformers</article-title>. <source>Neural Comput. &#x0026; Applic.</source> <volume>36</volume>, <fpage>11849</fpage>&#x2013;<lpage>11858</lpage>. doi: <pub-id pub-id-type="doi">10.1007/S00521-023-08864-8</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Srivastava</surname><given-names>N.</given-names></name> <name><surname>Hinton</surname><given-names>G.</given-names></name> <name><surname>Krizhevsky</surname><given-names>A.</given-names></name> <name><surname>Sutskever</surname><given-names>I.</given-names></name> <name><surname>Salakhutdinov</surname><given-names>R.</given-names></name></person-group> (<year>2014</year>). <article-title>Dropout: a simple way to prevent neural networks from overfitting</article-title>. <source>J. Mach. Learn. Res.</source> <volume>15</volume>, <fpage>1929</fpage>&#x2013;<lpage>1958</lpage>. doi: <pub-id pub-id-type="doi">10.5555/2627435.2670313</pub-id></mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ugail</surname><given-names>H.</given-names></name> <name><surname>Stork</surname><given-names>D. G.</given-names></name> <name><surname>Edwards</surname><given-names>H.</given-names></name> <name><surname>Seward</surname><given-names>S. C.</given-names></name> <name><surname>Brooke</surname><given-names>C.</given-names></name></person-group> (<year>2023</year>). <article-title>Deep transfer learning for visual analysis and attribution of paintings by Raphael</article-title>. <source>Herit. Sci.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi: <pub-id pub-id-type="doi">10.1186/S40494-023-01094-0</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Warrens</surname><given-names>M. J.</given-names></name></person-group> (<year>2014</year>). <article-title>New interpretations of Cohen&#x2019;s kappa</article-title>. <source>J. Undergrad. Math.</source> <volume>2014</volume>:<fpage>203907</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2014/203907</pub-id></mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Zivkovic</surname><given-names>M.</given-names></name> <name><surname>Antonijevic</surname><given-names>M.</given-names></name> <name><surname>Jovanovic</surname><given-names>L.</given-names></name> <name><surname>Krasic</surname><given-names>M.</given-names></name> <name><surname>Bacanin</surname><given-names>N.</given-names></name> <name><surname>Zivkovic</surname><given-names>T.</given-names></name> <etal/></person-group>. (<year>2025</year>). <italic>Ocular disease diagnosis using CNNs optimized by modified variable neighborhood search algorithm</italic>. In: International joint conference on advances in computational intelligence (IJCACI 2024), Singapore: Springer, pp. 99&#x2013;112.</mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0002">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1126749/overview">Eva Tuba</ext-link>, Trinity University, United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0003">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2184974/overview">Miodrag Zivkovic</ext-link>, Singidunum University, Serbia</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2907884/overview">Geeta Sandeep Nadella</ext-link>, University of the Cumberlands, United States</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0001"><label>1</label><p><ext-link xlink:href="https://www.wikiart.org/" ext-link-type="uri">https://www.wikiart.org/</ext-link></p></fn>
</fn-group>
</back>
</article>