<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2025.1646611</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhanced-RICAP: a novel data augmentation strategy for improved deep learning-based plant disease identification and mobile diagnosis</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Diallo</surname>
<given-names>Mamadou Bailo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2829271/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Yue</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chukwuka</surname>
<given-names>Okafor Sylevester</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Boamah</surname>
<given-names>Solomon</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1265398/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gao</surname>
<given-names>Yuhong</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kana Kone</surname>
<given-names>Mohamed Meyer</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rocho</surname>
<given-names>Gelebo</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wei</surname>
<given-names>Linjing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Information Sciences and Technology, Gansu Agricultural University</institution>, <addr-line>Lanzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Gansu Provincial Key Laboratory of Aridland Crop Science, Gansu Agricultural University</institution>, <addr-line>Lanzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Plant Protection, Gansu Agricultural University</institution>, <addr-line>Lanzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>College of Agronomy, Gansu Agricultural University</institution>, <addr-line>Lanzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Department of Computer Sciences and Technology, Nanchang University</institution>, <addr-line>Nanchang, Jiangxi</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>College of Food Science and Engineering, Gansu Agricultural University</institution>, <addr-line>Lanzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1568126/overview">Xing Yang</ext-link>, Anhui Science and Technology University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3140194/overview">Qi Tian</ext-link>, Northwest A &amp; F University Hospital, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3141211/overview">Md. Milon Rana</ext-link>, Hajee Mohammad Danesh Science and Technology University, Bangladesh</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Yue Li, <email xlink:href="mailto:liyue@gsau.edu.cn">liyue@gsau.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>24</day>
<month>09</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1646611</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>08</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Diallo, Li, Chukwuka, Boamah, Gao, Kana Kone, Rocho and Wei.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Diallo, Li, Chukwuka, Boamah, Gao, Kana Kone, Rocho and Wei</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Plant diseases pose a significant threat to global food security and agricultural productivity, making accurate and timely disease identification essential for effective crop management and minimizing economic losses. Although data augmentation techniques such as RICAP improve model robustness, their reliance on randomly extracted image regions can introduce label noise, potentially misleading the training of deep learning models.</p>
</sec>
<sec>
<title>Methods</title>
<p>This study introduces Enhanced-RICAP, an advanced data augmentation technique designed to improve the accuracy of deep learning models for plant disease detection. Enhanced-RICAP replaces random patch selection with an attention module guided by class activation maps. By focusing on discriminative regions, Enhanced-RICAP reduces label noise and improves model accuracy for plant disease detection, addressing a key limitation of traditional augmentation methods. The method was evaluated using several deep learning architectures, such as ResNet18, ResNet34, ResNet50, EfficientNet-b, and Xception, on the cassava leaf disease and PlantVillage tomato leaf disease datasets.</p>
</sec>
<sec>
<title>Results</title>
<p>The experimental results demonstrate that Enhanced-RICAP consistently outperforms existing augmentation methods, including CutMix, MixUp, CutOut, Hide-and-Seek, and RICAP, across key evaluation metrics: accuracy, precision, recall, and F1-score. The ResNet18+Enhanced-RICAP configuration achieved 99.86% accuracy on the tomato leaf disease dataset, whereas the Xception+Enhanced-RICAP model attained 96.64% accuracy in classifying four cassava leaf disease categories.</p>
</sec>
<sec>
<title>Discussion and Conclusion</title>
<p>To bridge the gap between research and practical application, the ResNet18+Enhanced-RICAP model was deployed in PlantDisease, a mobile application that enables real-time disease identification and management recommendations. This approach supports sustainable agriculture and strengthens food security by providing farmers with accessible and reliable diagnostic tools.</p>
</sec>
</abstract>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>plant disease identification</kwd>
<kwd>data augmentation</kwd>
<kwd>food security</kwd>
<kwd>sustainable agriculture</kwd>
</kwd-group>
<contract-num rid="cn001">32460443</contract-num>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">Science and Technology Program of Gansu Province<named-content content-type="fundref-id">10.13039/501100018554</named-content>
</contract-sponsor>
<counts>
<fig-count count="5"/>
<table-count count="5"/>
<equation-count count="7"/>
<ref-count count="45"/>
<page-count count="12"/>
<word-count count="6541"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Agriculture remains the primary source of livelihood for a large portion of the global population. However, food security continues to be threatened by a range of factors, including climate change and plant diseases (<xref ref-type="bibr" rid="B18">Khan et&#xa0;al., 2022</xref>). Plant diseases are not only a global threat to food security but also pose devastating risks to smallholder farmers whose livelihoods depend on healthy crops. The rapid commercialization of agricultural (<xref ref-type="bibr" rid="B1">Abbas et&#xa0;al., 2021</xref>) practices has further impacted the environment, complicating efforts to maintain sustainable farming. Among the critical challenges in modern agriculture is the early and accurate identification of plant diseases, such as cassava leaf and tomato leaf diseases. Timely identification of plant diseases is essential to prevent the spread of infections to healthy plants, thereby reducing the risk of substantial economic losses. The consequences of plant diseases can range from minor symptoms to the complete destruction of plantations, severely undermining agricultural productivity and economic stability. In particular, the increased cultivation of cassava and tomato crops has made disease identification increasingly important. These crops are susceptible to various infections, which often present with subtle and overlapping symptoms that complicate visual diagnosis. Although expert-based manual inspection remains a primary method for diagnosing plant diseases, its dependence on human judgment introduces variability and inefficiency, often leading to delayed or inaccurate assessments (<xref ref-type="bibr" rid="B42">Zarboubi et&#xa0;al., 2025</xref>). These diagnostic challenges, coupled with environmental influences, contribute to delayed or ineffective treatment, reducing yield and crop quality. 
To address these limitations, the integration of computer vision and deep learning offers a promising solution for developing automated and scalable plant disease identification systems. Such tools can support farmers in early diagnosis and more effective disease management, ultimately strengthening food security and agricultural resilience.</p>
<p>Recent advances in deep learning, particularly Convolutional Neural Networks (CNNs), have shown great potential in addressing challenges in plant disease identification by automating the feature extraction and identification process, enhancing the reliability and efficiency of the identification (<xref ref-type="bibr" rid="B15">Jafar et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B26">Sajitha et&#xa0;al., 2024</xref>). CNNs are widely used in diverse domains such as medical imaging, object detection, and agricultural disease diagnosis, due to their capacity to automatically learn and capture meaningful features from images (<xref ref-type="bibr" rid="B19">Li et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B44">Zhang and Mu, 2024</xref>). Several studies have employed CNN-based approaches to identify diseases in tomato and cassava leaves. For tomato disease identification, researchers have developed custom CNN architectures capable of distinguishing between multiple disease types and localizing affected regions on the leaf surface (<xref ref-type="bibr" rid="B2">Agarwal et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B45">Zhang et&#xa0;al., 2020</xref>). Popular CNN architectures such as VGGNet, ResNet, GoogLeNet, MobileNet, and Inception have been adapted for plant disease identification in numerous studies (<xref ref-type="bibr" rid="B27">Sanida et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B38">Vengaiah and Priyadharshini, 2023</xref>; <xref ref-type="bibr" rid="B3">Ajitha et&#xa0;al., 2024</xref>). In relation to cassava disease identification, CNN-based methods have also gained traction. <xref ref-type="bibr" rid="B5">Ayu et&#xa0;al. (2021)</xref> proposed a customized version of MobileNetV2 for detecting cassava leaf diseases, while <xref ref-type="bibr" rid="B30">Singh et&#xa0;al. (2023)</xref> explored the application of InceptionResNetV2 to improve disease identification. <xref ref-type="bibr" rid="B29">Sholihin et&#xa0;al. 
(2023)</xref> utilized AlexNet as a feature extractor in combination with a support vector machine (SVM) classifier. Additionally, pre-trained CNN models, such as VGG19, have been employed using transfer learning techniques to enhance model generalization and reduce the need for extensive training from scratch (<xref ref-type="bibr" rid="B4">Alford and Tuba, 2024</xref>).</p>
<p>A variety of data augmentation techniques (<xref ref-type="bibr" rid="B43">Zhang et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B31">Summers and Dinneen, 2019</xref>) aim at mixing data to enhance data diversity. As a result, the data mixing strategy compels the neural network to attend to multiple objects and regions in the input image, thereby enhancing its feature extraction capabilities for the networks. Among data augmentation techniques, CutOut (<xref ref-type="bibr" rid="B8">DeVries, 2017</xref>) exemplifies a method that enhances training data by systematically removing rectangular regions from images. Another category comprises data-mixing techniques (<xref ref-type="bibr" rid="B14">Inoue, 2018</xref>; <xref ref-type="bibr" rid="B36">Tokozume et&#xa0;al., 2018</xref>), which have garnered significant attention in the domain of image identification in recent years. Mixing data to extend the training distribution was first proposed by Zhang et&#xa0;al (<xref ref-type="bibr" rid="B43">Zhang et&#xa0;al., 2018</xref>). MixUp entails generating training samples by linearly mixing images and fusing their labels using the same coefficients. This technique has demonstrated notable effectiveness in mitigating the impact of noisy labels and enhancing overall model performance.</p>
<p>Recently, Mixup variants (<xref ref-type="bibr" rid="B32">Takahashi et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B11">Guo et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B31">Summers and Dinneen, 2019</xref>) have been proposed; they perform feature-level interpolation and other types of transformations. Random Image Cropping and Patching (RICAP) (<xref ref-type="bibr" rid="B32">Takahashi et&#xa0;al., 2018</xref>) is introduced as a data augmentation method that enhances training diversity by cropping regions from four distinct images and combining them into a single composite image, unlike traditional approaches that utilize only two images. However, the risk of label noise increases since the region randomly extracted to form the mixed image may cover a meaningless region of its respective source image. CutMix, introduced by Yun et&#xa0;al (<xref ref-type="bibr" rid="B41">Yun et&#xa0;al., 2019</xref>), generates new images by replacing a region of one image with a patch from another. The corresponding labels are combined in proportion to the area of the exchanged patches, similar to the approach used in MixUp. Based on CutMix, SaliencyMix (<xref ref-type="bibr" rid="B37">Uddin et&#xa0;al., 2020</xref>) guides the mixing of patches by saliency regions in the image (based on CAM or a saliency detector) to obtain mixed samples with more class-relevant information; ResizeMix (<xref ref-type="bibr" rid="B25">Qin et&#xa0;al., 2020</xref>) maintains the information integrity by replacing one resized image directly into a rectangular area of another image. Despite their contributions, these previous studies often overlook thorough evaluations, particularly with respect to localization performance and the ability to capture discriminative regions.</p>
<p>The objective of this study is to create a more efficient data-mixing augmentation technique for enhancing the identification of cassava and tomato leaf diseases. Unlike the original RICAP (<xref ref-type="bibr" rid="B32">Takahashi et al., 2018</xref>) which relies on random region box generation, Enhanced-RICAP incorporates an attention module. Specifically, it leverages class activation maps to extract discriminative regions from four distinct images, which are then patched together to match the size of the original image. The corresponding labels are mixed according to the semantic composition of the newly generated image. This approach improves the model&#x2019;s ability to generalize and reliably detect plant diseases while reducing the risk of overfitting.</p>
<list list-type="bullet">
<list-item>
<p>We introduce a data-mixing augmentation technique that efficiently preserves important discriminative regions while introducing sufficient variability.</p>
</list-item>
<list-item>
<p>We apply CAM to guide the augmentation process, ensuring that crucial features are not obscured.</p>
</list-item>
<list-item>
<p>We provide a comparative analysis demonstrating that Enhanced-RICAP outperforms existing augmentation methods on both the cassava and tomato datasets, and we deploy the resulting model in a mobile app for real-time, on-site disease identification, reducing reliance on experts and enabling more timely, accurate crop management.</p>
</list-item>
</list>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Dataset</title>
<p>To evaluate the performance of the proposed method in this study, analysis was conducted using two datasets: the cassava leaf disease dataset and the PlantVillage repository (<xref ref-type="bibr" rid="B13">Hughes et&#xa0;al., 2015</xref>), which contains 18162 images of tomato leaf diseases. The cassava leaf disease dataset, updated by Gomez-Pupo et&#xa0;al (<xref ref-type="bibr" rid="B10">G&#xf3;mez-Pupo et&#xa0;al., 2022</xref>), consists of 6,745 images, with 80% allocated for training, 10% for validation, and 10% for testing, respectively. <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref> shows the different cassava and tomato leaf diseases. The tomato leaf disease dataset comprised 10 distinct classes, including nine disease classes and one healthy class. In both analyses, all images were resized to 224 &#xd7; 224 for experimental purposes.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Collage of images showing tomato and cassava leaf diseases with labels. Tomato diseases include bacterial spot, early blight, late blight, leaf mold, septoria leaf spot, spider mite, yellow leaf curl virus, mosaic virus, and target spot. A healthy tomato leaf is also shown. Cassava diseases include bacteria blight, green mottle, mosaic disease, and brown streak disease, alongside a healthy cassava leaf. The background is light purple.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1646611-g001.tif">
<alt-text content-type="machine-generated">Collage of images showing tomato and cassava leaf diseases with labels. Tomato diseases include bacterial spot, early blight, late blight, leaf mold, septoria leaf spot, spider mite, yellow leaf curl virus, mosaic virus, and target spot. A healthy tomato leaf is also shown. Cassava diseases include bacteria blight, green mottle, mosaic disease, and brown streak disease, alongside a healthy cassava leaf. The background is light purple.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Networks</title>
<p>Three distinct networks, ResNet (RNet), Xception, and EfficientNet-b (EffNetb), were employed in this study. ResNet, also known as Residual Network (<xref ref-type="bibr" rid="B12">He et&#xa0;al., 2016</xref>), is a deep learning framework that utilizes residual connections or skip links to circumvent levels within the network. These connections enable the training of extremely deep networks by resolving the issue of vanishing gradients and enhancing the flow of gradients, resulting in improved performance across many tasks. Xception is a neural network architecture created by Fran&#xe7;ois Chollet (<xref ref-type="bibr" rid="B7">Chollet, 2017</xref>). It combines depthwise separable convolutions with pointwise convolutions to achieve deep learning. This architecture improves image identification performance by replacing traditional Inception modules with depthwise separable convolutions, resulting in 36 convolutional layers and linear residual connections. EffNetb (<xref ref-type="bibr" rid="B33">Tan and Le, 2019</xref>) is a convolutional neural network architecture known for its efficiency in terms of accuracy and computational resources. It was developed by Google AI researchers in 2019. The core idea behind EfficientNet is to balance model width, depth, and resolution in order to improve performance without significantly increasing computational costs.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Preliminaries</title>
<p>Let <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <italic>y</italic> represent the training images and their labels, respectively. W and H, the width and height of an input image. (<italic>y</italic>
<sub>1</sub> and <italic>y</italic>
<sub>2</sub>) represent the source and the target labels, <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the source and the target image.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Algorithm of Enhanced-RICAP</title>
<p>The main purpose of Enhanced-RICAP is to generate new training samples (<inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>) to increase data diversity, whereby label noise is mitigated. The method is inspired by RICAP, which randomly crops four patches from four distinct images and patches them together from the upper left to the bottom right to generate augmented images. In RICAP, it has been observed that the randomly generated patches may cover meaningless information about the source image; therefore, mixing labels proportionally to area-based statistics may lead to label noise and mislead the training process. To overcome the aforementioned limitation, we introduced a new data-mixing augmentation technique named Enhanced-RICAP, specifically designed for plant disease identification. <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> provides a comprehensive overview of the proposed Enhanced-RICAP method. The random region generation module in RICAP is replaced by the attention region generation module in Enhanced-RICAP. In each iteration, Enhanced-RICAP leverages the class activation map to obtain discriminative regions <italic>P</italic>
<sub>1</sub>
<italic>,&#x2026;,P</italic>
<sub>4</sub> of four distinct images <italic>x</italic>
<sub>1</sub>
<italic>,&#x2026;,x</italic>
<sub>4</sub> respectively, as in <xref ref-type="statement" rid="algo1">Algorithm 1</xref>. This process is accomplished in <xref ref-type="statement" rid="algo1">
<bold>Algorithm 1</bold>
</xref>. Subsequently, <xref ref-type="statement" rid="algo1">
<bold>Algorithm 1</bold>
</xref> is incorporated into <xref ref-type="statement" rid="algo2">
<bold>Algorithm 2</bold>
</xref> to complete the training. The class activation map is obtained from the last convolutional layer of the network and can be expressed as shown in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Diagram showing a process involving cassava leaves analyzed by a Convolutional Neural Network (CNN). Input images of leaves are processed through the CNN, generating class activation maps highlighting discriminative regions. The maps are then used to form an output collage, illustrating the areas of interest in the original images.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1646611-g002.tif">
<alt-text content-type="machine-generated">Diagram showing a process involving cassava leaves analyzed by a Convolutional Neural Network (CNN). Input images of leaves are processed through the CNN, generating class activation maps highlighting discriminative regions. The maps are then used to form an output collage, illustrating the areas of interest in the original images.</alt-text>
</graphic>
</fig>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msubsup>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<statement id="algo1">
<label>Algorithm 1</label>
<title>Enhanced-RICAP at the beginning of the algorithm section.</title>
<p>
<preformat>
1 <bold>Input:</bold> a CNN function <italic>f</italic>, training images (<italic>x</italic>
<sub>1</sub>,&#x2026;<italic>x<sub>i</sub>
</italic>), where <italic>I</italic> is the images and <italic>L</italic> is the labels.
&#xD;2 <bold>for</bold> <italic>k in range (4)</italic> <bold>do</bold>
&#xD;3 &#x2003;&#x2003;<italic>P<sub>i</sub>
</italic> &#x2190; obtain the discriminative region of <italic>x<sub>i</sub>
</italic> with Equations 4, 5
&#xD;4 &#x2003;&#x2003;<named-content content-type="inline-equation"><inline-formula>
<mml:math display="inline" id="im12">
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula></named-content> &#x2190; paste <italic>P<sub>i</sub>
</italic> into the corner left of <italic>x</italic>
<sub>1</sub>
&#xD;5 &#x2003;&#x2003;else paste it according to the previous paste patches.&#xD;
6 <bold>end</bold>
</preformat>
</p>
</statement>
<statement id="algo2">
<label>Algorithm 2</label>
<title>Augmented Samples at the beginning of the algorithm section.</title>
<p>
<preformat>
<bold>1 Input:</bold> a CNN function <italic>f</italic>, a training sample (<italic>I</italic>, <italic>L</italic>), where <italic>I</italic> denotes the images and <italic>L</italic> denotes the labels.
&#xD;2 <bold>for</bold> <italic>epoch in range (epochs)</italic> <bold>do</bold>
&#xD;3 <bold>for</bold> (<italic>I</italic>, <italic>L</italic>) <italic>in training samples</italic> <bold>do</bold>
&#xD;4 <italic>r<sub>ex</sub>
</italic> = randomly shuffle the batch images;
&#xD;5 <named-content content-type="inline-equation"><inline-formula>
<mml:math display="inline" id="im14">
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula></named-content> &#x2190; generate the new training image using Algorithm 1
&#xD;6 <named-content content-type="inline-equation"><inline-formula>
<mml:math display="inline" id="im15">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula></named-content> &#x2190; generate the new training label using Equations 6-7
&#xD;7 doing backpropagation to optimize <italic>f</italic> using the new training samples
&#xD;8 <bold>end</bold>
&#xD;9 <bold>end</bold>
</preformat>
</p>
</statement>
<p>where <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> signifies the result of the final convolutional layer, <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the <italic>l<sup>th</sup>
</italic> feature map of <italic>F</italic>(<italic>x<sub>i</sub>
</italic>) and <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the weight in the fully connected layer associated with class <italic>y<sub>i</sub>
</italic>. The coordinates of the most salient regions <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> can be obtained as shown in <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mtable equalrows="true" equalcolumns="true">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mtext>argmax&#xa0;</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>C</mml:mi>
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Since the coordinates above are in the class activation dimension, we use <xref ref-type="disp-formula" rid="eq3">Equation 3</xref> below to convert the coordinates back to the original image dimension as:</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mtable equalrows="true" equalcolumns="true">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mi>W</mml:mi>
<mml:mi>w</mml:mi>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;&#xa0;&#xa0;&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mi>H</mml:mi>
<mml:mi>h</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where <italic>w</italic> and <italic>h</italic> are the width and height of the class activation map of <italic>x<sub>i</sub>
</italic>. The parameter <italic>&#x3b3;<sub>i</sub>
</italic> is used to define the width and height of the discriminative region <italic>P<sub>i</sub>
</italic> of image <italic>x<sub>i</sub>
</italic> by <italic>w<sub>i</sub>
</italic>= <italic>W &#xd7; &#x3b3;<sub>i</sub>
</italic>, and <italic>h<sub>i</sub>
</italic>= <italic>H &#xd7; &#x3b3;<sub>i</sub>
</italic>. The following Equation expresses how to extract the discriminative region <italic>P<sub>i</sub>
</italic> of <italic>x<sub>i</sub>
</italic> as:</p>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>if&#x2004;</mml:mtext>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>{</mml:mo>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>if&#x2004;</mml:mtext>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>{</mml:mo>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>if&#x2004;</mml:mtext>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>{</mml:mo>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>if&#x2004;</mml:mtext>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>{</mml:mo>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> to denote the discriminative region of the image <italic>x<sub>i</sub>
</italic>, and <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represent the left, right, bottom, and top boundaries of the region <italic>P<sub>i</sub>
</italic>. At the end, those discriminative regions <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are patched together the upper left, upper right, lower left, and lower right regions to generate the augmented image.</p>
<sec id="s2_4_1">
<label>2.4.1</label>
<title>Label mixing</title>
<p>In RICAP, the mixed labels are computed based on the proportion of the image area that comes from each source image. It is observed that area-based label mixing may not reflect the intrinsic composition of the mixed images, which can cause model instability. To tackle this issue, we exploit the class activation map of each image to obtain the intrinsic semantic composition of each region that composes the mixed image. This operation can be expressed as follows:</p>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:mtext>&#x3a6;</mml:mtext>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>&#x3a6;</mml:mtext>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where &#x3a6; denotes the operation that enlarges the dimensions of a feature map to align with those of the image <italic>x<sub>i</sub>
</italic>. The notation <inline-formula>
<mml:math display="inline" id="im13">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> represents the target label vector corresponding to the four mixed images. It is computed by multiplying each original one-hot class label vector <italic>y<sub>i</sub> </italic>by its associated label weight <italic>&#x3bb;<sub>i</sub>
</italic>, which reflects the image&#x2019;s contribution to the augmented sample, and summing the four weighted vectors for <italic>i</italic> = 1<italic>,&#x2026;</italic>,4.</p>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>{</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>3</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>4</mml:mn>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Ablation study outcomes for the proposed method on Cassava leaf diseases</title>
<p>In our analysis, the standard deviation of the values is denoted by the numbers following the <italic>&#xb1;</italic> operator; each method was computed over four distinct runs. Consequently, in this section, we performed tests to compare our proposed method with previously existing studies utilizing RNet18, RNet34, RNet50, EffNetb0, and Xception with pretrained ImageNet weights. Considering that these studies did not officially report results on cassava leaf disease and tomato leaf disease, we implemented the methods based on the released codes and conducted experiments on the two datasets. We initially explored various hyperparameters for each method and identified the optimal one for the network architecture. We defined the hyperparameters as 0.5 for Cutout and CutMix, and 1.0 for MixUp, and used alpha values of 1.0 and 3.0 for CutMix and MixUp, respectively. An initial learning rate of 0.0001 and a weight decay of 1e-5 were applied using the Adam optimizer. Compared to the SGD optimizer, Adam has been observed to achieve higher accuracy on the cassava leaf disease dataset. The comparative analysis of test accuracy is presented in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. The proposed method is evaluated against state-of-the-art techniques. When using the RNet18 network, Enhanced-RICAP demonstrates superior performance, exceeding MixUp by 0.83% and Hide and Seek by 0.51%. Similarly, with RNet34, Enhanced-RICAP outperforms CutOut by 2.16% and MixUp by 1.36%. However, the performance of the proposed method using RNet50 is comparatively lower than that achieved with RNet34. Furthermore, when applied to EffNetb0, the method achieves a marginal improvement of 0.2% over ResizeMix. Notably, it significantly surpasses Hide and Seek and CutMix by 1.55% and 2.51%, respectively, when integrated with the Xception architecture. The experimental results, as presented in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, reveal that our methodology achieved superior performance compared to RICAP and ResizeMix techniques when evaluating test error rates using Xception on the cassava leaf disease identification (CLDD). Furthermore, our approach substantially surpassed the baseline, underscoring the consistent effectiveness of Enhanced RICAP across diverse network architectures.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Comparison of different methods and their accuracy on cassava leaf disease dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">Method</th>
<th valign="middle" colspan="4" align="center">Accuracy (%)</th>
</tr>
<tr>
<th valign="middle" align="center">RNet18</th>
<th valign="middle" align="center">RNet34</th>
<th valign="middle" align="center">RNet50</th>
<th valign="middle" align="center">EffNetb0</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Baseline</td>
<td valign="middle" align="center">91.18 <italic>&#xb1;</italic> 0.70</td>
<td valign="middle" align="center">93.58 <italic>&#xb1;</italic> 1.03</td>
<td valign="middle" align="center">92.46 <italic>&#xb1;</italic> 0.48</td>
<td valign="middle" align="center">91.66 <italic>&#xb1;</italic> 0.56</td>
</tr>
<tr>
<td valign="middle" align="left">CutMix</td>
<td valign="middle" align="center">91.02 <italic>&#xb1;</italic> 0.99</td>
<td valign="middle" align="center">91.50 <italic>&#xb1;</italic> 0.054</td>
<td valign="middle" align="center">92.62 <italic>&#xb1;</italic> 0.21</td>
<td valign="middle" align="center">89.58 <italic>&#xb1;</italic> 0.31</td>
</tr>
<tr>
<td valign="middle" align="left">MixUp</td>
<td valign="middle" align="center">91.18 <italic>&#xb1;</italic> 0.07</td>
<td valign="middle" align="center">92.78 <italic>&#xb1;</italic> 0.38</td>
<td valign="middle" align="center">91.02 <italic>&#xb1;</italic> 0.30</td>
<td valign="middle" align="center">90.54 <italic>&#xb1;</italic> 0.26</td>
</tr>
<tr>
<td valign="middle" align="left">ResizeMix</td>
<td valign="middle" align="center">90.54 <italic>&#xb1;</italic> 0.019</td>
<td valign="middle" align="center">91.98 <italic>&#xb1;</italic> 0.26</td>
<td valign="middle" align="center">71.15 <italic>&#xb1;</italic> 0.18</td>
<td valign="middle" align="center">93.58 <italic>&#xb1;</italic> 0.33</td>
</tr>
<tr>
<td valign="middle" align="left">CutOut</td>
<td valign="middle" align="center">90.70 <italic>&#xb1;</italic> 0.65</td>
<td valign="middle" align="center">91.98 <italic>&#xb1;</italic> 0.41</td>
<td valign="middle" align="center">92.42 <italic>&#xb1;</italic> 0.85</td>
<td valign="middle" align="center">91.02 <italic>&#xb1;</italic> 0.22</td>
</tr>
<tr>
<td valign="middle" align="left">Hide and Seek</td>
<td valign="middle" align="center">91.50 <italic>&#xb1;</italic> 0.33</td>
<td valign="middle" align="center">91.66 <italic>&#xb1;</italic> 0.19</td>
<td valign="middle" align="center">91.67 <italic>&#xb1;</italic> 0.28</td>
<td valign="middle" align="center">89.74 <italic>&#xb1;</italic> 0.011</td>
</tr>
<tr>
<td valign="middle" align="left">Enhanced-Ricap</td>
<td valign="middle" align="center">92.01 <italic>&#xb1;</italic> 0.18</td>
<td valign="middle" align="center">94.14 <italic>&#xb1;</italic> 0.22</td>
<td valign="middle" align="center">93.18 <italic>&#xb1;</italic> 0.031</td>
<td valign="middle" align="center">93.78 <italic>&#xb1;</italic> 0.26</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Top-1 error rates comparison of ResizeMix, RICAP, and our method on cassava leaf disease.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Model + Method</th>
<th valign="middle" align="center">epochs</th>
<th valign="middle" align="center">Top-1 Err (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Baseline</td>
<td valign="middle" align="center">200</td>
<td valign="middle" align="center">8.50 <italic>&#xb1;</italic> 1.250</td>
</tr>
<tr>
<td valign="middle" align="left">Baseline+ResizeMix</td>
<td valign="middle" align="center">200</td>
<td valign="middle" align="center">9.46 <italic>&#xb1;</italic> 0.20</td>
</tr>
<tr>
<td valign="middle" align="left">Baseline+Ricap</td>
<td valign="middle" align="center">200</td>
<td valign="middle" align="center">6.88 <italic>&#xb1;</italic> 0.83</td>
</tr>
<tr>
<td valign="middle" align="left">Baseline+Enhanced-Ricap</td>
<td valign="middle" align="center">200</td>
<td valign="middle" align="center">3.36 <italic>&#xb1;</italic> 0.43</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Evaluation of Xception-based cassava leaf disease identification using confusion matrix analysis</title>
<p>By using Xception with the suggested approach, 625 untrained images, comprising four categories of cassava leaf diseases and healthy leaves, were chosen for identification. The resulting confusion matrix for recognizing cassava leaf diseases is depicted in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. The blue background illustrates identification accuracy, with a darker blue color indicating a higher level of identification accuracy. The confusion matrix reveals that our method achieves the highest identification accuracy, with 603 images correctly identified when distinguishing between four main cassava leaf diseases and healthy leaves. Among these, CBB exhibits the highest error rate, 5 of the 9 images that were incorrectly recognized were classified as CBSD. Therefore, there were mutual identification errors between CBB and CBSD. Bacterial blight and brown streak disease both cause yellowing of leaves. Similarly, errors in disease identification occurred because the spots associated with various diseases appeared alike at the same time. In addition, the numbers of correct identifications were 561 for Resizemix, 570 for CutOut, 572 for CutMix, 579 for Hide and Seek, and 585 for MixUp. The confusion matrix was utilized to calculate the accuracy, recall, precision, and F1-score for the five cassava leaf categories, serving as performance evaluation indicators for our method, as presented in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>. The proposed method achieves an average accuracy of 96.64% in classifying four types of cassava leaf disease along with healthy leaf images. Additionally, the method attains an average precision of 96.4%, an average F1-score of 96.4%, and an average recall of 96.6%. These results demonstrate the effectiveness of the approach in accurately recognizing cassava leaf diseases.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Six confusion matrices display classification results for five categories: CBB, CBSD, CGM, CMD, and Healthy. Each matrix varies slightly, with dark blue squares indicating higher accuracy. The matrices compare true labels versus predicted labels, using a color gradient from light to dark blue, representing values from zero to one hundred twenty.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1646611-g003.tif">
<alt-text content-type="machine-generated">Six confusion matrices display classification results for five categories: CBB, CBSD, CGM, CMD, and Healthy. Each matrix varies slightly, with dark blue squares indicating higher accuracy. The matrices compare true labels versus predicted labels, using a color gradient from light to dark blue, representing values from zero to one hundred twenty.</alt-text>
</graphic>
</fig>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Comparative analysis of precision, recall, and F1-score on cassava leaf disease.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Categories</th>
<th valign="middle" align="left">Precision (%)</th>
<th valign="middle" align="left">Recall (%)</th>
<th valign="middle" align="left">F1-score (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">CBB</td>
<td valign="middle" align="left">0.98</td>
<td valign="middle" align="left">0.90</td>
<td valign="middle" align="left">0.94</td>
</tr>
<tr>
<td valign="middle" align="left">CBSD</td>
<td valign="middle" align="left">0.98</td>
<td valign="middle" align="left">0.98</td>
<td valign="middle" align="left">0.98</td>
</tr>
<tr>
<td valign="middle" align="left">CGM</td>
<td valign="middle" align="left">0.96</td>
<td valign="middle" align="left">0.98</td>
<td valign="middle" align="left">0.97</td>
</tr>
<tr>
<td valign="middle" align="left">CMD</td>
<td valign="middle" align="left">0.95</td>
<td valign="middle" align="left">0.99</td>
<td valign="middle" align="left">0.97</td>
</tr>
<tr>
<td valign="middle" align="left">Healthy</td>
<td valign="middle" align="left">0.95</td>
<td valign="middle" align="left">0.98</td>
<td valign="middle" align="left">0.96</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Evaluation</title>
<p>As shown in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>, a comprehensive comparison of various training methods and their corresponding accuracy on the cassava leaf disease dataset is presented for training from scratch, without utilizing ImageNet pre-trained weights. This analysis demonstrates that our proposed method remains effective even in the absence of pre-trained weights, highlighting its adaptability. It is evident that the baseline model using RNet34 achieved an accuracy of 83.58%, which is higher than the accuracies obtained by the baselines on other Convolutional Neural Networks. Specifically, the baseline models using RNet18, RNet50, EffNetb0, and Xception achieved accuracies of 83.17%, 81.21%, 78.43%, and 82.60%, respectively. This indicates that RNet34 may be particularly well-suited for this dataset when trained from scratch. Among the various augmentation techniques applied, Xception with CutOut exhibited the lowest identification accuracy of 72.27%. In contrast, CutMix, MixUp, Hide and Seek, and Enhanced-RICAP achieved identification accuracies ranging between 80% and 90%. Notably, Enhanced-RICAP demonstrated the highest accuracy of 90.01%, surpassing all other methods. This highlights the efficacy of Enhanced-RICAP in improving model performance through advanced data augmentation and training strategies.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Performance analysis of state-of-the-art models on cassava leaf disease identification without pre-trained weights.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Method</th>
<th valign="middle" colspan="5" align="center">Accuracy (%)</th>
</tr>
<tr>
<th valign="middle" align="center">RNet18</th>
<th valign="middle" align="center">RNet34</th>
<th valign="middle" align="center">RNet50</th>
<th valign="middle" align="center">EffNetb0</th>
<th valign="middle" align="center">Xception</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Baseline</td>
<td valign="middle" align="center">83.17 &#xb1; 0.12</td>
<td valign="middle" align="center">83.58 &#xb1; 0.042</td>
<td valign="middle" align="center">81.21 &#xb1; 0.15</td>
<td valign="middle" align="center">78.43 &#xb1; 0.85</td>
<td valign="middle" align="center">82.60 &#xb1; 0.60</td>
</tr>
<tr>
<td valign="middle" align="center">CutMix</td>
<td valign="middle" align="center">83.17 &#xb1; 0.40</td>
<td valign="middle" align="center">84.45 &#xb1; 0.77</td>
<td valign="middle" align="center">83.65 &#xb1; 0.26</td>
<td valign="middle" align="center">85.89 &#xb1; 0.13</td>
<td valign="middle" align="center">79.01 &#xb1; 0.56</td>
</tr>
<tr>
<td valign="middle" align="center">MixUp</td>
<td valign="middle" align="center">83.97 &#xb1; 0.67</td>
<td valign="middle" align="center">82.32 &#xb1; 0.27</td>
<td valign="middle" align="center">83.01 &#xb1; 0.053</td>
<td valign="middle" align="center">85.89 &#xb1; 0.15</td>
<td valign="middle" align="center">76.76 &#xb1; 0.81</td>
</tr>
<tr>
<td valign="middle" align="center">CutOut</td>
<td valign="middle" align="center">82.53 &#xb1; 0.94</td>
<td valign="middle" align="center">84.45 &#xb1; 0.065</td>
<td valign="middle" align="center">83.17 &#xb1; 0.35</td>
<td valign="middle" align="center">86.21 &#xb1; 0.73</td>
<td valign="middle" align="center">72.27 &#xb1; 0.38</td>
</tr>
<tr>
<td valign="middle" align="center">Hide and Seek</td>
<td valign="middle" align="center">84.29 &#xb1; 0.36</td>
<td valign="middle" align="center">83.49 &#xb1; 0.012</td>
<td valign="middle" align="center">84.29 &#xb1; 0.041</td>
<td valign="middle" align="center">89.58 &#xb1; 0.23</td>
<td valign="middle" align="center">73.07 &#xb1; 0.47</td>
</tr>
<tr>
<td valign="middle" align="center">Enhanced-RICAP</td>
<td valign="middle" align="center">84.43 &#xb1; 0.024</td>
<td valign="middle" align="center">84.75 &#xb1; 0.17</td>
<td valign="middle" align="center">84.82 &#xb1; 0.04</td>
<td valign="middle" align="center">90.01 &#xb1; 0.083</td>
<td valign="middle" align="center">83.10 &#xb1; 0.052</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Analysis of experimental results on a publicly available tomato leaf disease dataset</title>
<p>
<xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref> presents a comprehensive comparison between the proposed technique and previous studies, utilizing classical models such as RNet18 as common baselines with our method. The results consistently indicate that Enhanced-RICAP outperforms the models proposed in these studies by a considerable margin. When comparing Enhanced-RICAP to other recent methodologies, its superiority becomes increasingly apparent. For example, <xref ref-type="bibr" rid="B20">Li et&#xa0;al. (2023)</xref> documented an accuracy rate of 99.70%, whereas Enhanced-RICAP demonstrated an accuracy of 99.86%. This indicates that our proposed method exhibits a 0.16% enhancement in accuracy relative to the findings. Similarly, <xref ref-type="bibr" rid="B24">Paul et&#xa0;al. (2023)</xref> reported an accuracy of 89.00%, reflecting a 2% improvement over VGG16, which was significantly lower than the 12.87% enhancement demonstrated by our method. Moreover, although <xref ref-type="bibr" rid="B27">Sanida et&#xa0;al. (2023)</xref> reported notable improvements with RNet50 and VGG16, yielding gains of 2.33% and 2%, respectively, these gains remained inferior to those achieved by Enhanced-RICAP. In comparison, <xref ref-type="bibr" rid="B42">Zarboubi et&#xa0;al. (2025)</xref> reported an accuracy of 99.12%, with a 1.22% improvement over RNet50, while Enhanced-RICAP delivered 99.86% accuracy, further underscoring its superior performance. These comparisons highlight the capability of the model to demonstrate enhanced metrics in various evaluation criteria, including precision, recall, F1-score, and accuracy, thereby solidifying its position as a leading solution for complex identification tasks.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Comparative performance analysis of existing methods and our approach.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Authors</th>
<th valign="middle" align="left">Models+Method</th>
<th valign="middle" align="left">Accuracy</th>
<th valign="middle" align="left">Precision</th>
<th valign="middle" align="left">Recall</th>
<th valign="middle" align="left">F1-Score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B20">Li et&#xa0;al. (2023)</xref>
</td>
<td valign="middle" align="left">Custom-LMBRNet</td>
<td valign="middle" align="left">99.70</td>
<td valign="middle" align="left">99.72</td>
<td valign="middle" align="left">99.66</td>
<td valign="middle" align="left">99.69</td>
</tr>
<tr>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B40">Yang et&#xa0;al. (2024)</xref>
</td>
<td valign="middle" align="left">Custom-LSGNET</td>
<td valign="middle" align="left">95.54</td>
<td valign="middle" align="left">93.62</td>
<td valign="middle" align="left">94.13</td>
<td valign="middle" align="left">93.78</td>
</tr>
<tr>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B24">Paul et&#xa0;al. (2023)</xref>
</td>
<td valign="middle" align="left">Custom-CNN</td>
<td valign="middle" align="left">89.00</td>
<td valign="middle" align="left">89.00</td>
<td valign="middle" align="left">89.00</td>
<td valign="middle" align="left">89.00</td>
</tr>
<tr>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B27">Sanida et&#xa0;al. (2023)</xref>
</td>
<td valign="middle" align="left">Custom-CNN</td>
<td valign="middle" align="left">99.63</td>
<td valign="middle" align="left">99.12</td>
<td valign="middle" align="left">99.29</td>
<td valign="middle" align="left">99.20</td>
</tr>
<tr>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B42">Zarboubi et&#xa0;al. (2025)</xref>
</td>
<td valign="middle" align="left">Custom-CNN</td>
<td valign="middle" align="left">99.12</td>
<td valign="middle" align="left">99.13</td>
<td valign="middle" align="left">99.12</td>
<td valign="middle" align="left">99.11</td>
</tr>
<tr>
<td valign="middle" align="left">Our</td>
<td valign="middle" align="left">RNet18+Enhanced-RICAP</td>
<td valign="middle" align="left">99.86</td>
<td valign="middle" align="left">99.76</td>
<td valign="middle" align="left">99.69</td>
<td valign="middle" align="left">99.73</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Application and deployment of a mobile app for plant disease identification</title>
<p>This section focuses on the selection of an optimal model, such as RNeT18 with Enhanced-RICAP, designed to function efficiently on mobile devices while minimizing computational cost. To improve accessibility and practical utility for supporting farmers and agricultural experts, we developed a mobile app using Android Studio. The model is embedded within the application and executes locally on the device, thereby facilitating disease identification without reliance on an internet connection. The user interface of the PlantDisease Android application is depicted in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. The home screen of the PlantDisease application includes two primary options that allow users to select between Tomato and Cassava (I in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>). To start an identification request, users can click the camera icon, which provides alternative options to either capture a new image or by uploading an existing image, as shown (II in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4B</bold>
</xref>). After the system finishes identifying, the result is delivered to the Disease Analysis screen (in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4C</bold>
</xref>). If a disease is detected, the predicted image, the diagnosed disease, the prediction confidence score (IV in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4C</bold>
</xref>), and a brief treatment recommendation (V in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4C</bold>
</xref>) are displayed, along with corresponding information about possible diseases, their symptoms, methods, and treatment steps (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4D</bold>
</xref>). Function panel (I in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>) can be requested by tapping the respective buttons in Cassava or Tomato Disease Info (III in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4B</bold>
</xref>). The function panel provides access to other tools (I in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>): the book icon links to button of tomato disease general prevention measures, the plant leaf icon links to an introduction of the nine tomato diseases assessed in this work, and the phone icon links to the contact information of the PlantDisease development team.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>A series of mobile app screens for plant disease detection. Panel A shows a selection screen for cassava or tomato plant diseases. Panel B displays options for cassava disease recognition, protection, information, and contact. Panel C provides a diagnosis of cassava mosaic disease with a confidence level of 94 percent and treatment recommendations. Panel D outlines disease information, including basic information, severity level (three stars), symptoms, and prevention methods for cassava mosaic disease, detailing management strategies.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1646611-g004.tif">
<alt-text content-type="machine-generated">Four panels of a mobile app interface for plant disease detection and information. Panel A: A screen to select the plant disease type with options &#x201c;Cassava&#x201d; and &#x201c;Tomato.&#x201d; Panel B: A screen for &#x201c;Cassava Disease Detection&#x201d; with options for &#x201c;Cassava Disease Recognition,&#x201d; &#x201c;Cassava Protection Info,&#x201d; &#x201c;Cassava Disease Info,&#x201d; and &#x201c;Contact Us.&#x201d; Panel C: A &#x201c;Cassava Disease Analysis&#x201d; screen showing an image of cassava leaves with a diagnosis of &#x201c;Cassava Mosaic Disease,&#x201d; a 94% confidence level, and treatment recommendations. Panel D: A &#x201c;Disease Information&#x201d; screen displaying basic information, severity, symptoms, and prevention methods for the disease.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Class activation mapping visualization</title>
<p>The visualization of CAM heatmaps was conducted using the following techniques: the baseline Xception model, ResizeMix, and our proposed method. Specifically, during visualization, the attention heatmap was merged with the original image, as shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>. This approach allows a direct comparison between the original image and the outputs of the baseline, ResizeMix, and our method. Notably, darker colors in the heatmap correspond to higher activation values, highlighting regions most relevant for decision-making. Furthermore, compared with the baseline model and ResizeMix, our method demonstrates stronger feature extraction and improved detection of Cassava leaf disease, effectively capturing diverse color patterns and contextual information. In particular, Enhanced-RICAP further directs the network&#x2019;s attention to the most informative object regions, emphasizing discriminative features while reducing sensitivity to background noise. Consequently, these results indicate that Xception and ResizeMix can struggle with accurately discriminating leaf colors and extracting relevant background information. In contrast, the proposed method shows superior understanding of sample features and more effectively identifies key regions for classification.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Comparison of cassava leaf disease detection methods. The top row shows an original CBSD image, Xception Grad-CAM, ResizeMix Grad-CAM, and a model's Grad-CAM. The table displays certainty and improvement percentages for CBSD detection. The bottom row shows an original CMD image with similar Grad-CAM visualizations. Another table shows certainty and improvement for CMD detection. The model demonstrates notable improvements over others.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1646611-g005.tif">
<alt-text content-type="machine-generated">Comparison of cassava leaf disease detection methods. The top row shows an original CBSD image, Xception Grad-CAM, ResizeMix Grad-CAM, and a model's Grad-CAM. The table displays certainty and improvement percentages for CBSD detection. The bottom row shows an original CMD image with similar Grad-CAM visualizations. Another table shows certainty and improvement for CMD detection. The model demonstrates notable improvements over others.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this current study, different models and methods were employed, all aiming to achieve high accuracy. <xref ref-type="bibr" rid="B21">Maryum et&#xa0;al. (2021)</xref> used EffNetb4 with an 85:15 train-validation split, achieving 89.09% accuracy. <xref ref-type="bibr" rid="B6">Chen et&#xa0;al. (2022)</xref> used RNet50 in a fivefold cross-validation, improving accuracy to 89.7%. <xref ref-type="bibr" rid="B39">Vijayalata et&#xa0;al. (2022)</xref> achieved 92.6% accuracy with EffNetb0 using an 80:20 split, while <xref ref-type="bibr" rid="B35">Thai et&#xa0;al. (2021)</xref> used Vision Transformer with the same split, reaching 90.0%. <xref ref-type="bibr" rid="B22">Methil et&#xa0;al. (2021)</xref> also used EffNetb4 with an 80:20 split, obtaining 85.64% accuracy. The proposed model in this current study, combining Enhanced-RICAP with transfer learning using the Xception model, outperformed all others with an accuracy of 96.64%, demonstrating the effectiveness of this approach for cassava leaf disease identification. Our findings are consistent with previous studies that utilized the weights of the MobileNetV2 CNN model to classify cassava images, leveraging the extensive visual knowledge acquired from the ImageNet database (<xref ref-type="bibr" rid="B34">Tewari and Kumari, 2024</xref>). In addition, <xref ref-type="bibr" rid="B17">Karpathy et&#xa0;al. (2014)</xref> demonstrated that transfer learning is effective across various applications and significantly reduces computational demands compared to training from scratch, which is advantageous for machine applications.</p>
<p>In this current study, a comprehensive overview of identification performance for cassava disease identification demonstrates robust results across all categories. The precision, recall, and F1 scores for each disease category highlight the effectiveness of the model. CBB and CBSD achieved high scores, with CBSD slightly outperforming CBB in all metrics. Notably, CGM and CMD showed exceptional performance, with CMD achieving the highest scores across all metrics, indicating particularly effective identification. Previous studies have underscored the importance of precision and recall in agricultural disease identification, often noting trade-offs between these metrics. Research by <xref ref-type="bibr" rid="B23">Mohanty et&#xa0;al. (2016)</xref> and others has demonstrated that high precision and recall are crucial for practical applications in disease identification. The high scores for CGM and CMD in this study align with findings from similar works, which suggest that advanced models and techniques can lead to more accurate and reliable identification. The model&#x2019;s overall accuracy of 96.64%, with a macro-average precision, recall, and F1-score of 0.96, reflects its robustness and reliability, consistent with recent advancements in deep learning for agricultural applications. This performance reinforces the findings of previous research, which highlights the efficacy of state-of-the-art models in achieving high identification accuracy across diverse classes. The strong results across all classes, including the Healthy class, further demonstrate the model&#x2019;s capability to distinguish between the leaves of diseased and healthy cassava plants effectively, supporting its practical utility in real-world scenarios.</p>
<p>Previously, <xref ref-type="bibr" rid="B16">Jiang et&#xa0;al. (2021)</xref> reported that Class Activation Maps are designed to highlight the regions in an image that a convolutional neural network (CNN) considers most relevant for identifying a specific category. This approach leverages the spatial information present in each activation map, with convolutional layers closer to the network&#x2019;s identification stage providing more meaningful high-level activations. These activations are used for visual localization, helping to explain the network&#x2019;s final prediction. Previous research highlights the importance of CAM in improving model transparency and understanding. Techniques such as Grad-CAM and its variants have been shown to enhance interpretability by providing visual insights into which parts of an image contribute to the final identification (<xref ref-type="bibr" rid="B28">Selvaraju et&#xa0;al., 2020</xref>). The findings from this study provide essential information about how CAM have developed over the years with the establishment of robust evaluation metrics and the development of relatively high-performing models such as Enhanced-RICAP, the future trajectory of CAM-based methods shifting toward more application-oriented research. Our findings align with these observations, demonstrating that CAM can effectively highlight the strengths of the proposed model and reveal areas where other augmentation techniques may fall short. This aligns with the broader understanding that CAM visualization is crucial for validating model performance and ensuring that the decision-making process is both accurate and interpretable.</p>
<p>Crop diseases continue to impose substantial financial burdens on farmers, significantly reducing yield and compromising both food quality and environmental health. The lack of access to advanced diagnostic technologies often results in ineffective disease management, leading to soil degradation, increased chemical use, and disruptions in the food supply chain. Traditional methods, while informative, are often time-consuming, subjective, and limited in scalability. In response to these challenges, we developed a deep learning-based system, PlantDisease, utilizing an enhanced Xception+Enhanced-RICAP architecture fine-tuned for precision and efficiency. Our model was trained on annotated dataset encompassing four (<xref ref-type="bibr" rid="B4">Alford and Tuba, 2024</xref>) cassava diseases and nine (<xref ref-type="bibr" rid="B9">Ferentinos, 2018</xref>) tomato diseases. Integrated into a user-friendly Android application, the system not only classifies diseases with 96.64% accuracy, but also provides users with detailed information on symptoms, prevention strategies, and recommended treatment protocols. Compared to prior studies, our results demonstrate improved performance. For instance, <xref ref-type="bibr" rid="B23">Mohanty et&#xa0;al. (2016)</xref> achieved 91.2% accuracy in classifying 26 diseases across 14 crop species using AlexNet and GoogLeNet models. Similarly, <xref ref-type="bibr" rid="B9">Ferentinos (2018)</xref> reported an average accuracy of 99.53% using deep convolutional neural networks across 58 disease classes, but with limitations in mobile deployment and real-time feedback. Unlike these studies, our model prioritizes both accuracy and real-world usability through a lightweight architecture optimized for mobile platforms. Furthermore, our approach improves upon the generalizability and interpretability challenges observed in earlier works by incorporating disease-specific guidance and interactive support within the application. 
The inclusion of contextual knowledge such as visual symptoms and actionable management steps bridges the gap between automated identification and practical field application. In summary, our system not only advances the technical accuracy of plant disease identification but also enhances its accessibility and utility for farmers, contributing to more resilient agricultural systems and sustainable food production.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title>
<p>In this study, we introduced Enhanced-RICAP, a novel data augmentation method designed to enhance image identification accuracy while effectively mitigating model overfitting. We also developed PlantDisease, a mobile application designed specifically for the real-time identification of cassava and tomato leaf diseases. The method introduced in this study was rigorously evaluated using various benchmark deep learning architectures (RNet18, RNet34, RNet50, and Xception) under identical training conditions. We compared Enhanced-RICAP against established augmentation techniques such as CutMix, MixUp, CutOut, Hide-and-Seek, and RICAP. Experimental results consistently demonstrated the superior performance of Enhanced-RICAP across key evaluation metrics, including accuracy, precision, recall, and F1-score. Notably, the RNet18+Enhanced-RICAP configuration achieved an impressive accuracy of 99.86%, while preserving computational efficiency due to its lightweight architecture. Furthermore, the Xception+Enhanced-RICAP model attained 96.64% accuracy in classifying four cassava leaf diseases, demonstrating the robustness of our approach across different model types and datasets. To ensure practical applicability, we integrated the RNet18+Enhanced-RICAP model into the PlantDisease mobile app. This user-friendly application will empower farmers and agricultural practitioners to diagnose tomato and cassava leaf conditions promptly and accurately. In addition to disease identification, the application will provide straightforward recommendations for prevention and treatment. By enabling early and accurate diagnosis, PlantDisease will reduce the overuse or misuse of pesticides and lessen the dependency on expert intervention, thereby supporting sustainable agricultural practices. In this work, the effectiveness of the proposed method was evaluated using only two datasets. 
Nevertheless, in future research, the method can be extended to a broader range of plant disease identification and severity estimation datasets, encompassing diverse leaf images from various plants affected by different diseases. This expansion would not only test the method&#x2019;s generalization and robustness across multiple plant species but also enhance its scalability and practical applicability.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>MD: Investigation, Conceptualization, Writing &#x2013; original draft, Software, Methodology. YL: Supervision, Investigation, Resources, Writing &#x2013; review &amp; editing, Project administration. OC: Investigation, Writing &#x2013; review &amp; editing, Supervision. SB: Supervision, Investigation, Writing &#x2013; review &amp; editing. YG: Writing &#x2013; review &amp; editing, Investigation, Resources, Supervision, Visualization. MK: Investigation, Writing &#x2013; review &amp; editing, Supervision. GR: Supervision, Writing &#x2013; review &amp; editing, Investigation. LW: Investigation, Supervision, Writing &#x2013; review &amp; editing, Funding acquisition.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research and/or publication of this article. Gansu Provincial Higher Education Industry Support Program (Project No. 2023CYZC-54); Gansu Provincial Key Research and Development Program (Project No. 23YFWA0013); Gansu Provincial High-End Foreign Expert Introduction Program (Project No. 25RCKA015); Lanzhou Talent Innovation and Entrepreneurship Program (2021-RC-47).</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We are thankful to our supervisor, Li Yue, for his supervision and support throughout the entire research process. We are also grateful to our colleagues and friends for their help.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abbas</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Jain</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gour</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Vankudothu</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Tomato plant disease detection using transfer learning with c-gan synthetic images</article-title>. <source>Comput. Electron. Agric.</source> <volume>187</volume>, <fpage>106279</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106279</pub-id>
</citation></ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Agarwal</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Arjaria</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sinha</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gupta</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Toled: Tomato leaf disease detection using convolution neural network</article-title>. <source>Proc. Comput. Sci.</source> <volume>167</volume>, <fpage>293</fpage>&#x2013;<lpage>301</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.procs.2020.03.225</pub-id>
</citation></ref>
<ref id="B3">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ajitha</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Nivedha</surname> <given-names>M. V.</given-names>
</name>
<name>
<surname>Parvathi</surname> <given-names>M. B.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Detection and prevention of tomato leaf disease using convolutional neural network and inception net</article-title>,&#x201d; in <conf-name>2024 Third International Conference on Intelligent Techniques in Control, Optimization and Signal Processing (INCOS)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>6</lpage> (<publisher-name>IEEE</publisher-name>).</citation></ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Alford</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Tuba</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Cassava plant disease detection using transfer learning with convolutional neural networks</article-title>,&#x201d; in <conf-name>2024 12th International Symposium on Digital Forensics and Security (ISDFS)</conf-name>. <conf-loc>San Antonio, Texas</conf-loc>: <publisher-name>IEEE</publisher-name>. <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation></ref>
<ref id="B5">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ayu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Surtono</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Apriyanto</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Deep learning for detection cassava leaf disease</article-title>,&#x201d; in <conf-name>Journal of Physics: Conference Series</conf-name>, Vol. <volume>1751</volume>. <fpage>012072</fpage> (<publisher-name>IOP Publishing</publisher-name>).</citation></ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Ban</surname> <given-names>X.</given-names>
</name>
<name>
<surname>He</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Improved cross entropy loss for noisy labels in vision leaf disease classification</article-title>. <source>IET Image Process.</source> <volume>16</volume>, <fpage>1511</fpage>&#x2013;<lpage>1519</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1049/ipr2.12402</pub-id>
</citation></ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chollet</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Xception: Deep learning with depthwise separable convolutions</article-title>,&#x201d; in <conf-name>IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Honolulu, HI, USA</conf-loc>, <conf-date>21&#x2013;26 July 2017</conf-date>. <fpage>1251</fpage>&#x2013;<lpage>1258</lpage>.</citation></ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>DeVries</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Taylor</surname> <given-names>G. W.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Improved regularization of convolutional neural networks with cutout</article-title>. <source>arXiv preprint arXiv:1708.04552</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1708.04552</pub-id>
</citation></ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ferentinos</surname> <given-names>K. P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep learning models for plant disease detection and diagnosis</article-title>. <source>Comput. Electron. Agric.</source> <volume>145</volume>, <fpage>311</fpage>&#x2013;<lpage>318</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2018.01.009</pub-id>
</citation></ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>G&#xf3;mez-Pupo</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Pati&#xf1;o-Saucedo</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Agudelo</surname> <given-names>M. A. F.</given-names>
</name>
<name>
<surname>Mesa</surname> <given-names>E. C.</given-names>
</name>
<name>
<surname>Pati&#xf1;o-Vanegas</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Convolutional neural networks for the recognition of diseases and pests in cassava leaves (manihot esculenta)</article-title>. <source>ResearchGate Preprint</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.18687/LACCEI2022</pub-id>
</citation></ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Mao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Mixup as locally linear out-of-manifold regularization</article-title>,&#x201d; in <conf-name>Proceedings of the AAAI conference on artificial intelligence</conf-name>, <conf-loc>Honolulu, Hawaii, USA</conf-loc>. Vol. <volume>33</volume>. <fpage>3714</fpage>&#x2013;<lpage>3722</lpage>.</citation></ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Las Vegas, Nevada</conf-loc>. <fpage>770</fpage>&#x2013;<lpage>778</lpage>.</citation></ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hughes</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Salath&#xe9;</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>An open access repository of images on plant health to enable the development of mobile disease diagnostics</article-title>. <source>arXiv preprint arXiv:1511.08060</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1511.08060</pub-id>
</citation></ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Inoue</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Data augmentation by pairing samples for images classification</article-title>. <source>arXiv preprint arXiv:1801.02929</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1801.02929</pub-id>
</citation></ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jafar</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bibi</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Naqvi</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Sadeghi-Niaraki</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Jeong</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Revolutionizing agriculture with artificial intelligence: plant disease detection methods, applications, and their limitations</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>, <elocation-id>1356260</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1356260</pub-id>, PMID: <pub-id pub-id-type="pmid">38545388</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>P.-T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>C.-B.</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>M.-M.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Layercam: Exploring hierarchical class activation maps for localization</article-title>. <source>IEEE Trans. Image Process.</source> <volume>30</volume>, <fpage>5875</fpage>&#x2013;<lpage>5888</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2021.3089943</pub-id>, PMID: <pub-id pub-id-type="pmid">34156941</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Karpathy</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Toderici</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Shetty</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Leung</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sukthankar</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Fei-Fei</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Large-scale video classification with convolutional neural networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Columbus, Ohio</conf-loc>. <fpage>1725</fpage>&#x2013;<lpage>1732</lpage>.</citation></ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname> <given-names>A. I.</given-names>
</name>
<name>
<surname>Quadri</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Banday</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Shah</surname> <given-names>J. L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep diagnosis: A real-time apple leaf disease detection system based on deep learning</article-title>. <source>Comput. Electron. Agric.</source> <volume>198</volume>, <fpage>107093</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.107093</pub-id>
</citation></ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Sugarcanegan: A novel dataset generating approach for sugarcane leaf diseases based on lightweight hybrid cnn-transformer network</article-title>. <source>Comput. Electron. Agric.</source> <volume>219</volume>, <fpage>108762</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.108762</pub-id>
</citation></ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Identification of tomato leaf diseases based on lmbrnet</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>123</volume>, <fpage>106195</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2023.106195</pub-id>
</citation></ref>
<ref id="B21">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Maryum</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Akram</surname> <given-names>M. U.</given-names>
</name>
<name>
<surname>Salam</surname> <given-names>A. A.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Cassava leaf disease classification using deep neural networks</article-title>,&#x201d; in <conf-name>2021 IEEE 18th international conference on smart communities: improving quality of life using ICT, IoT and AI (HONET)</conf-name>. <fpage>32</fpage>&#x2013;<lpage>37</lpage> (<publisher-name>IEEE</publisher-name>).</citation></ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Methil</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Agrawal</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Kaushik</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>One-vs-all methodology based cassava leaf disease detection</article-title>,&#x201d; in <conf-name>2021 12th International Conference on Computing Communication and Networking Technologies (ICCCNT)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>7</lpage> (<publisher-name>IEEE</publisher-name>).</citation></ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohanty</surname> <given-names>S. P.</given-names>
</name>
<name>
<surname>Hughes</surname> <given-names>D. P.</given-names>
</name>
<name>
<surname>Salath&#xe9;</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Using deep learning for image-based plant disease detection</article-title>. <source>Front. Plant Sci.</source> <volume>7</volume>, <elocation-id>215232</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2016.01419</pub-id>, PMID: <pub-id pub-id-type="pmid">27713752</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paul</surname> <given-names>S. G.</given-names>
</name>
<name>
<surname>Biswas</surname> <given-names>A. A.</given-names>
</name>
<name>
<surname>Saha</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Zulfiker</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Ritu</surname> <given-names>N. A.</given-names>
</name>
<name>
<surname>Zahan</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>A real-time application-based convolutional neural network approach for tomato leaf disease classification</article-title>. <source>Array</source> <volume>19</volume>, <fpage>100313</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.array.2023.100313</pub-id>
</citation></ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Resizemix: Mixing data with preserved object information and true labels</article-title>. <source>arXiv preprint arXiv:2012.11101</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2012.11101</pub-id>
</citation></ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sajitha</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Andrushia</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Anand</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Naser</surname> <given-names>M. Z.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A review on machine learning and deep learning image-based plant disease classification for industrial farming systems</article-title>. <source>J. Ind. Inf. Integration</source> <volume>38</volume>, <fpage>100572</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jii.2024.100572</pub-id>
</citation></ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sanida</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sideris</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sanida</surname> <given-names>M. V.</given-names>
</name>
<name>
<surname>Dasygenis</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Tomato leaf disease identification via two&#x2013;stage transfer learning approach</article-title>. <source>Smart Agric. Technol.</source> <volume>5</volume>, <fpage>100275</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atech.2023.100275</pub-id>
</citation></ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Selvaraju</surname> <given-names>R. R.</given-names>
</name>
<name>
<surname>Cogswell</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Das</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Vedantam</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Parikh</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Batra</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Grad-cam: visual explanations from deep networks via gradient-based localization</article-title>. <source>Int. J. Comput. Vision</source> <volume>128</volume>, <fpage>336</fpage>&#x2013;<lpage>359</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11263-019-01228-7</pub-id>
</citation></ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sholihin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Fudzee</surname> <given-names>M. F. M.</given-names>
</name>
<name>
<surname>Ismail</surname> <given-names>M. N.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Alexnet-based feature extraction for cassava classification: A machine learning approach</article-title>. <source>Baghdad Sci. J.</source> <volume>20</volume>, <fpage>2624</fpage>&#x2013;<lpage>2624</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21123/bsj.2023.9120</pub-id>
</citation></ref>
<ref id="B30">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sharma</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sharma</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Sharma</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Gupta</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>A deep learning-based inceptionresnet v2 model for cassava leaf disease detection</article-title>,&#x201d; in <conf-name>International Conference on Emerging Trends in Expert Applications &amp; Security</conf-name>, <conf-loc>Jaipur Engineering College and Research Centre, Jaipur, India</conf-loc>, <conf-date>February 17&#x2013;19, 2023</conf-date>. <fpage>423</fpage>&#x2013;<lpage>432</lpage> (<publisher-name>Springer</publisher-name>).</citation></ref>
<ref id="B31">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Summers</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Dinneen</surname> <given-names>M. J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Improved mixed-example data augmentation</article-title>,&#x201d; in <conf-name>2019 IEEE winter conference on applications of computer vision (WACV)</conf-name>. <conf-loc>Waikoloa, HI, USA</conf-loc>: <publisher-name>IEEE</publisher-name>. <fpage>1262</fpage>&#x2013;<lpage>1270</lpage>.</citation></ref>
<ref id="B32">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Takahashi</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Matsubara</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Uehara</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Ricap: Random image cropping and patching data augmentation for deep cnns</article-title>,&#x201d; in <conf-name>Proceedings of The 10th Asian Conference on Machine Learning</conf-name>. <fpage>786</fpage>&#x2013;<lpage>798</lpage> (<publisher-name>PMLR</publisher-name>).</citation></ref>
<ref id="B33">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>Q.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Efficientnet: Rethinking model scaling for convolutional neural networks</article-title>,&#x201d; in <conf-name>Proceedings of the 36th International Conference on Machine Learning</conf-name>, <conf-loc>Long Beach, California, USA</conf-loc>: <publisher-name>PMLR</publisher-name>. p. <fpage>6105</fpage>&#x2013;<lpage>6114</lpage>.</citation></ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tewari</surname> <given-names>A. S.</given-names>
</name>
<name>
<surname>Kumari</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Lightweight modified attention based deep learning model for cassava leaf diseases classification</article-title>. <source>Multimedia Tools Appl.</source> <volume>83</volume>, <fpage>57983</fpage>&#x2013;<lpage>58007</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-023-17459-3</pub-id>
</citation></ref>
<ref id="B35">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Thai</surname> <given-names>H.-T.</given-names>
</name>
<name>
<surname>Tran-Van</surname> <given-names>N.-Y.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>K.-H.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Artificial cognition for early leaf disease detection using vision transformers</article-title>,&#x201d; in <conf-name>Proceedings - 2021 International Conference on Advanced Technologies for Communications</conf-name>. <conf-loc>Ho Chi Minh City, Vietnam</conf-loc>: <publisher-name>IEEE</publisher-name>. p. <fpage>33</fpage>&#x2013;<lpage>38</lpage>.</citation></ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tokozume</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ushiku</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Harada</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Between-class learning for image classification</article-title>,&#x201d; in <conf-name>2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018)</conf-name>, <conf-loc>Salt Lake City, Utah, USA</conf-loc>. <fpage>5486</fpage>&#x2013;<lpage>5494</lpage>.</citation></ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uddin</surname> <given-names>A. F. M.</given-names>
</name>
<name>
<surname>Monira</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Shin</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Chung</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Bae</surname> <given-names>S.-H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Saliencymix: A saliency guided data augmentation strategy for better regularization</article-title>. <source>arXiv preprint arXiv:2006.01791</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2006.01791</pub-id>
</citation></ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Vengaiah</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Priyadharshini</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Cnn model suitability analysis for prediction of tomato leaf diseases</article-title>,&#x201d; in <conf-name>2023 6th International Conference on Information Systems and Computer Networks (ISCON)</conf-name>, <conf-loc>GLA University in Mathura, India</conf-loc>. <fpage>1</fpage>&#x2013;<lpage>4</lpage> (<publisher-name>IEEE</publisher-name>).</citation></ref>
<ref id="B39">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Vijayalata</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Billakanti</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Veeravalli</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Deepa</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kota</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Early detection of casava plant leaf diseases using efficientnet-b0</article-title>,&#x201d; in <conf-name>2022 IEEE Delhi Section Conference (DELCON)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>5</lpage> (<publisher-name>IEEE</publisher-name>).</citation></ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Cernava</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Lsgnet: A lightweight convolutional neural network model for tomato disease identification</article-title>. <source>Crop Prot.</source> <volume>182</volume>, <fpage>106715</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cropro.2024.106715</pub-id>
</citation></ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yun</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Oh</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Chun</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Choe</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yoo</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Cutmix: Regularization strategy to train strong classifiers with localizable features</article-title>,&#x201d; in <conf-name>2019 IEEE/CVF International Conference on Computer Vision (ICCV 2019)</conf-name>, <conf-loc>Seoul, South Korea</conf-loc>, <conf-date>27 October &#x2013; 2 November 2019</conf-date>. <fpage>6023</fpage>&#x2013;<lpage>6032</lpage>.</citation></ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zarboubi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bellout</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chabaa</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Dliou</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Custombottleneck-vggnet: Advanced tomato leaf disease identification for sustainable agriculture</article-title>. <source>Comput. Electron. Agric.</source> <volume>232</volume>, <fpage>110066</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2025.110066</pub-id>
</citation></ref>
<ref id="B43">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Cisse</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Dauphin</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lopez-Paz</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>mixup: Beyond empirical risk minimization</article-title>,&#x201d; in <conf-name>6th International Conference on Learning Representations (ICLR 2018)</conf-name>, <conf-loc>Vancouver Convention Centre, Vancouver, BC, Canada</conf-loc>, <conf-date>30 April &#x2013; 3 May 2018</conf-date>. <fpage>1</fpage>&#x2013;<lpage>13</lpage>.</citation></ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Mu</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Gmamba: State space model with convolution for grape leaf disease segmentation</article-title>. <source>Comput. Electron. Agric.</source> <volume>225</volume>, <fpage>109290</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.109290</pub-id>
</citation></ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Deep learning-based object detection improvement for tomato disease</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>56607</fpage>&#x2013;<lpage>56614</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2020.2982456</pub-id>
</citation></ref>
</ref-list>
</back>
</article>