<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1253934</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2023.1253934</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>An efficient convolutional neural network-based diagnosis system for citrus fruit diseases</article-title>
<alt-title alt-title-type="left-running-head">Huang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2023.1253934">10.3389/fgene.2023.1253934</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Zhangcai</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2366315/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jiang</surname>
<given-names>Xiaoxiao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2369195/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Shaodong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2412738/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Qin</surname>
<given-names>Sheng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2395934/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Su</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1828188/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Guangxi Key Laboratory of Brain-Inspired Computing and Intelligent Chips</institution>, <institution>School of Electronic and Information Engineering</institution>, <institution>Guangxi Normal University</institution>, <addr-line>Guilin</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Computer Science</institution>, <institution>Swansea University</institution>, <addr-line>Swansea</addr-line>, <country>United Kingdom</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1076532/overview">Min Zeng</ext-link>, Central South University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1489998/overview">Liangliang Liu</ext-link>, Henan Agricultural University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2375540/overview">Rongjun Ge</ext-link>, Southeast University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Xiaoxiao Jiang, <email>gxnujiang@gxnu.edu.cn</email>; Sheng Qin, <email>qinsheng@gxnu.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>24</day>
<month>08</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1253934</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>07</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>08</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Huang, Jiang, Huang, Qin and Yang.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Huang, Jiang, Huang, Qin and Yang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Introduction:</bold> Fruit diseases have a serious impact on fruit production, causing a significant drop in economic returns from agricultural products. Due to its excellent performance, deep learning is widely used for disease identification and severity diagnosis of crops. This paper focuses on leveraging the high-latitude feature extraction capability of deep convolutional neural networks to improve classification performance.</p>
<p>
<bold>Methods:</bold> The proposed neural network is formed by combining the Inception module with the current state-of-the-art EfficientNetV2 for better multi-scale feature extraction and disease identification of citrus fruits. The VGG is used to replace the U-Net backbone to enhance the segmentation performance of the network.</p>
<p>
<bold>Results:</bold> Compared to existing networks, the proposed method achieved recognition accuracy of over 95%. In addition, the accuracies of the segmentation models were compared. VGG-U-Net, a network generated by replacing the backbone of U-Net with VGG, is found to have the best segmentation performance with an accuracy of 87.66%. This method is most suitable for diagnosing the severity level of citrus fruit diseases. In the meantime, transfer learning is applied to improve the training cycle of the network model, both in the detection and severity diagnosis phases of the disease.</p>
<p>
<bold>Discussion:</bold> The results of the comparison experiments reveal that the proposed method is effective in identifying citrus fruit diseases and diagnosing their severity.</p>
</abstract>
<kwd-group>
<kwd>identification and quantification</kwd>
<kwd>high-latitude features</kwd>
<kwd>EfficientNetV2</kwd>
<kwd>VGG</kwd>
<kwd>U-Net</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Citrus is cultivated worldwide for its high commercial and nutritional value (<xref ref-type="bibr" rid="B33">Yang et al., 2022</xref>). The popularity of citrus cultivation in southern China has endured because of its good economic returns, especially in Guangxi (<xref ref-type="bibr" rid="B34">Zhou, 2020</xref>). With the great growth of citrus cultivation, the direct economic losses caused by citrus diseases have also climbed. Citrus canker, which is difficult to eradicate, is one of the major diseases affecting the quality of citrus (<xref ref-type="bibr" rid="B7">Conti et al., 2020</xref>). Therefore, prompt treatment of citrus diseases is particularly important. To achieve this, the accurate identification of the disease type in citrus fruit and the accurate assessment of its severity are indispensable.</p>
<p>Many techniques are used for the identification of citrus diseases. Computer vision is one of the important methods, which is divided into pre-processing, segmentation, feature extraction and final classification. Pre-processing refers to the optimization of plant images to prepare for the next image processing. Common methods include image binarisation, noise reduction, enhancement, geometric changes and interpolation. Segmentation extracts features such as size, colour and texture from an image by dividing it into different regions. Finally, the image is classified based on the extracted feature information (<xref ref-type="bibr" rid="B18">Lee et al., 2017</xref>). The refinement of accurate diagnostic computer systems has solved many of the problems of plant disease identification, enabling computer vision technology to be used in a wide range of practical scenarios. However, traditional computer vision techniques rely on manual feature extraction algorithms, which require high-quality data sets, making it difficult to achieve the accuracy expected from detection.</p>
<p>The progress of machine learning algorithms promotes the emergence of new methods. Deep Learning (DL) is regarded as the most potential and future computing technology in modern agriculture because of its high accuracy in classification and recognition tasks. Hence, DL plays an indispensable role in the automation of disease identification and detection. Especially, due to the excellent performance of Convolutional Neural Networks (CNNs) in image feature extraction, it is widely used for fruit recognition and prediction by providing an automatic feature extraction scheme without human intervention (<xref ref-type="bibr" rid="B28">Vasconez et al., 2020</xref>). CNN automatically extracts relevant features through training a large number of data sets, thus eliminating the traditional manual feature extraction link. The accuracy of the former is often much higher than that of the latter.</p>
<p>Different CNN models show different efficiencies. Depending on the database in question, the CNN with optimized depth, width and resolution may lead to much-improved results. Improving the network from these three aspects also means improving the network performance and strengthening feature extraction capabilities. Features are divided into basic features and high-dimensional features, and network improvements nowadays tend to focus on the former rather than the latter. This results in models that have increased complexity but struggle to reap corresponding performance gains. Improvements in high-latitude feature extraction are gaining attention in models saturated with basic feature extraction. High-latitude features are features that are extracted from multiple scales to exploit the multi-scale information of the image, in a way that is not limited to only one dimension. The enhanced capability of high-latitude feature extraction can improve the generalization performance of the network model and enhance the adaptability of the model to different datasets. Therefore, it is challenging to improve the ability of the model to extract high-dimensional features, and the quality of the extracted high-dimensional features can be judged by the accuracy of the disease identification. Once the type of disease has been accurately determined, the next challenge is to quantify the severity of the disease to determine the dose of medication.</p>
<p>Indeed, it is difficult to gauge the effectiveness of management practices without a quantifiable measure. Traditionally, the human eye relies on experience with the help of measurement aids to determine the severity of the disease, but this method lacks accuracy (<xref ref-type="bibr" rid="B13">Hassan et al., 2021</xref>). On the other hand, the traditional machine learning methods cannot quantify the severity; they can only determine the interval of the disease, such as early or late stages. They cannot give good advice on the progression and subtle changes in some subsequent diseases because they cannot achieve sufficient detection accuracy (<xref ref-type="bibr" rid="B30">Wang et al., 2017</xref>). Benefiting from the extension of the DL model, the image can be detected with sufficient resolution. Through pixel-by-pixel detection, the highest accuracy can be achieved when calculating the severity of the disease. However, the training of DL models depends on a large amount of tagged image data. Therefore, collecting data for training is one of the challenges for disease severity analysis.</p>
<p>The main contributions of this study are: 1) A citrus disease detection system has been constructed, which is divided into a phase for the identification of disease species and a phase for the diagnosis of their severity. 2) A fast and accurate model for citrus fruit disease diagnosis is constructed by integrating InceptionV1 and EfficientNetV2. 3) In the system proposed in this paper, Transfer Learning (TL) is used to import the initialization weights of the network to reduce the training cycle of the model and compare it with the advanced models of disease diagnosis. 4) Estimation models are constructed to quantify the severity of citrus diseases, and the performance of different segmentation models is compared on our dataset.</p>
<p>The rest of the paper is organized as follows: <xref ref-type="sec" rid="s2">Section 2</xref> introduces the background of disease identification and severity quantification. <xref ref-type="sec" rid="s3">Section 3</xref> outlines the materials and methods of the proposed models for disease identification, and the experiment results using other advanced DL models are discussed and compared with the proposed method. The architecture and implementation of disease severity analysis are described in <xref ref-type="sec" rid="s4">Section 4</xref>. Finally, the contribution of this study and plans are summarized in <xref ref-type="sec" rid="s5">Section 5</xref>.</p>
</sec>
<sec id="s2">
<title>2 Related work</title>
<p>This section reviews the methods of disease recognition and semantic segmentation. Representative methods applied to these two fields in engineering are presented.</p>
<sec id="s2-1">
<title>2.1 Disease recognition</title>
<p>Traditional manual methods of identifying citrus diseases often make identification inefficient and difficult to achieve the desired level of accuracy due to the tedious nature of the identification process and the variability of the preceding and following processes (<xref ref-type="bibr" rid="B15">Ismail and Malik, 2021</xref>). Compared with traditional manual recognition, computer vision-based technology can provide better solutions for citrus disease recognition. Images contain many visible features including textures, shapes, and colors. The machine learning methods extract the feature information contained in the image through the algorithm processing of the citrus image, to achieve the purpose of citrus classification. Citrus images are detected by a multispectral imaging sensor. Moreover, an approach for citrus classification using threshold processing is proposed in (<xref ref-type="bibr" rid="B1">Abdelsalam and Sayed, 2016</xref>). Adaptive neuro-fuzzy inference systems and linear, and nonlinear regression methods are used to grade citrus fruits (<xref ref-type="bibr" rid="B23">Sabzi et al., 2017</xref>). A system for classifying diseases of orange using multiclass Support Vector Machines (SVM) and calculating the severity of diseases using fuzzy logic is proposed in (<xref ref-type="bibr" rid="B3">Behera et al., 2018</xref>). The automatic citrus grading detection is performed by using BP neural network (<xref ref-type="bibr" rid="B5">Chen et al., 2018</xref>). These methods are interpretable and have the features of a high correct recognition rate compared with the manual method but the tediousness of the feature extraction process and the loss of features due to dimensionality reduction are challenges that need to be addressed (<xref ref-type="bibr" rid="B4">Chao et al., 2021</xref>). 
Although these methods are a significant improvement over manual methods, the non-automatic nature of feature extraction in the recognition process has prevented their widespread use in practical production.</p>
<p>DL has been widely researched for its automated feature extraction process, and it can effectively reduce the loss of information caused by manual feature extraction algorithms. In particular, the rise of CNNs has raised enthusiasm for DL to a whole new level. VGG (<xref ref-type="bibr" rid="B24">Simonyan and Zisserman, 2014</xref>), AlexNet (<xref ref-type="bibr" rid="B14">Iandola et al., 2016</xref>) and GoogleNet (<xref ref-type="bibr" rid="B25">Szegedy et al., 2015</xref>) are classic representatives of CNN models. Although these models cannot achieve very high accuracy, they are still widely employed in the field of agricultural engineering. These networks only require an input image to actively extract the feature information embedded in the image, but the performance of their output fluctuates with the merit of the dataset. This means that these models cannot be applied in some complex environments, and the reason for this is the inadequate feature extraction capability of these network models. In recent years, based on the developed computer hardware, people have begun to optimize deep learning models in terms of depth, width and resolution. As a result, deep Residual Neural Networks (ResNet) (<xref ref-type="bibr" rid="B32">Wu et al., 2019</xref>), Xception (<xref ref-type="bibr" rid="B2">Avery et al., 2014</xref>), EfficientNet (<xref ref-type="bibr" rid="B26">Tan and Le, 2019</xref>) and other more generalized DL models emerged as the times required.</p>
<p>ResNet classifiers are trained to detect the defects of tomato fruit (<xref ref-type="bibr" rid="B8">da Costa et al., 2020</xref>) and achieved an average precision of 94.6%. By combining TL with ShuffleNet, a lightweight model (Context Driven Detection Network) is constructed to detect and classify surface defects in carrots (<xref ref-type="bibr" rid="B10">Deng et al., 2021</xref>), achieving 99.82% and 93.01% accuracy in binary and multiclass classification, respectively. The conclusion that CNNs are more accurate than SVM is proved in (<xref ref-type="bibr" rid="B12">Fan et al., 2020</xref>) by comparing the performance of CNN and SVM in apple defect detection. Three learning models (AlexNet, GoogleNet and ResNet50) are used to grade okra (<xref ref-type="bibr" rid="B22">Raikar et al., 2020</xref>). The accuracies obtained are 63.45% for AlexNet, 68.99% for the GoogleNet model and 99% for ResNet50, which is better than the others. By training, testing and comparing ResNet, DenseNet, MobileNetV2, NASNet and EfficientNet, EfficientNet is proven to be the best fruit grading model (<xref ref-type="bibr" rid="B15">Ismail and Malik, 2021</xref>). The accuracy exceeded 98% on both the apple and banana datasets. Although these models have better accuracy than those classical models, it can be observed that these models lack the ability to extract multi-scale features. The lack of high-latitude feature extraction capability makes the DL model unable to achieve ideal results on some similar data sets. At the same time, the optimization of depth, width or resolution means the increase of model parameters and the occupation of computing resources. Therefore, it is a challenge for all DL models to improve the high-latitude feature extraction capability of the model and reduce computing resources.</p>
</sec>
<sec id="s2-2">
<title>2.2 Disease severity diagnosis</title>
<p>To effectively control and treat plant diseases, an accurate diagnosis of the severity of the disease is an integral part of the effective identification of the plant disease. Disease severity diagnosis can be used to improve crop yields and reduce the economic losses caused by plant diseases. Disease detection models at this stage are not suitable for disease severity diagnosis. More often, segmentation models are used to distinguish between diseased and healthy areas for the next step of disease severity analysis.</p>
<p>A fuzzy logic inference system based on the DeeplabV3&#x2b; model is proposed in (<xref ref-type="bibr" rid="B16">Ji and Wu, 2022</xref>) for automated detection and disease analysis of grapevine black measles disease. DeeplabV3&#x2b; is used to separate the infected and healthy areas and a fuzzy inference system is introduced to diagnose the disease severity. The method has been shown to have high classification accuracy and also to be able to accurately measure the severity of grapevine black measles disease under controlled conditions. Also based on DeeplabV3&#x2b;, a DeeplabV3&#x2b; model with multi-scale inputs is proposed to improve image recognition and segmentation performance of cancerous areas in pathological sections of gastric cancer (<xref ref-type="bibr" rid="B29">Wang and Liu, 2021</xref>). By incorporating a unique nested jumping device in U-Net to generate semantically similar feature maps in the connected section, a model called U-Net&#x2b;&#x2b; is proposed (<xref ref-type="bibr" rid="B6">Cheng et al., 2018</xref>). By comparing the segmentation performance of a set of the most representative models (DeeplabV3&#x2b;, U-Net and U-Net&#x2b;&#x2b;) for the bull&#x2019;s-eye region in ultrasound images, it is concluded that U-Net&#x2b;&#x2b; has the best performance compared to the other models, achieving a segmentation accuracy of more than 97% (<xref ref-type="bibr" rid="B9">de Melo et al., 2022</xref>). A real-time detection system for apple leaf disease detection is proposed in (<xref ref-type="bibr" rid="B17">Khan et al., 2022</xref>). The system divides the detection phase into two stages: initial and detection. The initial phase is used to differentiate between diseased and disease-free leaves, while the detection phase is used to detect disease-susceptible areas of the leaves. 
By combining VGG and U-Net, a system for diagnosing the severity of tomato leaf diseases is proposed in (<xref ref-type="bibr" rid="B31">Wspanialy and Moussa, 2020</xref>), obtaining results comparable to human assessments. The proportion of affected regions is suggested as the most appropriate indicator of disease severity for plant diseases caused by fungi or bacteria, whereas ordinal classification is more applicable to diseases caused by viruses or insects.</p>
</sec>
</sec>
<sec sec-type="methods" id="s3">
<title>3 Methodology</title>
<p>EfficientNetV2 (<xref ref-type="bibr" rid="B27">Tan and Le, 2021</xref>) and the Inception module are introduced in detail in this section and an improved EfficientNetV2 is applied to disease detection in citrus fruits. In addition to this, several different segmentation algorithms are compared to find the most suitable model for disease severity diagnosis.</p>
<sec id="s3-1">
<title>3.1 Dataset</title>
<p>The dataset for this article is mainly from the Kaggle website, which includes 800 images for the citrus fruit black spot and canker diseases. All of them are taken in a uniform laboratory setting, and some sample images are shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Citrus dataset: <bold>(A)</bold> Citrus black spot; <bold>(B)</bold> Citrus black spot after mirror flip; <bold>(C)</bold> Citrus black spot after rotation; <bold>(D)</bold> Citrus canker; <bold>(E)</bold> Citrus canker after mirror flip; <bold>(F)</bold> Citrus canker after rotation.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g001.tif"/>
</fig>
<p>For effective differentiation in the experiment, fresh citrus images without disease are added for classification based on these two disease images. In addition to this, the disease images are expanded to 2,000 by data enhancement operations, including mirror flip and angular rotation. Moreover, of these 2,000 images, black spot and canker images each account for 50%. At the stage of severity diagnosis, manual pixel-level labels of the raw dataset are made by using an annotation tool named labelme (<xref ref-type="bibr" rid="B19">Marois and Syssau, 2008</xref>); image pixels are labelled as one of four categories: healthy, black spotted, cankered and background. We use different colors to differentiate them.</p>
</sec>
<sec id="s3-2">
<title>3.2 EfficientNetV2</title>
<p>EfficientNet was considered the best CNN network when it was first proposed. It improves the performance of the network by simultaneously improving the width, depth and resolution of the network. However, with the improved performance, EfficientNet also has its drawbacks: 1) The training period is limited by the size of the input image and becomes extremely inefficient when the image size is too large. 2) Premature use of depthwise convolution can make the model counterproductive. 3) Equivalent amplification of each stage is suboptimal. These drawbacks limited EfficientNet and prevented it from being widely used in practice until the advent of EfficientNetV2.</p>
<p>EfficientNetV2 replaces the originally available MBConv by Fused-MBConv based on EfficientNet and proposes an improved progressive learning method that can improve not only the training speed but also the accuracy rate. Fused-MBConv replaces expansion conv1x1 and depth-wise conv3x3 in the main branch with a normal conv3x3, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. However, structure replacement like this does not happen at every layer. If the shallow MBConv structure is replaced with a Fused-MBConv structure, the training speed can be significantly improved, but if all MBConv modules are replaced with Fused-MBConv modules, the training period rises significantly with the increase in computational complexity. The best combination of MBConv and Fused-MBConv is therefore searched in EfficientNetV2 using neural architecture search (NAS) technology.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Structure of Fused-MBConv module.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g002.tif"/>
</fig>
<p>Similar to EfficientNet, which includes several models from B0-B7, EfficientNetV2 also includes several classical models, namely, EfficientNetV2-S, EfficientNetV2-M and EfficientNetV2-L. In our experiments, EfficientNetV2-S is used as a base model. The structure of the EfficientNetV2-S is shown in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Structure of EfficientNetV2-S.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g003.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 Transfer learning</title>
<p>The advent of TL has brought new life to DL models, where weights trained on the initial training set are migrated to the target network to reduce training cycles and improve model accuracy. Using TL means that instead of training with randomly initialized weights from start to finish, weights are first obtained by pre-training on some large labelled datasets (e.g., public image datasets) and are then used to initialize the target network weights. In this paper, pre-trained models learned from ImageNet are considered and transferred to the target dataset for task-specific training. Pre-training weights trained on ImageNet are imported into the EfficientNetV2 model as a way to improve the classification performance of the model.</p>
</sec>
<sec id="s3-4">
<title>3.4 Proposed approach</title>
<p>As mentioned in <xref ref-type="sec" rid="s3-2">Section 3.2</xref>, EfficientNet represents the most advanced CNN model framework. It further reduces the computational complexity and improves performance. However, EfficientNetV2&#x2019;s thin final stage layer caught our attention. The gap before pooling and 1 &#xd7; 1 convolution mean that the final layer of the EfficientNetV2 model does not allow for the extraction of multi-scale features, which will inevitably have an impact on the final classification. The first few convolutional layers of a convolutional neural network are usually used to extract colour and corner point features of an image, while the end layer performs the resolution of weights and computation of features, so the lack of performance of the end layer is critical to the overall network model. In addition, the MBConv in EfficientNetV2 also inspires and reminds us of the Inception module (<xref ref-type="bibr" rid="B9">de Melo et al., 2022</xref>), which is similar to it. The Inception module is added to the final phase of EfficientNetV2 to enhance network performance.</p>
<p>MBConv is an inverted linear bottleneck layer with depthwise separable convolution, and Inception is a module that lies on a discrete spectrum between normal convolution and depthwise separable convolution. Compared to normal convolution, the major difference of these two types of convolution is the much-reduced number of parameters. Suppose the input feature map dimension is <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold">M</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="bold">M</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are the height, width and number of channels of the input feature map, respectively. The convolution kernel size is <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the height and width of the convolution kernel. The output feature map dimension is <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold">N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mi mathvariant="bold">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are the height, width and number of channels of the output feature map, respectively. For the standard convolution, the computational complexity can be calculated as<disp-formula id="e1">
<mml:math id="m11">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold">N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the computation of the standard convolution. Depth-separable convolution can be calculated as<disp-formula id="e2">
<mml:math id="m13">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mi mathvariant="bold-italic">O</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold">N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf12">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the computation of the depth-separable convolution. The ratio between the two calculations can be found as<disp-formula id="e3">
<mml:math id="m15">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf13">
<mml:math id="m16">
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the ratio of the calculation volumes of the two calculation methods. The advantage of the Inception module is that it allows the aggregation of visual information of different sizes, while first down-scaling larger matrices to facilitate feature extraction from different scales. The framework of the InceptionV1 module used in this paper is shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, which replaces vertically stacked convolutions with parallel convolutions. This module uses three different scales of convolution kernels <inline-formula id="inf14">
<mml:math id="m17">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn mathvariant="bold">3</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn mathvariant="bold">3</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn mathvariant="bold">5</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn mathvariant="bold">5</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> and a maximum pooling kernel <inline-formula id="inf15">
<mml:math id="m18">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">3</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn mathvariant="bold">3</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> to increase the adaptability of the network to different scales.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Structure of InceptionV1.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g004.tif"/>
</fig>
<p>Features from the previous layer are extracted and stitched together at the end after passing through these three different sizes of convolution kernels. This means that the network can perceive local areas of the image from different sizes in the same layer and fuse features from different scales. Thus, InceptionV1 has the following advantages over standard convolution:<list list-type="bullet">
<list-item>
<p>Control the computational complexity while increasing the parameters.</p>
</list-item>
<list-item>
<p>The multi-scale processing performance of the network is improved by aggregating feature information.</p>
</list-item>
</list>
</p>
<p>In this paper, a 2-layer InceptionV1 module is added to the final stage between the 1 &#xd7; 1 convolutional layer and the global pooling layer. The newly generated network architecture is shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Parameters related to the proposed network model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Stage</th>
<th align="center">Type</th>
<th align="center">Kernel size</th>
<th align="center">Stride</th>
<th align="center">Layers</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="center">Conv3 &#xd7; 3</td>
<td align="center">3 &#xd7; 3</td>
<td align="center">2</td>
<td align="center">1</td>
</tr>
<tr>
<td align="center">1</td>
<td align="center">Fused-MBConv1</td>
<td align="center">3 &#xd7; 3</td>
<td align="center">1</td>
<td align="center">2</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">Fused-MBConv4</td>
<td align="center">3 &#xd7; 3</td>
<td align="center">2</td>
<td align="center">4</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">Fused-MBConv4</td>
<td align="center">3 &#xd7; 3</td>
<td align="center">2</td>
<td align="center">4</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">MBConv4</td>
<td align="center">3 &#xd7; 3</td>
<td align="center">2</td>
<td align="center">6</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">MBConv6</td>
<td align="center">3 &#xd7; 3</td>
<td align="center">1</td>
<td align="center">9</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">MBConv6</td>
<td align="center">3 &#xd7; 3</td>
<td align="center">2</td>
<td align="center">15</td>
</tr>
<tr>
<td rowspan="2" align="center">7</td>
<td align="center">InceptionV1</td>
<td align="center">-</td>
<td align="center">1</td>
<td align="center">2</td>
</tr>
<tr>
<td align="center">Conv1 &#xd7; 1&#x26;Pooling&#x26;FC</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The excellent multi-scale inference capability of the InceptionV1 model compensates well for the lack of feature extraction capability before the descending convolution of the tail layer. It both picks up the feature extraction from the previous stage and prepares the ground for the dimensionality reduction operation in the next part. Therefore, the newly generated network usually consists of two parts: the first part is the pre-training module, stages 0&#x2013;6, which is used for basic feature extraction; the second part is the extension layer, stage 7, which is used for extracting high-dimensional features and using multi-scale feature maps for classification. In addition, the training of the model is performed using two-step TL with the following training strategy. In the first step, model parameters are inferred from scratch while freezing the weights from the bottom multiscale module (stage 7) pre-trained from ImageNet. The second step retrains all weights by loading the model imported in the first stage of training and using the target citrus dataset. The multiscale module at the bottom of the model has initial weights and is trained using the citrus dataset; thus, network performance is improved.</p>
</sec>
<sec id="s3-5">
<title>3.5 Severity diagnosis</title>
<p>Severity diagnosis is one of the sub-tasks of semantic segmentation, which aims to calculate the severity of disease by accurately measuring the area of the diseased region, calculated as<disp-formula id="e4">
<mml:math id="m19">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <inline-formula id="inf16">
<mml:math id="m20">
<mml:mrow>
<mml:mi mathvariant="bold-italic">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the severity of the disease, <inline-formula id="inf17">
<mml:math id="m21">
<mml:mrow>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the area of the diseased region and <inline-formula id="inf18">
<mml:math id="m22">
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the total fruit area.</p>
<p>Using the label annotation of the dataset in <xref ref-type="sec" rid="s3-1">Section 3.1</xref>, 400 images are obtained for the two citrus fruit diseases shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. Each disease category dataset is divided into subsets for training, validation and testing in proportions of 70%, 10% and 20% respectively.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Images of citrus fruit before and after labelling: <bold>(A)</bold> Original black spot image; <bold>(B)</bold> Original canker disease image; <bold>(C)</bold> Image of the black spot after labelling; <bold>(D)</bold> Image of canker disease after labelling.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g005.tif"/>
</fig>
<p>The U-Net, as the name suggests, is a U-shaped network architecture, divided into a down-sampling part (the backbone feature extraction network) and an up-sampling part (the enhanced feature extraction network). The &#x201c;U&#x201d; structure of its features consists of conventional convolution and maximum pooling forming the down-sampling, followed by a mirroring up-sampling step. In this work, the down-sampling part of U-Net will be replaced by VGG16 to enhance feature extraction from its backbone feature network. 3 &#xd7; 3 convolution is used in the same horizontal layer as the ReLU activation function and is carried through to the next dimension by 2 &#xd7; 2 maximum pooling. The last step of each horizontal layer is connected to its associated up-sampling block in the upstream path as shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. Similarly, in the training phase, the VGG16 weights pre-trained on ImageNet are imported into the model with the help of TL to shorten its training cycle. The U-Net model is then retrained on the citrus fruit dataset and the parameters are fine-tuned to obtain the optimal weights. Finally, the test output of the model is compared with the real labels and its performance is analyzed.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>VGG-U-Net Architecture.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g006.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="results" id="s4">
<title>4 Results</title>
<p>This section provides the results of the qualitative analysis of the disease detection models presented and the quantitative analysis of the severity diagnosis models in <xref ref-type="sec" rid="s3">Section 3</xref>, which analyses the performance of these two types of models in detail.</p>
<sec id="s4-1">
<title>4.1 Experimental configuration and parameters</title>
<p>In this paper, experiments are conducted using the Python3 programming language, and the models are implemented using the TensorFlow 2.0 (<xref ref-type="bibr" rid="B20">Mart&#xed;n et al., 2016</xref>) framework. In addition, the training and testing of network models in this paper are performed on an AMD5700G and an NVIDIA A6000. In the model training stage, the training batch size is set to 8, and a stochastic gradient descent algorithm is chosen to optimize the parameters. The initial learning rate is set to 0.01 and decreased with training epochs. Meanwhile, the momentum is set to 0.9 for accelerating convergence. The dropout is set to 0.1 for preventing overfitting.</p>
</sec>
<sec id="s4-2">
<title>4.2 Disease recognition results</title>
<p>Based on the model proposed in <xref ref-type="sec" rid="s3-4">Section 3.4</xref>, this section trains and tests the model on the citrus fruit dataset. To fairly evaluate the performance of the model, each class of citrus fruit is randomly and evenly divided into 5 portions. Four of these are used to train the fine-tuned model and the remaining one is used to test and evaluate the model&#x2019;s performance. In addition, we use K-fold cross-validation (K &#x3d; 5) for model training and hyperparameter selection. Thus, the ratio of the training, validation and test sets for the experiment is set to 6:2:2. Five different fine-tuned models are obtained after cross-validation, where the accuracy of a single model is calculated by<disp-formula id="e5">
<mml:math id="m23">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
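</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>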
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2260;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf19">
<mml:math id="m24">
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the accuracy, <inline-formula id="inf20">
<mml:math id="m25">
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of inputs, <inline-formula id="inf21">
<mml:math id="m26">
<mml:mrow>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted outcome of the model and <inline-formula id="inf22">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is its true label. These 5 model results are combined to obtain the average accuracy of the model. The average accuracy avoids the errors caused by single training and gives us a more objective view of the model&#x2019;s performance.</p>
<p>
<xref ref-type="table" rid="T2">Table 2</xref> shows the test results of the 5 training sessions. Although there is some fluctuation in the results of the 5 sessions, the overall performance is at a high level. In addition, to further assess the feasibility of the proposed methodology, three classical and convincing CNN models are added to the comparison experiments, including ResNet50, GoogleNet, and EfficientNet. Again, these network models are loaded with weights pre-trained on ImageNet. A TL strategy similar to that of the proposed method is used to shorten the training cycle, and training is performed on randomly assigned image data to compare the performance strengths and weaknesses of each network. The accuracy of the proposed method on the citrus fruit dataset is shown in <xref ref-type="fig" rid="F7">Figure 7</xref>. The proposed model performs extremely well on the citrus fruit dataset. As can be seen from the figure, the loss function and accuracy converge at 25 training epochs; notably, the accuracy is above 99% and the loss converges to 0. We have repeated the experiments many times by using different epochs (including epoch &#x3d; 30, 40 and 50). The results show that the model&#x2019;s performance has reached convergence at epoch &#x3d; 25, and the accuracy of the subsequent rounds keeps fluctuating, so this paper concludes that the model can obtain good performance at epoch &#x3d; 25. <xref ref-type="table" rid="T3">Table 3</xref> shows the average accuracy of each model after 5 training experiments.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Results for 5 training sessions of the model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">-</th>
<th align="center">1st fold</th>
<th align="center">2nd fold</th>
<th align="center">3rd fold</th>
<th align="center">4th fold</th>
<th align="center">5th fold</th>
<th align="center">Average</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Accuracy</td>
<td align="center">97.5</td>
<td align="center">98.4</td>
<td align="center">98.7</td>
<td align="center">97.2</td>
<td align="center">99.2</td>
<td align="center">98.2</td>
</tr>
<tr>
<td align="center">Loss</td>
<td align="center">0.123</td>
<td align="center">0.076</td>
<td align="center">0.076</td>
<td align="center">0.112</td>
<td align="center">0.043</td>
<td align="center">0.086</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Accuracy and loss of the proposed method on the citrus dataset: <bold>(A)</bold> accuracy; <bold>(B)</bold> loss.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g007.tif"/>
</fig>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Performance comparisons with other approaches.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Pre-trained model</th>
<th align="center">Average accuracy (train)%</th>
<th align="center">Average accuracy (val)%</th>
<th align="center">Average accuracy (test)%</th>
<th align="center">Average loss</th>
<th align="center">Average time (each epoch)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">ResNet50 <xref ref-type="bibr" rid="B16">Ji and Wu, (2022)</xref>
</td>
<td align="center">96.6</td>
<td align="center">94.9</td>
<td align="center">90.2</td>
<td align="center">0.123</td>
<td align="center">17&#x2032;25&#x2033;</td>
</tr>
<tr>
<td align="center">GoogleNet <xref ref-type="bibr" rid="B21">qun PAN et al., (2022)</xref>
</td>
<td align="center">97.7</td>
<td align="center">96.4</td>
<td align="center">92.5</td>
<td align="center">0.100</td>
<td align="center">2&#x2032;05&#x2033;</td>
</tr>
<tr>
<td align="center">EfficientNet <xref ref-type="bibr" rid="B11">Espejo-Garcia et al., (2022)</xref>
</td>
<td align="center">96.0</td>
<td align="center">94.3</td>
<td align="center">89.6</td>
<td align="center">0.335</td>
<td align="center">23&#x2032;43&#x2033;</td>
</tr>
<tr>
<td align="center">EfficientNetV2 <xref ref-type="bibr" rid="B27">Tan and Le, (2021)</xref>
</td>
<td align="center">97.3</td>
<td align="center">95.2</td>
<td align="center">92.9</td>
<td align="center">0.194</td>
<td align="center">32&#x2032;38&#x2033;</td>
</tr>
<tr>
<td align="center">EfficientNetV2 <xref ref-type="bibr" rid="B27">Tan and Le, (2021)</xref> (with TL)</td>
<td align="center">97.7</td>
<td align="center">95.3</td>
<td align="center">92.9</td>
<td align="center">0.180</td>
<td align="center">9&#x2032;12&#x2033;</td>
</tr>
<tr>
<td align="center">Proposed method (without TL)</td>
<td align="center">98.6</td>
<td align="center">97.8</td>
<td align="center">95.2</td>
<td align="center">0.106</td>
<td align="center">34&#x2032;47&#x2033;</td>
</tr>
<tr>
<td align="center">Proposed method</td>
<td align="center">99.3</td>
<td align="center">98.2</td>
<td align="center">95.6</td>
<td align="center">0.086</td>
<td align="center">9&#x2032;22&#x2033;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>It can be seen that the proposed model outperforms the comparison methods on the citrus fruit dataset. In particular, despite the increase in training time compared to the baseline EfficientNetV2, it shows a large improvement in accuracy and a decrease in training loss. This means that the combination of the Inception module and EfficientNetV2 can enhance the feature extraction ability of EfficientNetV2 to some extent and improve the performance of the classifier. The main reason for this is that all other networks are single-layer networks with only a classification layer, while the proposed model applies the Inception module to the tail-layer classification, extracting high-dimensional features for the final classification. In addition, the use of TL significantly reduces the training cycle of the model. The training time is reduced to roughly 1/3, which results in faster convergence and improved performance. The reason is that in transfer learning the weights of the first few layers of the model are frozen and the pre-trained parameters are imported. This eliminates the need to train the model from scratch and greatly reduces the training period. In summary, the proposed model combines the advantages of EfficientNetV2 and the Inception module, including the former&#x2019;s excellent basic feature extraction ability and the latter&#x2019;s excellent multi-scale feature extraction ability, which results in such strong performance on the citrus fruit dataset. After five experiments with 50 epochs of training each, the average test accuracy of 95.6% and the average loss of 0.01 are obtained. The results of the five predictions made on the test set are averaged and <xref ref-type="fig" rid="F8">Figure 8</xref> shows the average confusion matrix of the test results. It can be seen that all the test samples were well classified. This shows that the proposed model is effective in identifying citrus fruit diseases.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Confusion matrix for the proposed method.</p>
</caption>
<graphic xlink:href="fgene-14-1253934-g008.tif"/>
</fig>
</sec>
<sec id="s4-3">
<title>4.3 Disease severity diagnosis</title>
<p>To verify the effectiveness of the VGG-U-Net segmentation model, this paper conducts segmentation experiments on the DeeplabV3, U-Net and VGG-U-Net models, respectively, under the same segmentation dataset. The hyper-parameters of the experiments are uniformly set to an initial learning rate of 0.0001, and the number of training rounds is 100. If the validation loss does not decrease, training will end early to prevent over-fitting. In addition, to show the segmentation performance of the models more intuitively, the Mean Intersection over Union (MIoU), Mean Pixel Accuracy (MPA) and Precision are used to evaluate the segmentation performance of the models. The more these indicators converge to 1, the better the segmentation performance. The calculation process for the evaluation indicators is described as follows:<disp-formula id="e6">
<mml:math id="m28">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">M</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">0</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m29">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">M</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">0</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m30">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf23">
<mml:math id="m31">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of classes excluding the background (k + 1 classes in total), <inline-formula id="inf24">
<mml:math id="m32">
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (true positive) means the label is true and the prediction is true, <inline-formula id="inf25">
<mml:math id="m33">
<mml:mrow>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (false negative) means the label is true and the prediction is false, <inline-formula id="inf26">
<mml:math id="m34">
<mml:mrow>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (false positive) means the label is false and the prediction is true, and <inline-formula id="inf27">
<mml:math id="m35">
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (true negative) means the label is false and the prediction is false, <inline-formula id="inf28">
<mml:math id="m36">
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the Precision.</p>
<p>The test results for all detection models are shown in <xref ref-type="table" rid="T4">Table 4</xref>. Among them, VGG-U-Net shows the best performance, achieving an average pixel accuracy of 87.66%, which is better than the base U-Net model and a large improvement over DeeplabV3. Therefore, building on U-Net is the right choice: using VGG to replace the backbone feature extraction network of the U-Net encoder enhances the feature extraction capability of U-Net, which is more conducive to segmentation in the decoding stage and improves the segmentation performance of the network.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Detection results of the three segmentation models.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Model</th>
<th rowspan="2" align="center">Metrics</th>
<th colspan="4" align="center">Category</th>
<th rowspan="2" align="center">Average value (%)</th>
</tr>
<tr>
<th align="center">Canker</th>
<th align="center">Black-spot</th>
<th align="center">Orange</th>
<th align="center">Background</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">DeeplabV3 <xref ref-type="bibr" rid="B29">Wang and Liu, (2021)</xref>
</td>
<td align="center">IoU</td>
<td align="center">0.56</td>
<td align="center">0.28</td>
<td align="center">0.95</td>
<td align="center">0.97</td>
<td align="center">69.01</td>
</tr>
<tr>
<td align="center">PA</td>
<td align="center">0.60</td>
<td align="center">0.30</td>
<td align="center">0.99</td>
<td align="center">0.98</td>
<td align="center">71.46</td>
</tr>
<tr>
<td align="center">Precision</td>
<td align="center">0.91</td>
<td align="center">0.83</td>
<td align="center">0.96</td>
<td align="center">0.99</td>
<td align="center">92.31</td>
</tr>
<tr>
<td rowspan="3" align="center">U-Net <xref ref-type="bibr" rid="B9">de Melo et al., (2022)</xref>
</td>
<td align="center">IoU</td>
<td align="center">0.79</td>
<td align="center">0.51</td>
<td align="center">0.97</td>
<td align="center">0.97</td>
<td align="center">80.85</td>
</tr>
<tr>
<td align="center">PA</td>
<td align="center">0.86</td>
<td align="center">0.63</td>
<td align="center">0.99</td>
<td align="center">0.99</td>
<td align="center">86.65</td>
</tr>
<tr>
<td align="center">Precision</td>
<td align="center">0.90</td>
<td align="center">0.72</td>
<td align="center">0.98</td>
<td align="center">0.99</td>
<td align="center">89.66</td>
</tr>
<tr>
<td rowspan="3" align="center">VGG-U-Net <xref ref-type="bibr" rid="B31">Wspanialy and Moussa, (2020)</xref>
</td>
<td align="center">IoU</td>
<td align="center">0.80</td>
<td align="center">0.56</td>
<td align="center">0.97</td>
<td align="center">0.97</td>
<td align="center">82.53</td>
</tr>
<tr>
<td align="center">PA</td>
<td align="center">0.89</td>
<td align="center">0.65</td>
<td align="center">0.98</td>
<td align="center">0.99</td>
<td align="center">87.66</td>
</tr>
<tr>
<td align="center">Precision</td>
<td align="center">0.89</td>
<td align="center">0.80</td>
<td align="center">0.98</td>
<td align="center">0.99</td>
<td align="center">91.56</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>It is worth noting that the segmentation performance of all three models on black spot is much lower than on canker disease. Black spots are small and often scattered, which greatly increases the difficulty of segmentation, whereas canker lesions cover a large area and are densely distributed, which greatly helps the performance of the segmentation models.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>In this paper, a citrus disease detection system is proposed that both diagnoses citrus diseases and detects their severity. The disease identification phase focuses on the model&#x2019;s high-dimensional feature extraction capabilities. The proposed model revolves around the extraction of multi-scale information from images and combines the state-of-the-art EfficientNetV2 with the classical Inception module. In addition, TL methods are applied to the model to reduce training cycles and improve accuracy. Experimental results show that the proposed model achieves the best performance compared with classical CNNs. In the fruit disease severity diagnosis stage, three different segmentation models are compared, and their pixel-level accuracy is evaluated. The results show that VGG-U-Net has the highest average accuracy, proving the effectiveness of replacing the backbone feature extraction network of the underlying U-Net encoder with VGG. Future work will enhance the performance of the segmentation network for the detection of small spot targets and extend this system to other crops.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/jonathansilva2020/orange-diseases-dataset">https://www.kaggle.com/datasets/jonathansilva2020/orange-diseases-dataset</ext-link>.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>ZH: Formal Analysis, Investigation, Methodology, Writing&#x2013;original draft, Writing&#x2013;review and editing. XJ: Formal Analysis, Investigation, Validation, Writing&#x2013;review and editing. SH: Methodology, Validation, Writing&#x2013;review and editing. SQ: Formal Analysis, Investigation, Methodology, Validation, Writing&#x2013;review and editing. SY: Methodology, Validation, Writing&#x2013;review and editing.</p>
</sec>
<sec id="s8">
<title>Funding</title>
<p>This research is supported by the Guangxi Natural Science Foundation under Grant 2022GXNSFFA035028, the research fund of Guangxi Normal University under Grant 2021JC006, and the AI &#x2b; Education research project of Guangxi Humanities Society Science Development Research Center under Grant ZXZJ202205.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Abdelsalam</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Sayed</surname>
<given-names>M. S.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Real-time defects detection system for orange citrus fruits using multi-spectral imaging</article-title>,&#x201d; in <source>Midwest symposium on circuits and systems</source>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Avery</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Engler-Pinto</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Xception: deep learning with depthwise separable convolutions</article-title>. <source>SAE Int. J. Mat. Manuf.</source> <volume>7</volume> (<issue>3</issue>), <fpage>560</fpage>&#x2013;<lpage>566</lpage>. <pub-id pub-id-type="doi">10.4271/2014-01-0975</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Behera</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Jena</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Rath</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Sethy</surname>
<given-names>P. K.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Disease classification and grading of orange using machine learning and fuzzy logic</article-title>,&#x201d; in <conf-name>Proceeding of the International Conference on Communication and Signal Processing (ICCSP)</conf-name>, <conf-date>April 2018</conf-date>, <conf-loc>Chennai, India</conf-loc>, <publisher-name>IEEE</publisher-name>, <fpage>0678</fpage>&#x2013;<lpage>0682</lpage>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Construction of apple leaf diseases identification networks based on Xception fused by SE module</article-title>. <source>Appl. Sci.</source> <volume>11</volume> (<issue>10</issue>), <fpage>4614</fpage>&#x2013;<lpage>4628</lpage>. <pub-id pub-id-type="doi">10.3390/app11104614</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Automatic classification and detection of oranges based on computer vision</article-title>,&#x201d; in <conf-name>Proceeding of the IEEE 4th International Conference on Computer and Communications, ICCC</conf-name>, <conf-date>December 2018</conf-date>, <conf-loc>Chengdu, China</conf-loc>, <publisher-name>IEEE</publisher-name>, <fpage>1551</fpage>&#x2013;<lpage>1556</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Galimzianova</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lesjak</surname>
<given-names>&#x17d;.</given-names>
</name>
<name>
<surname>&#x160;piclin</surname>
<given-names>&#x17d;.</given-names>
</name>
<name>
<surname>Lock</surname>
<given-names>C. B.</given-names>
</name>
<name>
<surname>Rubin</surname>
<given-names>D. L.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Deep learning in medical image analysis and multimodal learning for clinical decision support</source>, <volume>11045</volume>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Conti</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gardella</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Vandecaveye</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Gomez</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Joris</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hauteville</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Transgenic citrange troyer rootstocks overexpressing antimicrobial potato snakin-1 show reduced citrus canker disease symptoms</article-title>. <source>J. Biotechnol.</source> <volume>324</volume>, <fpage>99</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbiotec.2020.09.010</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>da Costa</surname>
<given-names>A. Z.</given-names>
</name>
<name>
<surname>Figueroa</surname>
<given-names>H. E. H.</given-names>
</name>
<name>
<surname>Fracarolli</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Computer vision based detection of external defects on tomatoes using deep learning</article-title>. <source>Biosyst. Eng.</source> <volume>190</volume> (<issue>25</issue>), <fpage>131</fpage>&#x2013;<lpage>144</lpage>. <pub-id pub-id-type="doi">10.1016/j.biosystemseng.2019.12.003</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Melo</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Gon&#xe7;alves</surname>
<given-names>D. N.</given-names>
</name>
<name>
<surname>Gomes</surname>
<given-names>M. d. N. B.</given-names>
</name>
<name>
<surname>Faria</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>J. d. A.</given-names>
</name>
<name>
<surname>Ramos</surname>
<given-names>A. P. M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Automatic segmentation of cattle rib-eye area in ultrasound images using the UNet&#x2b;&#x2b; deep neural network</article-title>. <source>Comput. Electron. Agric.</source> <volume>195</volume>, <fpage>106818</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2022.106818</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Online defect detection and automatic grading of carrots using computer vision combined with deep learning methods</article-title>. <source>LWT</source> <volume>149</volume> (<issue>2020</issue>), <fpage>111832</fpage>. <pub-id pub-id-type="doi">10.1016/j.lwt.2021.111832</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Espejo-Garcia</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Malounas</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Mylonas</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Kasimati</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fountas</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Using EfficientNet and transfer learning for image-based diagnosis of nutrient deficiencies</article-title>. <source>Comput. Electron. Agric.</source> <volume>196</volume>, <fpage>106868</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2022.106868</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>On line detection of defective apples using computer vision system combined with deep learning methods</article-title>. <source>J. Food Eng.</source> <volume>286</volume> (<issue>2019</issue>), <fpage>110102</fpage>. <pub-id pub-id-type="doi">10.1016/j.jfoodeng.2020.110102</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hassan</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Maji</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Jasi&#x144;ski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Leonowicz</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jasi&#x144;ska</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Identification of plant-leaf diseases using CNN and transfer-learning approach</article-title>. <source>Electronics</source> <volume>10</volume> (<issue>12</issue>), <fpage>1388</fpage>&#x2013;<lpage>1407</lpage>. <pub-id pub-id-type="doi">10.3390/electronics10121388</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Iandola</surname>
<given-names>F. N.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Moskewicz</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Ashraf</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Dally</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Keutzer</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2016</year>). <source>SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &#x3c;0.5MB model size</source>. <comment>arXiv Prepr. arXiv1602.07360</comment>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ismail</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Malik</surname>
<given-names>O. A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Real-time visual inspection system for grading fruits using computer vision and deep learning techniques</article-title>. <source>Inf. Process. Agric.</source> <volume>110</volume> (<issue>43</issue>), <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1016/j.inpa.2021.01.005</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Automatic detection and severity analysis of grape black measles disease based on deep learning and fuzzy logic</article-title>. <source>Comput. Electron. Agric.</source> <volume>193</volume> (<issue>24</issue>), <fpage>106718</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2022.106718</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname>
<given-names>A. I.</given-names>
</name>
<name>
<surname>Quadri</surname>
<given-names>S. M. K.</given-names>
</name>
<name>
<surname>Banday</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Latief Shah</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep diagnosis: A real-time apple leaf disease detection system based on deep learning</article-title>. <source>Comput. Electron. Agric.</source> <volume>198</volume>, <fpage>107093</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2022.107093</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Chan</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Mayo</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Remagnino</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>How deep learning extracts and learns leaf features for plant classification</article-title>. <source>Pattern Recognit.</source> <volume>71</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2017.05.015</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marois</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Syssau</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>LabelMe: A database and web-based tool for image annotation</article-title>. <source>Int. J. Comput. Vis.</source> <volume>32</volume> (<issue>1&#x2013;3</issue>), <fpage>157</fpage>&#x2013;<lpage>173</lpage>. <pub-id pub-id-type="doi">10.1007/s11263-007-0090-8</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mart&#xed;n</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Paul</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>TensorFlow: A system for large-scale machine learning</article-title>,&#x201d; in <conf-name>Proceeding of the 12th USENIX symposium on operating systems design and implementation (OSDI 16)</conf-name>, <conf-date>November 2&#x2013;4, 2016</conf-date>, <conf-loc>Savannah, GA, USA</conf-loc>, <fpage>265</fpage>&#x2013;<lpage>283</lpage>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>S. q.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>J. f.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>H. l.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Taylor</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Intelligent diagnosis of northern corn leaf blight with deep learning model</article-title>. <source>J. Integr. Agric.</source> <volume>21</volume> (<issue>4</issue>), <fpage>1094</fpage>&#x2013;<lpage>1105</lpage>. <pub-id pub-id-type="doi">10.1016/s2095-3119(21)63707-3</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raikar</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Meena</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Kuchanur</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Girraddi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Benagi</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Classification and grading of okra-ladies finger using deep learning</article-title>. <source>Procedia Comput. Sci.</source> <volume>171</volume> (<issue>2019</issue>), <fpage>2380</fpage>&#x2013;<lpage>2389</lpage>. <pub-id pub-id-type="doi">10.1016/j.procs.2020.04.258</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sabzi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Abbaspour-Gilandeh</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Arribas</surname>
<given-names>J. I.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Non-intrusive image processing thompson orange grading methods</article-title>,&#x201d; in <conf-name>Proceeding of the 56th FITCE Congress</conf-name>, <conf-date>September 2017</conf-date>, <conf-loc>Madrid, Spain</conf-loc>, <publisher-name>IEEE</publisher-name>, <fpage>35</fpage>&#x2013;<lpage>39</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Simonyan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zisserman</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Very deep convolutional networks for large-scale image recognition</article-title>,&#x201d; in <source>3rd int. Conf. Learn. Represent. ICLR 2015 - conf. Track proc.</source>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Szegedy</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sermanet</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Reed</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Anguelov</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). &#x201c;<article-title>Going deeper with convolutions</article-title>,&#x201d; in <conf-name>Proceeding of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <publisher-name>IEEE</publisher-name>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>Q. V.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>EfficientNet: rethinking model scaling for convolutional neural networks</article-title>,&#x201d; in <source>36th international conference on machine learning, ICML</source>, <fpage>6105</fpage>&#x2013;<lpage>6114</lpage>.</citation>
</ref>
<ref id="B27">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>Q. V.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>EfficientNetV2: smaller models and faster training</article-title>,&#x201d; in <source>Proceedings of the 38th international conference on machine learning</source>, <fpage>10096</fpage>&#x2013;<lpage>10106</lpage>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vasconez</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Delpiano</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Vougioukas</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Auat Cheein</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Comparison of convolutional neural networks in fruit detection and counting: A comprehensive evaluation</article-title>. <source>Comput. Electron. Agric.</source> <volume>173</volume> (<issue>2019</issue>), <fpage>105348</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2020.105348</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Medical image recognition and segmentation of pathological slices of gastric cancer based on Deeplab v3&#x2b; neural network</article-title>. <source>Comput. Methods Programs Biomed.</source> <volume>207</volume> (<issue>1</issue>), <fpage>106210</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2021.106210</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Automatic image-based plant disease severity estimation using deep learning</article-title>. <source>Comput. Intell. Neurosci.</source> <volume>2017</volume> (<issue>1</issue>), <fpage>2917536</fpage>&#x2013;<lpage>2917538</lpage>. <pub-id pub-id-type="doi">10.1155/2017/2917536</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wspanialy</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Moussa</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A detection and severity estimation system for generic diseases of tomato greenhouse plants</article-title>. <source>Comput. Electron. Agric.</source> <volume>178</volume>, <fpage>105701</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2020.105701</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>van den Hengel</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Wider or deeper: revisiting the ResNet model for visual recognition</article-title>. <source>Pattern Recognit.</source> <volume>90</volume>, <fpage>119</fpage>&#x2013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2019.01.006</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J. f.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X. k.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>T. s.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>New geographic distribution and molecular diversity of citrus chlorotic dwarf-associated virus in China</article-title>. <source>J. Integr. Agric.</source> <volume>21</volume> (<issue>1</issue>), <fpage>293</fpage>&#x2013;<lpage>298</lpage>. <pub-id pub-id-type="doi">10.1016/s2095-3119(20)63601-2</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The status of citrus huanglongbing in China</article-title>. <source>Trop. Plant Pathol.</source> <volume>45</volume> (<issue>3</issue>), <fpage>279</fpage>&#x2013;<lpage>284</lpage>. <pub-id pub-id-type="doi">10.1007/s40858-020-00363-8</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>