<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2024.1384709</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Deep learning models for the early detection of maize streak virus and maize lethal necrosis diseases in Tanzania</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Mayo</surname> <given-names>Flavia</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2646298/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Maina</surname> <given-names>Ciira</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mgala</surname> <given-names>Mvurya</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mduma</surname> <given-names>Neema</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1913794/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Computational and Communication Science Engineering (CoCSE), The Nelson Mandela African Institution of Science and Technology (NM-AIST)</institution>, <addr-line>Arusha</addr-line>, <country>Tanzania</country></aff>
<aff id="aff2"><sup>2</sup><institution>Electrical and Electronic Engineering, Dedan Kimathi University of Technology</institution>, <addr-line>Nyeri</addr-line>, <country>Kenya</country></aff>
<aff id="aff3"><sup>3</sup><institution>Institute of Computing and Informatics, Technical University of Mombasa</institution>, <addr-line>Mombasa</addr-line>, <country>Kenya</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0001">
<p>Edited by: Tarun Belwal, Texas A&#x0026;M University, United States</p>
</fn>
<fn fn-type="edited-by" id="fn0002">
<p>Reviewed by: Pankaj B. Pathare, Sultan Qaboos University, Oman</p>
<p>Emrullah Acar, Batman University, T&#x00FC;rkiye</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Flavia Mayo, <email>mayof@nm-aist.ac.tz</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>16</day>
<month>08</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>7</volume>
<elocation-id>1384709</elocation-id>
<history>
<date date-type="received">
<day>10</day>
<month>02</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>02</day>
<month>08</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 Mayo, Maina, Mgala and Mduma.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Mayo, Maina, Mgala and Mduma</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Agriculture is considered the backbone of Tanzania&#x2019;s economy, with more than 60% of the residents depending on it for survival. Maize is the country&#x2019;s dominant and primary food crop, accounting for 45% of all farmland production. However, its productivity is challenged by the limited ability to detect maize diseases early enough. Maize streak virus (MSV) and maize lethal necrosis (MLN) are common diseases often detected too late by farmers. This has led to the need to develop a method for the early detection of these diseases so that they can be treated on time. This study investigated the potential of developing deep-learning models for the early detection of maize diseases in Tanzania. The regions where data was collected are Arusha, Kilimanjaro, and Manyara. Data was collected through observation by a plant pathologist. The study proposed convolutional neural network (CNN) and vision transformer (ViT) models. Four classes of imagery data were used to train both models: MLN, Healthy, MSV, and WRONG. The results revealed that the ViT model surpassed the CNN model, with 93.1 and 90.96% accuracies, respectively. Further studies should focus on mobile app development and deployment of the model with greater precision for early detection of the diseases mentioned above in real life.</p>
</abstract>
<kwd-group>
<kwd>deep learning models</kwd>
<kwd>maize diseases</kwd>
<kwd>early detection</kwd>
<kwd>convolutional neural network</kwd>
<kwd>vision transformer</kwd>
<kwd>maize streak virus</kwd>
<kwd>maize lethal necrosis</kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="5"/>
<equation-count count="0"/>
<ref-count count="43"/>
<page-count count="10"/>
<word-count count="7254"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>AI in Food, Agriculture and Water</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Tanzania&#x2019;s economy is predominantly centered around agriculture, and the country gains from a wide range of agricultural activities, such as livestock, essential food crops, and many cash crops (<xref ref-type="bibr" rid="ref28">Oxfordbusinessgroup, 2018</xref>). In Tanzania, agricultural output accounts for about 29.1% of the country&#x2019;s Gross Domestic Product (GDP). The sector also employs 67% of the labor force and is a paramount supplier of food, raw materials for industry, and foreign exchange (<xref ref-type="bibr" rid="ref18">International Trade Administration, 2021</xref>). Moreover, agricultural production remains far too low while food demand is increasing dramatically (<xref ref-type="bibr" rid="ref9">Dewbre et al., 2014</xref>). Farmers, scientists, researchers, analysts, specialists, and the government are working hard to enhance agricultural production to meet growing needs (<xref ref-type="bibr" rid="ref29">Panigrahi et al., 2020</xref>). However, crop diseases continue to be a challenge affecting major food security crops like maize (<xref ref-type="bibr" rid="ref34">Savary and Willocquet, 2020</xref>). Maize is a crucial crop in Tanzania, contributing significantly to the country&#x2019;s agricultural sector (<xref ref-type="bibr" rid="ref26">Maiga, 2024</xref>). However, maize leaf diseases such as Maize Streak Virus and Maize Lethal Necrosis pose a severe threat to maize production with the potential to reduce yield (<xref ref-type="bibr" rid="ref35">Shepherd et al., 2010</xref>; <xref ref-type="bibr" rid="ref25">Mahuku et al., 2015</xref>; <xref ref-type="bibr" rid="ref21">Kiruwa et al., 2020</xref>). Early detection of these diseases is crucial for implementing timely preventive measures and mitigating yield losses (<xref ref-type="bibr" rid="ref5">Boddupalli et al., 2020</xref>; <xref ref-type="bibr" rid="ref16">Haque et al., 2022</xref>). 
Traditional visual analysis methods for disease detection in crops are prone to errors, labor-intensive, and time-consuming. Moreover, these methods have been observed to identify diseases at a later stage, potentially leading to more harm to the crops (<xref ref-type="bibr" rid="ref40">Toseef and Khan, 2018</xref>; <xref ref-type="bibr" rid="ref14">Gong and Zhang, 2023</xref>). These traditional methods rely heavily on the expertise of farmers, plant pathologists, and agriculture experts. Additionally, the subjective nature of these methods can lead to inconsistent diagnoses among different experts.</p>
<p>Recently, technology has been used to improve yields in agriculture, whereby researchers have devised several solutions, including image processing and object detection using deep learning models (<xref ref-type="bibr" rid="ref29">Panigrahi et al., 2020</xref>). Deep learning (DL) is a branch of machine learning that involves training artificial neural networks to learn from large volumes of data and make predictions. Moreover, it is known for its ability to use many processing layers to discover patterns and structures in large datasets (<xref ref-type="bibr" rid="ref33">Rusk, 2015</xref>). It also automatically extracts features from the data, making it suitable for various applications, such as image recognition, natural language processing, speech recognition, and autonomous systems (<xref ref-type="bibr" rid="ref17">Ho, 2016</xref>). It has become widely known for its potential and advanced ability to efficiently process large numbers of images, yielding reliable outcomes. It has performed very well in many fields, including agriculture (<xref ref-type="bibr" rid="ref20">Kamilaris and Prenafeta-Bold&#x00FA;, 2018</xref>). During the last few years, deep learning has been applied to many crops to detect, classify, and assess a broad spectrum of diseases, pests, and stresses (<xref ref-type="bibr" rid="ref38">Singh et al., 2016</xref>; <xref ref-type="bibr" rid="ref29">Panigrahi et al., 2020</xref>; <xref ref-type="bibr" rid="ref16">Haque et al., 2022</xref>). For the past several years, deep learning achievements in computer vision tasks have strongly depended on Convolutional Neural Networks (CNNs) (<xref ref-type="bibr" rid="ref31">Raghu et al., 2021</xref>). 
CNNs prevail in the domain of computer vision as a foundation for various applications, such as image classification (<xref ref-type="bibr" rid="ref36">Sibiya and Sumbwanyambe, 2019</xref>; <xref ref-type="bibr" rid="ref8">Darwish et al., 2020</xref>; <xref ref-type="bibr" rid="ref39">Syarief and Setiawan, 2020</xref>; <xref ref-type="bibr" rid="ref3">Atila et al., 2021</xref>; <xref ref-type="bibr" rid="ref7">Chen et al., 2021</xref>; <xref ref-type="bibr" rid="ref23">Liu and Wang, 2021</xref>; <xref ref-type="bibr" rid="ref16">Haque et al., 2022</xref>), object detection (<xref ref-type="bibr" rid="ref43">Zhang et al., 2020</xref>; <xref ref-type="bibr" rid="ref23">Liu and Wang, 2021</xref>; <xref ref-type="bibr" rid="ref27">Maxwell et al., 2021</xref>; <xref ref-type="bibr" rid="ref32">Roy et al., 2022</xref>) and image segmentation (<xref ref-type="bibr" rid="ref13">Gayatri et al., 2021</xref>; <xref ref-type="bibr" rid="ref23">Liu and Wang, 2021</xref>; <xref ref-type="bibr" rid="ref24">Loyani and Machuve, 2021</xref>; <xref ref-type="bibr" rid="ref27">Maxwell et al., 2021</xref>; <xref ref-type="bibr" rid="ref37">Sibiya and Sumbwanyambe, 2021</xref>). The CNN architecture consists of components such as a convolutional layer, a pooling layer, a fully connected layer, and activation functions (<xref ref-type="bibr" rid="ref4">Bharali et al., 2019</xref>; <xref ref-type="bibr" rid="ref12">Francis and Deisy, 2019</xref>; <xref ref-type="bibr" rid="ref19">Jasim and Al-Tuwaijari, 2020</xref>), as shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>CNN architecture (<xref ref-type="bibr" rid="ref42">Voulodimos et al., 2018</xref>).</p>
</caption>
<graphic xlink:href="frai-07-1384709-g001.tif"/>
</fig>
<p>Natural language processing has been performed using transformer architecture, and vision transformers have produced outstanding outcomes compared to CNNs (<xref ref-type="bibr" rid="ref41">Vaswani et al., 2017</xref>; <xref ref-type="bibr" rid="ref30">Qi et al., 2022</xref>). Researchers have recently adapted transformers to computer vision applications, inspired by the significant success of transformer architectures in the field of NLP. The Vision Transformer (ViT) has achieved cutting-edge performance on various image recognition benchmarks. In addition to image classification, transformers have been used to solve a variety of computer vision problems, including object identification, semantic segmentation, image processing, and video interpretation. Because of their superior performance, an increasing number of academics are proposing transformer-based models for improving a wide range of visual tasks (<xref ref-type="bibr" rid="ref15">Han et al., 2023</xref>). ViT works by implementing a transformer-like architecture over image patches. Images are divided into fixed-size patches, which are then linearly embedded. Position embeddings are then added, and the resulting vector sequence is fed into a standard transformer encoder. The standard approach of adding an extra learnable classification token to the sequence is used to perform classification (<xref ref-type="bibr" rid="ref41">Vaswani et al., 2017</xref>; <xref ref-type="bibr" rid="ref10">Dosovitskiy et al., 2020</xref>). The sequence of the 1D array is passed to the transformer structure. To process 2D image patches, the 2D patches are first extracted from the image, and then they are reshaped to create 1D arrays that are suitable for the ViT structure. They are added to the positional encoder to finish preparing the patch embedding for the next layer. The positional encoder aids the network in remembering the relative position of the patches with one another. 
Inputs are then normalized with the normalization layer before entering the transformer block. The multi-head attention layer is the most important aspect of this block. The multi-head attention layer calculates weights to assign higher values to the more important areas. In other words, network attention is focused on the most important parts of the network. The output of the multi-head attention layer is a linear combination of each head (<xref ref-type="bibr" rid="ref6">Borhani et al., 2022</xref>). <xref ref-type="fig" rid="fig2">Figure 2</xref> shows the ViT architecture inspired by <xref ref-type="bibr" rid="ref41">Vaswani et al. (2017)</xref>.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Vision transformer architecture (<xref ref-type="bibr" rid="ref10">Dosovitskiy et al., 2020</xref>).</p>
</caption>
<graphic xlink:href="frai-07-1384709-g002.tif"/>
</fig>
<p>Both the ViT and CNN models have achieved state-of-the-art results in various computer vision tasks, including plant disease detection. However, the relative performance of the models depends on the specific dataset, model architecture, and training hyperparameters used in a given study. Various techniques have been developed and proposed for the detection of diseases in general. The most adopted techniques, CNN and ViT, have shown great performance when used separately. Therefore, this study aimed to develop combined deep-learning models for the early detection of Maize Streak Virus (MSV) and Maize Lethal Necrosis (MLN) diseases in maize plants based on images obtained and collected directly from the field, allowing the model to be trained with real data. The grand purpose is to utilize the maize imagery datasets collected from farms and made available in open source to the research community for future studies on MLN and MSV infections, by introducing an approach that enhances the effectiveness and efficiency of detecting these diseases in maize. Hence, this paper addresses the open question of which model is the more accurate and reliable for the detection of maize diseases.</p>
</sec>
<sec id="sec2">
<label>2</label>
<title>Related works</title>
<p>The diagnosis of a wide variety of plant diseases and pests has shown encouraging and remarkable results when employing deep learning techniques in computer vision, such as CNNs. A convolutional neural network deep learning model was developed to analyze images of healthy and unhealthy plant leaves. A total of 87,848 images in an open database with 25 distinct plants in 58 distinct categories of healthy and unhealthy images were trained using five model architectures, AlexNet, AlexNetOWTBn, GoogLeNet, Overfeat, and VGG. VGG was the most common architecture for detecting plant diseases, with a higher success rate. Implementation was performed using the Torch7 machine learning computational framework, which uses the LuaJIT programming language. The model&#x2019;s exceptionally excellent performance makes it suitable as a vital early warning or advising tool (<xref ref-type="bibr" rid="ref11">Ferentinos, 2018</xref>). This study was conducted in Athens, Greece, to detect many plant diseases and not specifically for the detection of maize streak virus and maize lethal necrosis.</p>
<p>Another deep-learning model was developed to detect maize diseases in Indonesia. The study used a classification approach to detect 3 diseases, Cercospora, northern leaf blight, and common rust. A support vector machine, k-nearest neighbor, and decision tree were used to classify the maize leaf images, and seven other CNN architectures were used to analyze the maize leaf images. The architectures used included ResNet50, GoogleNet, VGG19, AlexNet, Inception-V3, VGG16, and ResNet110. The data consisted of 200 images that were divided into 4 classes, 50 images per class with a size of 256&#x00D7;256 pixels. However, AlexNet and SVM were the best methods for feature extraction and image classification of maize leaf diseases. This study used fewer samples (200 images), which were collected in Asia (<xref ref-type="bibr" rid="ref39">Syarief and Setiawan, 2020</xref>).</p>
<p>Additionally, a Mobile-DANet model was developed to identify 8 maize crop diseases, gibberella ear rot, maize eyespot, crazy top, gray leaf spot, Goss&#x2019;s bacterial wilt, common smut, phaeosphaeria spot, and southern rust. Except for some samples, the results of the Mobile-DANet model demonstrated that the majority of the images and maize diseases were correctly identified. Mobile-DANet correctly detected samples with phaeosphaeria spots with a probability of 0.71. Similarly, the model accurately detected gibberella ear rot and southern rust disease, with probabilities of 0.83 and 0.93, respectively. China served as the study location, and this study focused on maize images other than MSV and MLN images. The model employed in the study is Mobile-DANet (<xref ref-type="bibr" rid="ref7">Chen et al., 2021</xref>).</p>
<p>Furthermore, another study from India proposed a deep convolutional neural network to detect healthy and diseased images of maize leaves. The dataset contained 5,939 images of maize leaves. The dataset consisted of images of three diseases, Maydis leaf blight (MLB), Turcicum leaf blight (TLB), and banded leaf and sheath blight (BLSB), as well as healthy maize leaves. The study employed the Inception-v3 network structure, and three more models were developed using the normal training procedure (<xref ref-type="bibr" rid="ref16">Haque et al., 2022</xref>).</p>
<p>In Cairo, Egypt, a classification model for the identification of common rust, northern leaf blight, healthy maize leaves, and gray leaf spots was developed. To identify plant diseases, an ensemble model composed of two pre-trained convolutional neural networks, VGG19 and VGG16, was used to distinguish between the leaves in healthy photos and the leaves in unhealthy photos. The outcomes show how well the suggested strategy works, outperforming alternative methods for VGG19. Even though the created model performed well, this study struggled with the categorization of unbalanced data, and the dataset employed lacked sufficient images to properly train CNNs that were created from scratch (<xref ref-type="bibr" rid="ref8">Darwish et al., 2020</xref>).</p>
<p>A model for the recognition of common rust (Puccinia sorghi), gray leaf spot (Cercospora), and northern corn leaf blight (Exserohilum) from healthy leaves was developed due to the impacts of these diseases on the majority of the maize plantations in South Africa. Neuroph was used for training the convolution neural network to recognize and classify images of maize. CNN was quite correct in identifying these diseases. This research was restricted to the neuroph framework of the Java neural network, which is an integrated environment for developing and deploying neural networks to Java programs, despite the model&#x2019;s strong performance (<xref ref-type="bibr" rid="ref36">Sibiya and Sumbwanyambe, 2019</xref>).</p>
<p>A similar study was conducted by <xref ref-type="bibr" rid="ref37">Sibiya and Sumbwanyambe (2021)</xref> to develop a CNN deep learning model. The diseased leaf area was calculated using segmentation by the threshold on diseased images of leaves of maize impacted by common rust disease. This information was used to create ambiguous decision guidelines in assigning common rust images to severity groups with images created using this proposed approach. The VGG-16 network, trained with images generated using this suggested method, achieved a higher testing and validation accuracy when tested on photos of common rust illness in 4 stages of severity (early stage, middle stage, late stage, and healthy stage). Despite the good performance of the developed model, this study was limited to only the image segmentation approach, which tends to partition a digital image into multiple segments. Furthermore, the study used a CNN architecture, which lacked a detailed description.</p>
<p><xref ref-type="bibr" rid="ref1">Arnaud et al. (2022)</xref> from Kenya developed a deep learning model to compare six convolutional neural network architectures. Transfer learning was employed for model training, and the architectures used included EfficientNet b7, VGG19, SqueezeNet, GoogleNet, AlexNet, and DenseNet. The study analyzed four hyperparameters: the batch size, learning rate, number of epochs, and optimizer. An open-source dataset with 4,082 photos was used. DenseNet121 outperformed other models by achieving a higher accuracy and F1 score. DenseNet121 was trained with a batch size of 32, a learning rate of 0.01, and stochastic gradient descent (SGD) as the optimizer. In general, various techniques for detecting plant diseases have been proposed. These techniques have shown good performance; however, no studies have focused on building a combined deep-learning model for the detection of MSV and MLN together, and there is no publicly available dataset containing images of maize leaves infected by MSV and MLN. Moreover, several studies have used a large number of images from online sources, which might not accurately represent field scenarios. As a result, this study aimed to develop a combined deep learning model for MSV and MLN detection based on images collected directly from the field, allowing the model to be trained with real data. The dataset will be made available in open source to the research community for future studies on MLN and MSV infections. Furthermore, the majority of the studies employed transfer learning methods, and the scope of their studies was not in Tanzania.</p>
</sec>
<sec sec-type="materials|methods" id="sec3">
<label>3</label>
<title>Materials and methods</title>
<sec id="sec4">
<label>3.1</label>
<title>Overview of the proposed method</title>
<p><xref ref-type="fig" rid="fig3">Figure 3</xref> provides an overview of the proposed method from the acquisition of data to model development, model validation, and delivery of an optimized model. Images of healthy and diseased maize leaves were collected from the farms. The image datasets were then pre-processed and divided into training and testing sets. The models were then trained and tested to evaluate the performance and accuracy of the created models.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Block diagram summary for the proposed work.</p>
</caption>
<graphic xlink:href="frai-07-1384709-g003.tif"/>
</fig>
</sec>
<sec id="sec5">
<label>3.2</label>
<title>The dataset</title>
<p>The datasets were collected from three regions: Arusha, Kilimanjaro, and Manyara. These regions were selected due to having a large number of farmers across the country. The focus of the dataset collection was on the affected maize plants. Two main diseases, MSV and MLN, were observed from the leaves, and images were captured. Leaves were selected from the middle tier of the maize plants. This tier was chosen to provide a consistent basis for comparison, as leaves at different heights may exhibit varying levels of disease symptoms. They were collected during the mid-season phase of the growing season. This phase was selected because it is when the symptoms of Maize Streak Virus (MSV) and Maize Lethal Necrosis (MLN) are most prominent and easily identifiable. Moreover, the study focused on two widely cultivated maize varieties in Tanzania: Situka M1 and T105. These varieties were chosen due to their regional prevalence and known susceptibility to MSV and MLN. By including two varieties, the study aimed to ensure that the model is robust and generalizable across different genetic backgrounds. The data collection process took six months, from February to July, and involved plant pathologists who identified the symptoms of the diseases. The Open Data Kit (ODK) tool installed on a smartphone was used to capture these images. All the images were captured in the Joint Photographic Experts Group (JPG) format. At the end of data collection, 27,660 images were obtained, which were sufficient for model development. The distribution of these images was 9,145 healthy images, 8,604 MLN images, and 9,911 MSV images. To prepare the proposed model to be able to identify images other than maize leaf images, 675 more images of different things were acquired from open-access databases to be included for training the model. 
<xref ref-type="fig" rid="fig4">Figure 4</xref> shows the researcher collecting data in the field, and <xref ref-type="fig" rid="fig5">Figure 5</xref> shows sample images from the three classes that were collected from the field. The image labeled (A) is a healthy maize leaf, the image labeled (B) is a maize leaf affected by Maize Lethal Necrosis (MLN), and the image labeled (C) is a maize leaf affected by Maize Streak Virus (MSV).</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Researcher collecting imagery leaves in maize farms.</p>
</caption>
<graphic xlink:href="frai-07-1384709-g004.tif"/>
</fig>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Examples of imagery data from maize dataset where <bold>(A)</bold> Healthy, <bold>(B)</bold> MLN, <bold>(C)</bold> MSV.</p>
</caption>
<graphic xlink:href="frai-07-1384709-g005.tif"/>
</fig>
</sec>
<sec id="sec6">
<label>3.3</label>
<title>Data cleaning and preprocessing</title>
<p>This is a very crucial stage, where all the collected data is cleaned and ensured it is free of any erroneous or fraudulent information. This process normally uses various tools and software (<xref ref-type="bibr" rid="ref22">Lee et al., 2021</xref>). In the data-cleaning stage, the following steps were conducted.</p>
<sec id="sec7">
<label>3.3.1</label>
<title>Removing duplicates and cropping</title>
<p>In this step, duplicate images from the three classes, Healthy, MSV, and MLN, were removed using the VisiPics tool (<xref ref-type="bibr" rid="ref2">Arora et al., 2016</xref>). The tool was selected because of easy usage and it functions very well in eliminating exactly similar images. In total there were 27,660 images collected from the field before removing duplicates, 747 images were found duplicates and deleted. 26,913 images remained after removing duplicates. <xref ref-type="table" rid="tab1">Table 1</xref> lists the total number of images from the three classes, before and after the duplicates have been removed. The images were also cropped manually to remove unnecessary background so that maize leaf would be the main focus. This is seen in <xref ref-type="fig" rid="fig5">Figure 5</xref>.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Number of images before and after removing duplicate images.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Classes</th>
<th align="center" valign="top">Number of images before duplicate removal</th>
<th align="center" valign="top">Duplicate images</th>
<th align="center" valign="top">Number of images after duplicate removal</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Healthy</td>
<td align="center" valign="top">9,145</td>
<td align="center" valign="top">530</td>
<td align="center" valign="top">8,615</td>
</tr>
<tr>
<td align="left" valign="top">MLN</td>
<td align="center" valign="top">8,604</td>
<td align="center" valign="top">26</td>
<td align="center" valign="top">8,578</td>
</tr>
<tr>
<td align="left" valign="top">MSV</td>
<td align="center" valign="top">9,911</td>
<td align="center" valign="top">191</td>
<td align="center" valign="top">9,720</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">27,660</td>
<td align="center" valign="top">747</td>
<td align="center" valign="top">26,913</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec8">
<label>3.3.2</label>
<title>Labeling and resizing</title>
<p>The labeling process was conducted with the help of a tool named Bulk Rename Utility to speed up the labeling process. Image labeling was done by naming the data according to the corresponding classes. All images were confirmed to be in JPG format so that they could be used during the development of the model. The labeling also involved determining the number of images to be used for model training and model validation. The image dataset was also resized according to the proposed deep-learning model requirements. Images employed to train and test the CNN model were resized to a uniform size of 256&#x2009;&#x00D7;&#x2009;256 pixels, and images used to train and test the ViT model were resized to a uniform size of 200&#x2009;&#x00D7;&#x2009;200 pixels.</p>
<p><bold>Proposed models.</bold> This study focused on developing two deep learning models, a Convolutional Neural Network (CNN) and a Vision Transformer (ViT), for the early detection of Maize Streak Virus (MSV) and Maize Lethal Necrosis (MLN) diseases.</p>
</sec>
</sec>
<sec id="sec9">
<label>3.4</label>
<title>Model development</title>
<sec id="sec10">
<label>3.4.1</label>
<title>CNN</title>
<p>CNNs are a class of deep learning algorithms primarily used for image recognition and classification. They are designed to recognize local patterns in the input image. A CNN comprises key components that include convolutional layers, pooling layers, fully connected layers, and activation functions. Convolutional layers are used to apply convolution operations to the input image, passing the results to the next layer. Pooling layers perform down-sampling operations to reduce the dimensionality of the feature maps, which helps reduce overfitting and computational complexity. Fully connected layers are commonly used at the end of the network to output a class score; just like in traditional neural networks, they connect every neuron in one layer to every neuron in the next layer. Activation functions are used to introduce non-linearity to the model.</p>
<p>The CNN model was developed with a total of 27,588 images from four classes (Healthy, MLN, MSV, and WRONG). The dataset was split into 80% for the training set and 20% for the testing set for all four classes. Because of the large number of images, the model was trained in four batches, where the weights obtained from training on the first batch were used to initialize training on the second batch, and likewise for the third and fourth batches. The first three batches each contained 6,000 images. These images were split into 4,800 images for the training set and 1,200 images for the test set for each batch in the Healthy, MSV, and MLN classes; however, for the WRONG class in the training set, 540 images were included, and for the test set, 135 images were included, maintaining an 80:20 ratio for each class. For the fourth batch, the model was trained using the remaining 8,913 images. These images were again split into an 80:20 ratio for the training set and the test set, resulting in 7,131 samples for training and 1,782 samples for testing for Healthy, MSV, and MLN, while the number of WRONG-class images remained the same. A sequential model was employed in this implementation that defined 5 convolutional layers, and each layer was followed by a max pooling layer. The first convolution layer had 16 filters; the second convolution layer had 32 filters; and the third to fifth layers had 64 filters. These were then followed by a flattening layer and a dense layer with 512 neurons. A rectified linear unit (ReLU) was employed as an activation function in all the convolutional layers. The number of classes was represented by the output dense layer, which had 4 neurons with a softmax activation function. The images were rescaled by (1.0/255) and resized to 256&#x2009;&#x00D7;&#x2009;256 pixels. The hyperparameters used for training the CNN model and their values are shown in <xref ref-type="table" rid="tab2">Table 2</xref>.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Hyperparameters used for training the CNN model.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Parameter</th>
<th align="left" valign="top">Value</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Epoch</td>
<td align="left" valign="top" style="background-color:#ffffff;color:#0000ff">50</td>
</tr>
<tr>
<td align="left" valign="top">Batch size</td>
<td align="left" valign="top" style="background-color:#ffffff;color:#0000ff">32</td>
</tr>
<tr>
<td align="left" valign="top">Steps per epoch</td>
<td align="left" valign="top" style="background-color:#ffffff;color:#0000ff">167</td>
</tr>
<tr>
<td align="left" valign="top">Optimizers</td>
<td align="left" valign="top" style="background-color:#ffffff;color:#0000ff">Adam</td>
</tr>
<tr>
<td align="left" valign="top">Losses</td>
<td align="left" valign="top" style="background-color:#ffffff;color:#0000ff">Categorical_crossentropy</td>
</tr>
<tr>
<td align="left" valign="top">Metrics</td>
<td align="left" valign="top" style="background-color:#ffffff;color:#0000ff">Accuracy, Precision, Recall, F-measure</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec11">
<label>3.4.2</label>
<title>ViT</title>
<p>Vision Transformers (ViTs) represent a novel approach to image recognition tasks by utilizing the transformer architecture, which was initially created for challenges related to natural language. Important ViT components include patch embedding, transformer encoder, self-attention mechanism, and position embedding. In patch embedding, an input image is split into fixed-size patches, and each patch is linearly embedded into a vector. These embeddings are then combined to form a sequence. The sequence of patch embeddings is processed through multiple layers of the transformer encoder. Each encoder layer consists of a multi-head self-attention mechanism and feed-forward neural networks. The self-attention mechanism is what allows the model to weigh the importance of different patches in the image, enabling it to capture long-range dependencies and contextual information. Since transformers do not have a built-in notion of spatial relationships, position embeddings are added to the patch embeddings to retain the spatial information of the image.</p>
<p>The ViT model was developed with a dataset consisting of a total of 6,675 samples from four classes (HEALTHY, MLN, MSV, and WRONG). The images were resized to a uniform size of 200&#x00D7;200 pixels. The ViT model architecture comprises patch embedding, positional embedding, 12 transformer layers, and a classification head. Each transformer layer includes 12 attention heads in the multi-head attention mechanism, and the feedforward neural networks in the transformer have a dimensionality of 3,072. Each patch in the image has a size of 25, and the number of output classes is 3, corresponding to the number of classes in the dataset. The hidden dimensionality of the transformer model is 768, and a dropout rate of 0.1 was applied. The activation function used in this model was the Gaussian error linear unit (GELU). The hyperparameters used for training the ViT model are shown in <xref ref-type="table" rid="tab3">Table 3</xref>.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Hyperparameters used for training the ViT model.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Parameters</th>
<th align="left" valign="top">Value</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Epoch</td>
<td align="left" valign="middle">50</td>
</tr>
<tr>
<td align="left" valign="middle">Steps per epoch</td>
<td align="left" valign="middle">154</td>
</tr>
<tr>
<td align="left" valign="middle">Batch size</td>
<td align="left" valign="middle">32</td>
</tr>
<tr>
<td align="left" valign="middle">Optimizer</td>
<td align="left" valign="middle">Adam</td>
</tr>
<tr>
<td align="left" valign="middle">Metric</td>
<td align="left" valign="middle">Accuracy</td>
</tr>
<tr>
<td align="left" valign="middle">Learning rate</td>
<td align="left" valign="middle">0.0001</td>
</tr>
<tr>
<td align="left" valign="middle">Losses</td>
<td align="left" valign="middle">Categorical_crossentropy</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="sec12">
<label>3.5</label>
<title>Experimental setup</title>
<p>The experiment for this study was conducted on a machine running Windows 10 with an Intel(R) Core(TM) i5-4200U CPU @ 1.60&#x2009;GHz and 2.30&#x2009;GHz with an installed RAM of 8&#x2009;GB and a 64-bit operating system. Both the CNN and ViT models were trained online using Google Colab, which provides Python 3 as the runtime and a GPU as the hardware accelerator. The implementation was carried out using the Keras library with TensorFlow as the backend. The language used during model training was Python because of its ability to provide a variety of freely available machine-learning libraries.</p>
</sec>
</sec>
<sec sec-type="results" id="sec13">
<label>4</label>
<title>Results and discussion</title>
<sec id="sec14">
<label>4.1</label>
<title>CNN model training results</title>
<p>The model training results show that the second batch achieved the highest validation accuracy of 0.9790 and the lowest validation loss of 0.1465. The average validation accuracy across all four batches is 0.90965. The results for model performance recorded during the 1st to the 50th epoch for each of the four batches are summarized in <xref ref-type="table" rid="tab4">Table 4</xref>. <xref ref-type="fig" rid="fig6">Figure 6</xref> shows the CNN accuracy (left) and loss (right) curves over 50 epochs. The accuracy-over-epochs graph shows that the validation accuracy increased rapidly up to the 5th epoch and then remained steady at around 90%, with fluctuations, up to the 16th epoch. It dropped to 0.8824 at the 17th epoch, then rose again and remained steady in the 0.90 range, with fluctuations, up to the last epoch, reaching a peak of 0.9790. Meanwhile, the training accuracy increased rapidly up to the 12th epoch and followed a similar trend, remaining steady at around 0.90 with fluctuations and reaching a maximum accuracy of 0.9998, surpassing the validation accuracy without any significant fluctuations. This indicates that the model exhibited effective generalization. In the loss-over-epochs graph on the right of <xref ref-type="fig" rid="fig6">Figure 6</xref>, the results demonstrate that the training loss decreases rapidly from the 1st epoch to the 10th epoch, after which it starts to fluctuate slightly, exhibiting periodic increases and decreases until the end. Meanwhile, the validation loss shows a rapid decrease from the outset until the 5th epoch, followed by a pattern of fluctuation with periodic increases and decreases until the final epoch. This shows that the model aligns closely with the characteristics of the dataset throughout both the initial and final phases of the training process.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>CNN model performance results.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Batches</th>
<th align="center" valign="top">Validation accuracy</th>
<th align="center" valign="top">Validation loss</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top">F measure</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Batch 1</td>
<td align="center" valign="middle">0.9581</td>
<td align="center" valign="top">0.3436</td>
<td align="center" valign="top">1.0000</td>
<td align="center" valign="top">1.0000</td>
<td align="center" valign="top">1.0000</td>
</tr>
<tr>
<td align="left" valign="middle">Batch 2</td>
<td align="center" valign="middle">0.9790</td>
<td align="center" valign="top">0.1465</td>
<td align="center" valign="top">0.9998</td>
<td align="center" valign="top">0.9998</td>
<td align="center" valign="top">0.9998</td>
</tr>
<tr>
<td align="left" valign="middle">Batch 3</td>
<td align="center" valign="middle">0.8135</td>
<td align="center" valign="top">1.9335</td>
<td align="center" valign="top">0.9882</td>
<td align="center" valign="top">0.9872</td>
<td align="center" valign="top">0.9880</td>
</tr>
<tr>
<td align="left" valign="middle">Batch 4</td>
<td align="center" valign="middle">0.8878</td>
<td align="center" valign="top">0.5497</td>
<td align="center" valign="top">0.9672</td>
<td align="center" valign="top">0.9625</td>
<td align="center" valign="top">0.9648</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Training and validation plot for CNN model.</p>
</caption>
<graphic xlink:href="frai-07-1384709-g006.tif"/>
</fig>
</sec>
<sec id="sec15">
<label>4.2</label>
<title>ViT model training results</title>
<p>The ViT model was trained in only one batch. The model achieved a validation accuracy of 0.9310 and a validation loss of 0.3371. The results for model performance recorded during the 1st to the 50th epochs are plotted in <xref ref-type="fig" rid="fig7">Figure 7</xref>. The accuracy-over-epochs graph shows that the validation accuracy increased rapidly up to the 4th epoch, then remained steady at around 80% and later around 90%, with fluctuations, up to the 26th epoch. It dropped to 0.8606 at the 27th epoch, rose again and remained steady in the 90% range with fluctuations, dropped again at the 40th epoch, and then increased up to the last epoch, reaching a peak of 0.9310. Meanwhile, the training accuracy increased rapidly up to the 10th epoch and followed a similar trend, remaining steady at around 90% with fluctuations and reaching a maximum accuracy of 0.9777, surpassing the validation accuracy without any significant fluctuations. This indicates that the model exhibited effective generalization. In the loss-over-epochs graph on the right of <xref ref-type="fig" rid="fig7">Figure 7</xref>, the results demonstrate that the training loss decreases rapidly from the 1st epoch to the 5th epoch, after which it starts to fluctuate slightly, exhibiting periodic increases and decreases until the end. Meanwhile, the validation loss shows a drop from the outset to the 4th epoch, followed by a pattern of fluctuation with periodic increases and decreases until the final epoch. This observation suggests that the model aligns closely with the characteristics of the dataset throughout both the initial and final phases of the training process.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Training and validation plots for ViT model.</p>
</caption>
<graphic xlink:href="frai-07-1384709-g007.tif"/>
</fig>
</sec>
<sec id="sec16">
<label>4.3</label>
<title>Comparative analysis of accuracy results from related works</title>
<p>The model efficiency results from other related studies were reviewed and compared to those obtained in this work. The findings of this study fairly correlate with those from other studies (<xref ref-type="table" rid="tab5">Table 5</xref>).</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Comparison of accuracy results from related works.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Crop diseases</th>
<th align="left" valign="top">Model architectures</th>
<th align="left" valign="top">Study reference</th>
<th align="center" valign="top">Highest Accuracy (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Variety crop diseases</td>
<td align="left" valign="top">AlexNet, AlexNetOWTBn, GoogLeNet, Overfeat, VGG</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref11">Ferentinos (2018)</xref>
</td>
<td align="center" valign="top">99.53%</td>
</tr>
<tr>
<td align="left" valign="top">Cercospora, common rust, and northern leaf blight</td>
<td align="left" valign="top">AlexNet, virtual geometry group (VGG) 16, VGG19, GoogleNet, Inception-V3, residual network 50 (ResNet50) and ResNet101</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref39">Syarief and Setiawan (2020)</xref>
</td>
<td align="center" valign="top">93.5%</td>
</tr>
<tr>
<td align="left" valign="top">Phaeosphaeria leaf spot, gibberella ear rot, crazy top, grey leaf spot, common smut, southern rust, Goss&#x2019;s bacterial wilt, maize eyespot</td>
<td align="left" valign="top">Mobile-DANet</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref7">Chen et al. (2021)</xref>
</td>
<td align="center" valign="top">95.86%</td>
</tr>
<tr>
<td align="left" valign="top">Maydis Leaf Blight, Turcicum Leaf Blight and Banded Leaf and Sheath Blight</td>
<td align="left" valign="top">VGG-16, VGG-19, Inception-v3, ResNet-50-v2, ResNet-101-v2, ResNet-152-v2 and InceptionResNet-v2</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref16">Haque et al. (2022)</xref>
</td>
<td align="center" valign="top">95.99%</td>
</tr>
<tr>
<td align="left" valign="top">Variety crop diseases</td>
<td align="left" valign="top">VGG16 and VGG19</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref8">Darwish et al. (2020)</xref>
</td>
<td align="center" valign="top">96.7%</td>
</tr>
<tr>
<td align="left" valign="top">Northern corn leaf blight (<italic>Exserohilum</italic>), common rust (<italic>Puccinia sorghi</italic>) and gray leaf spot (<italic>Cercospora</italic>)</td>
<td align="left" valign="top">CNN</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref36">Sibiya and Sumbwanyambe (2019)</xref>
</td>
<td align="center" valign="top">92.85%</td>
</tr>
<tr>
<td align="left" valign="top">Maize common rust disease (early stage, middle stage, late stage, and healthy stage)</td>
<td align="left" valign="top">VGG-16</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref37">Sibiya and Sumbwanyambe (2021)</xref>
</td>
<td align="center" valign="top">95.63%</td>
</tr>
<tr>
<td align="left" valign="top">Potato late blight and early blight</td>
<td align="left" valign="top">EfficientNet b7, VGG19, SqueezeNet, GoogleNet, AlexNet, and DenseNet</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref1">Arnaud et al. (2022)</xref>
</td>
<td align="center" valign="top">98.34%</td>
</tr>
<tr>
<td align="left" valign="top">Maize Streak Virus and Maize Lethal Necrosis</td>
<td align="left" valign="top">CNN and ViT</td>
<td align="left" valign="top">Proposed method</td>
<td align="center" valign="top">93.1%</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="discussion" id="sec17">
<label>5</label>
<title>Discussion</title>
<p>This study developed two deep learning models, CNN and ViT. Both models performed well in detecting MSV and MLN diseases in maize plants. The ViT model achieved a validation accuracy of 93.1%, whereas the CNN model achieved an overall average validation accuracy of 90.97%. These results suggest that both models are capable of detecting the presence of diseases in maize plants. Furthermore, these results are considered to be among the best examples of a good model, as a good model is expected to have an accuracy greater than 70% (<xref ref-type="bibr" rid="ref27">Maxwell et al., 2021</xref>). However, deep learning models also perform very well when trained with larger datasets. The CNN model for this study was trained with 27,588 data samples, compared to <xref ref-type="bibr" rid="ref39">Syarief and Setiawan (2020)</xref>, who used only a few data samples (200) for model training in the detection of maize diseases. The majority of the studies have employed transfer learning to train deep learning models for maize disease detection, and their scope is not focused on Tanzania (<xref ref-type="bibr" rid="ref8">Darwish et al., 2020</xref>; <xref ref-type="bibr" rid="ref39">Syarief and Setiawan, 2020</xref>; <xref ref-type="bibr" rid="ref7">Chen et al., 2021</xref>; <xref ref-type="bibr" rid="ref1">Arnaud et al., 2022</xref>; <xref ref-type="bibr" rid="ref16">Haque et al., 2022</xref>), unlike this study, in which both the CNN and ViT deep learning models were developed from scratch and the study area is Tanzania. Another study by <xref ref-type="bibr" rid="ref37">Sibiya and Sumbwanyambe (2021)</xref> developed a deep learning model for early detection of maize disease using a segmentation approach, whereas the approach in our study was classification. Furthermore, none of these studies has proposed a combined deep-learning model for the early detection of MSV and MLN diseases in maize.
Additionally, when the developed deep learning models were compared, the ViT model had somewhat greater accuracy than the CNN model. According to <xref ref-type="bibr" rid="ref10">Dosovitskiy et al. (2020)</xref>, the ViT model&#x2019;s key design, which includes the ability to capture global dependencies through self-attention mechanisms, gives it an advantage in detecting and classifying various plant diseases with higher accuracy than the CNN model. Furthermore, ViT divides the input image into patches and processes these patches as sequences, enabling the model to learn a high-resolution and systematic representation of the image data. However, when the prediction speed per image is compared for both models, the CNN is 10 milliseconds faster than the ViT, which takes 20 milliseconds per image.</p>
</sec>
<sec sec-type="conclusions" id="sec18">
<label>6</label>
<title>Conclusion</title>
<p>This study has shown that early maize disease detection is possible in Tanzania, with a specific focus on the Maize Streak Virus (MSV) and Maize Lethal Necrosis (MLN). The study collected a substantial dataset comprising 26,913 field-acquired images and 675 wrong images acquired from open-access databases. The dataset&#x2019;s availability as an open-source resource will facilitate further research on MSV and MLN infections. Deep learning models, namely, convolutional neural networks (CNNs) and vision transformers (ViTs), were developed to address the challenge of early disease detection. Both models were developed from scratch, with CNN demonstrating its ability to extract local image features, while ViT demonstrated proficiency in understanding the global image context. ViT achieved a validation accuracy of 93.10%, while CNN achieved an average validation accuracy of 90.97%. This highlights the value of deep learning models in the early diagnosis of plant diseases in maize.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec19">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found at: <ext-link xlink:href="https://data.mendeley.com/datasets/fkw49mz3xs/1" ext-link-type="uri">https://data.mendeley.com/datasets/fkw49mz3xs/1</ext-link>.</p>
</sec>
<sec sec-type="ethics-statement" id="sec20">
<title>Ethics statement</title>
<p>Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec sec-type="author-contributions" id="sec21">
<title>Author contributions</title>
<p>FM: Data curation, Methodology, Software, Validation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. CM: Supervision, Writing &#x2013; review &#x0026; editing. MM: Supervision, Writing &#x2013; review &#x0026; editing. NM: Funding acquisition, Project administration, Supervision, Writing &#x2013; review &#x0026; editing, Data curation, Resources.</p>
</sec>
<sec sec-type="funding-information" id="sec22">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This research was funded entirely by the International Development Research Centre (IDRC) and Swedish International Development Cooperation Agency (SIDA) under the Artificial Intelligence for Development (AI4D) Africa Programme under the Africa Centre for Technology Studies (ACTS).</p>
</sec>
<sec sec-type="COI-statement" id="sec23">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="sec24">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arnaud</surname> <given-names>S. E.</given-names></name> <name><surname>Rehema</surname> <given-names>N.</given-names></name> <name><surname>Aoki</surname> <given-names>S.</given-names></name> <name><surname>Kananu</surname> <given-names>M. L.</given-names></name></person-group> (<year>2022</year>). <article-title>Comparison of deep learning architectures for late blight and early blight disease detection on potatoes</article-title>. <source>Open J. Appl. Sci.</source> <volume>12</volume>, <fpage>723</fpage>&#x2013;<lpage>743</lpage>. doi: <pub-id pub-id-type="doi">10.4236/ojapps.2022.125049</pub-id></citation></ref>
<ref id="ref2"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Arora</surname> <given-names>R.</given-names></name> <name><surname>Trelogan</surname> <given-names>J.</given-names></name> <name><surname>Ba</surname> <given-names>T. N.</given-names></name></person-group> (<year>2016</year>). &#x201C;<article-title>Using high performance computing for detecting duplicate, similar and related images in a large data collection</article-title>,&#x201D; in <source>Conquering big data with high performance computing</source>. <publisher-name>Springer</publisher-name>.</citation></ref>
<ref id="ref3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Atila</surname> <given-names>&#x00DC;.</given-names></name> <name><surname>U&#x00E7;ar</surname> <given-names>M.</given-names></name> <name><surname>Akyol</surname> <given-names>K.</given-names></name> <name><surname>U&#x00E7;ar</surname> <given-names>E.</given-names></name></person-group> (<year>2021</year>). <article-title>Plant leaf disease classification using EfficientNet deep learning model</article-title>. <source>Eco. Inform.</source> <volume>61</volume>:<fpage>101182</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ecoinf.2020.101182</pub-id></citation></ref>
<ref id="ref4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bharali</surname> <given-names>P.</given-names></name> <name><surname>Bhuyan</surname> <given-names>C.</given-names></name> <name><surname>Boruah</surname> <given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>Plant disease detection by leaf image classification using convolutional neural network</article-title>. <source>Commun. Comput. Inform. Sci.</source> <volume>1025</volume>, <fpage>194</fpage>&#x2013;<lpage>205</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-981-15-1384-8_16</pub-id></citation></ref>
<ref id="ref5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boddupalli</surname> <given-names>P.</given-names></name> <name><surname>Suresh</surname> <given-names>L. M.</given-names></name> <name><surname>Mwatuni</surname> <given-names>F.</given-names></name> <name><surname>Beyene</surname> <given-names>Y.</given-names></name> <name><surname>Makumbi</surname> <given-names>D.</given-names></name> <name><surname>Gowda</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Maize lethal necrosis (MLN): efforts toward containing the spread and impact of a devastating transboundary disease in sub-Saharan Africa</article-title>. <source>Virus Res.</source> <volume>282</volume>:<fpage>197943</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.virusres.2020.197943</pub-id>, PMID: <pub-id pub-id-type="pmid">32205142</pub-id></citation></ref>
<ref id="ref6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Borhani</surname> <given-names>Y.</given-names></name> <name><surname>Khoramdel</surname> <given-names>J.</given-names></name> <name><surname>Najafi</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>A deep learning based approach for automated plant disease classification using vision transformer</article-title>. <source>Sci. Rep.</source> <volume>12</volume>:<fpage>11554</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-022-15163-0</pub-id>, PMID: <pub-id pub-id-type="pmid">35798775</pub-id></citation></ref>
<ref id="ref7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>D.</given-names></name> <name><surname>Zeb</surname> <given-names>A.</given-names></name> <name><surname>Nanehkaran</surname> <given-names>Y. A.</given-names></name></person-group> (<year>2021</year>). <article-title>Attention embedded lightweight network for maize disease recognition</article-title>. <source>Plant Pathol.</source> <volume>70</volume>, <fpage>630</fpage>&#x2013;<lpage>642</lpage>. doi: <pub-id pub-id-type="doi">10.1111/ppa.13322</pub-id></citation></ref>
<ref id="ref8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Darwish</surname> <given-names>A.</given-names></name> <name><surname>Ezzat</surname> <given-names>D.</given-names></name> <name><surname>Hassanien</surname> <given-names>A. E.</given-names></name></person-group> (<year>2020</year>). <article-title>An optimized model based on convolutional neural networks and orthogonal learning particle swarm optimization algorithm for plant diseases diagnosis</article-title>. <source>Swarm Evol. Comput.</source> <volume>52</volume>:<fpage>100616</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.swevo.2019.100616</pub-id></citation></ref>
<ref id="ref9"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Dewbre</surname> <given-names>D.</given-names></name> <name><surname>Soglo</surname> <given-names>C. J.</given-names></name> <name><surname>Production</surname> <given-names>F.</given-names></name> <name><surname>Cervantes-Godoy</surname> <given-names>J.</given-names></name> <name><surname>Amegnaglo</surname> <given-names>Y. Y.</given-names></name> <name><surname>Akpa</surname> <given-names>A. F.</given-names></name> <etal/></person-group>. (<year>2014</year>). <source>The future of food and agriculture: Trends and challenges</source>.</citation></ref>
<ref id="ref10"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Dosovitskiy</surname> <given-names>A.</given-names></name> <name><surname>Beyer</surname> <given-names>L.</given-names></name> <name><surname>Kolesnikov</surname> <given-names>A.</given-names></name> <name><surname>Weissenborn</surname> <given-names>D.</given-names></name> <name><surname>Zhai</surname> <given-names>X.</given-names></name> <name><surname>Unterthiner</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2020</year>). An image is worth 16x16 words: transformers for image recognition at scale. Available at: <ext-link xlink:href="http://arxiv.org/abs/2010.11929" ext-link-type="uri">http://arxiv.org/abs/2010.11929</ext-link></citation></ref>
<ref id="ref11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ferentinos</surname> <given-names>K. P.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep learning models for plant disease detection and diagnosis</article-title>. <source>Comput. Electron. Agric.</source> <volume>145</volume>, <fpage>311</fpage>&#x2013;<lpage>318</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2018.01.009</pub-id></citation></ref>
<ref id="ref12"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Francis</surname> <given-names>M.</given-names></name> <name><surname>Deisy</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Disease detection and classification in agricultural plants using convolutional neural networks - a visual understanding</article-title>. <conf-name>2019 6th international conference on signal processing and integrated networks, SPIN 2019</conf-name>, <fpage>1063</fpage>&#x2013;<lpage>1068</lpage>.</citation></ref>
<ref id="ref13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gayatri</surname> <given-names>K.</given-names></name> <name><surname>Kanti</surname> <given-names>R. D.</given-names></name> <name><surname>Sekhar Rao Rayavarapu</surname> <given-names>V. C.</given-names></name> <name><surname>Sridhar</surname> <given-names>B.</given-names></name> <name><surname>Rama Gowri Bobbili</surname> <given-names>V.</given-names></name></person-group> (<year>2021</year>). <article-title>Image processing and pattern recognition based plant leaf diseases identification and classification</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>1804</volume>:<fpage>012160</fpage>. doi: <pub-id pub-id-type="doi">10.1088/1742-6596/1804/1/012160</pub-id></citation></ref>
<ref id="ref14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gong</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>An analysis of plant diseases identification based on deep learning methods</article-title>. <source>Plant Pathol. J.</source> <volume>39</volume>, <fpage>319</fpage>&#x2013;<lpage>334</lpage>. doi: <pub-id pub-id-type="doi">10.5423/PPJ.OA.02.2023.0034</pub-id>, PMID: <pub-id pub-id-type="pmid">37550979</pub-id></citation></ref>
<ref id="ref15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Guo</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>A survey on vision transformer</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>45</volume>, <fpage>87</fpage>&#x2013;<lpage>110</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.2022.3152247</pub-id></citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haque</surname> <given-names>M. A.</given-names></name> <name><surname>Marwaha</surname> <given-names>S.</given-names></name> <name><surname>Deb</surname> <given-names>C. K.</given-names></name> <name><surname>Nigam</surname> <given-names>S.</given-names></name> <name><surname>Arora</surname> <given-names>A.</given-names></name> <name><surname>Hooda</surname> <given-names>K. S.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Deep learning-based approach for identification of diseases of maize crop</article-title>. <source>Sci. Rep.</source> <volume>12</volume>:<fpage>6334</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-022-10140-z</pub-id>, PMID: <pub-id pub-id-type="pmid">35428845</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ho</surname> <given-names>B.</given-names></name></person-group> (<year>2016</year>). <article-title>Deep learning: image &#x0026; video recognition</article-title>. <source>Int. J. Semantic Comput.</source> <volume>10</volume>, <fpage>417</fpage>&#x2013;<lpage>439</lpage>. doi: <pub-id pub-id-type="doi">10.1142/S1793351X16500045</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="other"><person-group person-group-type="author"><collab id="coll1">International Trade Administration</collab></person-group>. (<year>2021</year>). Tanzania - Agriculture and Agricultural Processing. Available at: <ext-link xlink:href="https://www.trade.gov/country-commercial-guides/tanzania-agriculture-and-agricultural-processing" ext-link-type="uri">https://www.trade.gov/country-commercial-guides/tanzania-agriculture-and-agricultural-processing</ext-link></citation></ref>
<ref id="ref19"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Jasim</surname> <given-names>M. A.</given-names></name> <name><surname>Al-Tuwaijari</surname> <given-names>J. M.</given-names></name></person-group> (<year>2020</year>). <article-title>Plant leaf diseases detection and classification using image processing and deep learning techniques</article-title>. <conf-name>Proceedings of the 2020 international conference on computer science and software engineering, CSASE 2020</conf-name>, <fpage>259</fpage>&#x2013;<lpage>265</lpage>.</citation></ref>
<ref id="ref20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kamilaris</surname> <given-names>A.</given-names></name> <name><surname>Prenafeta-Bold&#x00FA;</surname> <given-names>F. X.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep learning in agriculture: a survey</article-title>. <source>Comput. Electron. Agric.</source> <volume>147</volume>, <fpage>70</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2018.02.016</pub-id></citation></ref>
<ref id="ref21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kiruwa</surname> <given-names>F. H.</given-names></name> <name><surname>Mutiga</surname> <given-names>S.</given-names></name> <name><surname>Njuguna</surname> <given-names>J.</given-names></name> <name><surname>Machuka</surname> <given-names>E.</given-names></name> <name><surname>Senay</surname> <given-names>S.</given-names></name> <name><surname>Feyissa</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Status and epidemiology of maize lethal necrotic disease in northern Tanzania</article-title>. <source>Pathogens</source> <volume>9</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi: <pub-id pub-id-type="doi">10.3390/pathogens9010004</pub-id>, PMID: <pub-id pub-id-type="pmid">31861452</pub-id></citation></ref>
<ref id="ref22"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>G. Y.</given-names></name> <name><surname>Alzamil</surname> <given-names>L.</given-names></name> <name><surname>Doskenov</surname> <given-names>B.</given-names></name> <name><surname>Termehchy</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). A survey on data cleaning methods for improved machine learning model performance. Available at: <ext-link xlink:href="http://arxiv.org/abs/2109.07127" ext-link-type="uri">http://arxiv.org/abs/2109.07127</ext-link></citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name></person-group> (<year>2021</year>). <article-title>Plant diseases and pests detection based on deep learning: a review</article-title>. <source>Plant Methods</source> <volume>17</volume>:<fpage>22</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13007-021-00722-9</pub-id>, PMID: <pub-id pub-id-type="pmid">33627131</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Loyani</surname> <given-names>L.</given-names></name> <name><surname>Machuve</surname> <given-names>D.</given-names></name></person-group> (<year>2021</year>). <article-title>A deep learning-based mobile application for segmenting Tuta absoluta&#x2019;s damage on tomato plants</article-title>. <source>Eng. Technol. Appl. Sci. Res.</source> <volume>11</volume>, <fpage>7730</fpage>&#x2013;<lpage>7737</lpage>. doi: <pub-id pub-id-type="doi">10.48084/etasr.4355</pub-id></citation></ref>
<ref id="ref25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mahuku</surname> <given-names>G.</given-names></name> <name><surname>Lockhart</surname> <given-names>B. E.</given-names></name> <name><surname>Wanjala</surname> <given-names>B.</given-names></name> <name><surname>Jones</surname> <given-names>M. W.</given-names></name> <name><surname>Kimunye</surname> <given-names>J. N.</given-names></name> <name><surname>Stewart</surname> <given-names>L. R.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Maize lethal necrosis (MLN), an emerging threat to maize-based food security in sub-Saharan Africa</article-title>. <source>Phytopathology</source> <volume>105</volume>, <fpage>956</fpage>&#x2013;<lpage>965</lpage>. doi: <pub-id pub-id-type="doi">10.1094/PHYTO-12-14-0367-FI</pub-id>, PMID: <pub-id pub-id-type="pmid">25822185</pub-id></citation></ref>
<ref id="ref26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Maiga</surname> <given-names>Y.</given-names></name></person-group> (<year>2024</year>). <article-title>Temporal forecast of maize production in Tanzania: an autoregressive integrated moving average approach</article-title>. <source>J. Agric. Stud.</source> <volume>12</volume>:<fpage>118</fpage>. doi: <pub-id pub-id-type="doi">10.5296/jas.v12i2.21679</pub-id></citation></ref>
<ref id="ref27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Maxwell</surname> <given-names>A. E.</given-names></name> <name><surname>Warner</surname> <given-names>T. A.</given-names></name> <name><surname>Guill&#x00E9;n</surname> <given-names>L. A.</given-names></name></person-group> (<year>2021</year>). <article-title>Accuracy assessment in convolutional neural network-based deep learning remote sensing studies&#x2014;part 1: literature review</article-title>. <source>Remote Sens.</source> <volume>13</volume>:<fpage>2450</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs13132450</pub-id></citation></ref>
<ref id="ref28"><citation citation-type="other"><person-group person-group-type="author"><collab id="coll2">Oxford Business Group</collab></person-group>. (<year>2018</year>). Already Tanzania&#x2019;s largest sector, agriculture continues to post positive trends. Available at: <ext-link xlink:href="https://oxfordbusinessgroup.com/overview/growth-ground-already-largest-economic-sector-agriculture-continues-see-positive-trends-many-areas" ext-link-type="uri">https://oxfordbusinessgroup.com/overview/growth-ground-already-largest-economic-sector-agriculture-continues-see-positive-trends-many-areas</ext-link></citation></ref>
<ref id="ref29"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Panigrahi</surname> <given-names>K. P.</given-names></name> <name><surname>Das</surname> <given-names>H.</given-names></name> <name><surname>Sahoo</surname> <given-names>A. K.</given-names></name> <name><surname>Moharana</surname> <given-names>S. C.</given-names></name></person-group> (<year>2020</year>). &#x201C;<article-title>Maize leaf disease detection and classification using machine learning algorithms</article-title>&#x201D; in <source>Progress in computing, analytics and networking</source>. eds. <person-group person-group-type="editor"><name><surname>Das</surname> <given-names>H.</given-names></name> <name><surname>Pattnaik</surname> <given-names>P. K.</given-names></name> <name><surname>Rautaray</surname> <given-names>S. S.</given-names></name> <name><surname>Li</surname> <given-names>K.-C.</given-names></name></person-group> (<publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer Singapore</publisher-name>), <fpage>659</fpage>&#x2013;<lpage>669</lpage>.</citation></ref>
<ref id="ref30"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Qi</surname> <given-names>Z.</given-names></name> <name><surname>MaungMaung</surname> <given-names>A. P.</given-names></name> <name><surname>Kinoshita</surname> <given-names>Y.</given-names></name> <name><surname>Kiya</surname> <given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>Privacy-preserving image classification using vision transformer</article-title>. <conf-name>European Signal Processing Conference, 2022-August</conf-name>, <fpage>543</fpage>&#x2013;<lpage>547</lpage>.</citation></ref>
<ref id="ref31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Raghu</surname> <given-names>M.</given-names></name> <name><surname>Unterthiner</surname> <given-names>T.</given-names></name> <name><surname>Kornblith</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Dosovitskiy</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>Do vision transformers see like convolutional neural networks?</article-title> <source>Adv. Neural Inf. Process. Syst.</source> <volume>34</volume>, <fpage>12116</fpage>&#x2013;<lpage>12128</lpage>.</citation></ref>
<ref id="ref32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roy</surname> <given-names>A. M.</given-names></name> <name><surname>Bose</surname> <given-names>R.</given-names></name> <name><surname>Bhaduri</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>A fast accurate fine-grain object detection model based on YOLOv4 deep neural network</article-title>. <source>Neural Comput. Appl.</source> <volume>34</volume>, <fpage>3895</fpage>&#x2013;<lpage>3921</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00521-021-06651-x</pub-id></citation></ref>
<ref id="ref33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rusk</surname> <given-names>N.</given-names></name></person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nat. Methods</source> <volume>13</volume>:<fpage>35</fpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.3707</pub-id></citation></ref>
<ref id="ref34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Savary</surname> <given-names>S.</given-names></name> <name><surname>Willocquet</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>Modeling the impact of crop diseases on global food security</article-title>. <source>Annu. Rev. Phytopathol.</source> <volume>58</volume>, <fpage>313</fpage>&#x2013;<lpage>341</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev-phyto-010820-012856</pub-id></citation></ref>
<ref id="ref35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shepherd</surname> <given-names>D. N.</given-names></name> <name><surname>Martin</surname> <given-names>D. P.</given-names></name> <name><surname>Van Der Walt</surname> <given-names>E.</given-names></name> <name><surname>Dent</surname> <given-names>K.</given-names></name> <name><surname>Varsani</surname> <given-names>A.</given-names></name> <name><surname>Rybicki</surname> <given-names>E. P.</given-names></name></person-group> (<year>2010</year>). <article-title>Maize streak virus: an old and complex &#x201C;emerging&#x201D; pathogen</article-title>. <source>Mol. Plant Pathol.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1364-3703.2009.00568.x</pub-id>, PMID: <pub-id pub-id-type="pmid">20078771</pub-id></citation></ref>
<ref id="ref36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sibiya</surname> <given-names>M.</given-names></name> <name><surname>Sumbwanyambe</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>A computational procedure for the recognition and classification of maize leaf diseases out of healthy leaves using convolutional neural networks</article-title>. <source>AgriEngineering</source> <volume>1</volume>, <fpage>119</fpage>&#x2013;<lpage>131</lpage>. doi: <pub-id pub-id-type="doi">10.3390/agriengineering1010009</pub-id></citation></ref>
<ref id="ref37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sibiya</surname> <given-names>M.</given-names></name> <name><surname>Sumbwanyambe</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Automatic fuzzy logic-based maize common rust disease severity predictions with thresholding and deep learning</article-title>. <source>Pathogens</source> <volume>10</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.3390/pathogens10020131</pub-id>, PMID: <pub-id pub-id-type="pmid">33525312</pub-id></citation></ref>
<ref id="ref38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>A.</given-names></name> <name><surname>Ganapathysubramanian</surname> <given-names>B.</given-names></name> <name><surname>Singh</surname> <given-names>A. K.</given-names></name> <name><surname>Sarkar</surname> <given-names>S.</given-names></name></person-group> (<year>2016</year>). <article-title>Machine learning for high-throughput stress phenotyping in plants</article-title>. <source>Trends Plant Sci.</source> <volume>21</volume>, <fpage>110</fpage>&#x2013;<lpage>124</lpage>. doi: <pub-id pub-id-type="doi">10.1016/J.TPLANTS.2015.10.015</pub-id></citation></ref>
<ref id="ref39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Syarief</surname> <given-names>M.</given-names></name> <name><surname>Setiawan</surname> <given-names>W.</given-names></name></person-group> (<year>2020</year>). <article-title>Convolutional neural network for maize leaf disease image classification</article-title>. <source>Telkomnika</source> <volume>18</volume>, <fpage>1376</fpage>&#x2013;<lpage>1381</lpage>. doi: <pub-id pub-id-type="doi">10.12928/TELKOMNIKA.v18i3.14840</pub-id></citation></ref>
<ref id="ref40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Toseef</surname> <given-names>M.</given-names></name> <name><surname>Khan</surname> <given-names>M. J.</given-names></name></person-group> (<year>2018</year>). <article-title>An intelligent mobile application for diagnosis of crop diseases in Pakistan using fuzzy inference system</article-title>. <source>Comput. Electron. Agric.</source> <volume>153</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2018.07.034</pub-id></citation></ref>
<ref id="ref41"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Vaswani</surname> <given-names>A.</given-names></name> <name><surname>Shazeer</surname> <given-names>N.</given-names></name> <name><surname>Parmar</surname> <given-names>N.</given-names></name> <name><surname>Uszkoreit</surname> <given-names>J.</given-names></name> <name><surname>Jones</surname> <given-names>L.</given-names></name> <name><surname>Gomez</surname> <given-names>A. N.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Attention is all you need</article-title>. <conf-name>31st Conference on Neural Information Processing Systems (NIPS 2017)</conf-name>, <fpage>5999</fpage>&#x2013;<lpage>6009</lpage>.</citation></ref>
<ref id="ref42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Voulodimos</surname> <given-names>A.</given-names></name> <name><surname>Doulamis</surname> <given-names>N.</given-names></name> <name><surname>Doulamis</surname> <given-names>A.</given-names></name> <name><surname>Protopapadakis</surname> <given-names>E.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep learning for computer vision: a brief review</article-title>. <source>Comput. Intell. Neurosci.</source> <volume>2018</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1155/2018/7068349</pub-id>, PMID: <pub-id pub-id-type="pmid">29487619</pub-id></citation></ref>
<ref id="ref43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Song</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>D.</given-names></name></person-group> (<year>2020</year>). <article-title>Deep learning-based object detection improvement for tomato disease</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>56607</fpage>&#x2013;<lpage>56614</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2982456</pub-id></citation></ref>
</ref-list>
</back>
</article>