<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="methods-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2021.770916</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Automatic and Accurate Calculation of Rice Seed Setting Rate Based on Image Segmentation and Deep Learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Guo</surname> <given-names>Yixin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1466982/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Shuai</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Zhanguo</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Yang</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Hu</surname> <given-names>Zhenbang</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Xin</surname> <given-names>Dawei</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/460465/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Chen</surname> <given-names>Qingshan</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/904374/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Wang</surname> <given-names>Jingguo</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1473185/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhu</surname> <given-names>Rongsheng</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c003"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/660787/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>College of Engineering, Northeast Agricultural University</institution>, <addr-line>Harbin</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>College of Arts and Sciences, Northeast Agricultural University</institution>, <addr-line>Harbin</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Agricultural College, Northeast Agricultural University</institution>, <addr-line>Harbin</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Wanneng Yang, Huazhong Agricultural University, China</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Marcin Wozniak, Silesian University of Technology, Poland; Michael Gomez Selvaraj, Consultative Group on International Agricultural Research (CGIAR), United States; Lejun Yu, Hainan University, China</p></fn>
<corresp id="c001">&#x002A;Correspondence: Qingshan Chen, <email>qshchen@126.com</email></corresp>
<corresp id="c002">Jingguo Wang, <email>wangjg@neau.edu.cn</email></corresp>
<corresp id="c003">Rongsheng Zhu, <email>rshzhu@126.com</email></corresp>
<fn fn-type="other" id="fn004"><p>This article was submitted to Technical Advances in Plant Science, a section of the journal Frontiers in Plant Science</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>12</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>770916</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>09</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>11</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2021 Guo, Li, Zhang, Li, Hu, Xin, Chen, Wang and Zhu.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Guo, Li, Zhang, Li, Hu, Xin, Chen, Wang and Zhu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>The rice seed setting rate (RSSR) is an important component in calculating rice yields and a key phenotype for its genetic analysis. Automatic calculations of RSSR through computer vision technology have great significance for rice yield predictions. The basic premise for calculating RSSR is having an accurate and high throughput identification of rice grains. In this study, we propose a method based on image segmentation and deep learning to automatically identify rice grains and calculate RSSR. By collecting information on the rice panicle, our proposed image automatic segmentation method can detect the full grain and empty grain, after which the RSSR can be calculated by our proposed rice seed setting rate optimization algorithm (RSSROA). Finally, the proposed method was used to predict the RSSR, during which process the average identification accuracy reached 99.43%. This method has therefore been proven as an effective, non-invasive method for high throughput identification and calculation of RSSR. It is also applicable to soybean yields, as well as wheat and other crops with similar characteristics.</p>
</abstract>
<kwd-group>
<kwd>rice grain identification</kwd>
<kwd>computer vision</kwd>
<kwd>deep learning</kwd>
<kwd>rice seed setting rate</kwd>
<kwd>image segmentation</kwd>
</kwd-group>
<contract-sponsor id="cn001">Science and Technology Department, Heilongjiang Province<named-content content-type="fundref-id">10.13039/501100011844</named-content></contract-sponsor>
<counts>
<fig-count count="11"/>
<table-count count="4"/>
<equation-count count="13"/>
<ref-count count="38"/>
<page-count count="15"/>
<word-count count="8811"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="intro">
<title>Introduction</title>
<p>Rice (<italic>Oryza sativa</italic>) is a cereal grain and the most widely consumed staple food for a large part of the world&#x2019;s human population, especially in Asia (<xref ref-type="bibr" rid="B11">Ghadirnezhad and Fallah, 2014</xref>). The number of rice grains per panicle is a key trait that affects grain cultivation, management, and subsequent yield (<xref ref-type="bibr" rid="B30">Wu et al., 2019</xref>). The grains per panicle are usually divided into two categories: one is full grain and the other is empty grain. Among them, full grain is the real measure of the number of grains per panicle, and the ratio of full grain to the total number of grains per panicle is called the seed setting rate. The number of grains per panicle and the seed setting rate are considered to be the two most important traits directly reflecting rice yield (<xref ref-type="bibr" rid="B24">Oosterom and Hammer, 2008</xref>; <xref ref-type="bibr" rid="B12">Gong et al., 2018</xref>).</p>
<p>Generally, grain weight, grain number, panicle number, and RSSR are considered to be the main factors affecting rice yield. However, research into RSSR is improving with the advancements in science and technology. <xref ref-type="bibr" rid="B18">Li et al. (2013)</xref> have shown that the domestication-related POLLEN TUBE BLOCKED 1 (PTB1), a RING-type E3 ubiquitin ligase, positively regulates the rice seed setting rate by promoting pollen tube growth. <xref ref-type="bibr" rid="B34">Xu et al. (2017)</xref> proposed that OsCNGC13 acts as a novel maternal sporophytic factor required for stylar [<italic>C</italic><italic>a</italic><sup>2+</sup>]<sub><italic>c</italic><italic>y</italic><italic>t</italic></sub> accumulation, ECM components modification, and STT cell death, and thus facilitates the penetration of the pollen tube for successful double fertilization and seed setting in rice. <xref ref-type="bibr" rid="B31">Xiang et al. (2019)</xref> reported on a novel rice gene, LOW SEED SETTING RATE1 (LSSR1), which regulates the seed setting rate by facilitating rice fertilization. Through these studies and their achievements, improving the RSSR has become an attainable goal. However, these advances have raised a new challenge: the automatic, high-throughput calculation of the RSSR.</p>
<p>With developments in deep learning and plant phenotypic science, efficient and accurate research on rice through information technology (IT) has become very anticipated. <xref ref-type="bibr" rid="B7">Desai et al. (2019)</xref> proposed a simple pipeline which uses ground level RGB images of paddy rice to detect which regions contain flowering panicles, and then uses the flowering panicle region count to estimate the heading date of the crop. <xref ref-type="bibr" rid="B14">Hong Son and Thai-Nghe (2019)</xref> proposed an approach for rice quality classification. In their approach, image processing algorithms, the convolutional neural network (CNN), and machine learning methods are used to recognize and classify two different categories of rice (whole rice and broken rice), based on rice sizes according to the national standard of rice quality evaluation. <xref ref-type="bibr" rid="B19">Lin et al. (2018)</xref> proposed a machine vision system based on the deep convolutional neural network (DCNN) architecture to improve, compared with traditional approaches, the accuracy with which three distinct groups of rice kernel images are classified. <xref ref-type="bibr" rid="B33">Xu et al. (2020)</xref> proposed a simple, yet effective method termed the Multi-Scale Hybrid Window Panicle Detect (MHW-PD), which focuses on enhancing the panicle features to then detect and count the large number of small-sized rice panicles in the in-field scene. <xref ref-type="bibr" rid="B3">Chatnuntawech et al. (2018)</xref> developed a non-destructive rice variety classification system that benefits from the synergy between hyperspectral imaging and the deep CNN. The rice varieties are then determined from the acquired spatio-spectral data using a deep CNN. <xref ref-type="bibr" rid="B37">Zhou et al. 
(2019)</xref> developed and implemented a panicle detection and counting system based on improved region-based fully convolutional networks, and used the system to automate rice-phenotype measurements. <xref ref-type="bibr" rid="B22">Lu et al. (2017)</xref> proposed an innovative technique to enhance the deep learning ability of CNNs. The proposed CNN-based model can effectively classify 10 common rice diseases through image recognition technology. <xref ref-type="bibr" rid="B5">Chu and Yu (2020)</xref> constructed a novel end-to-end model based on deep learning fusion to accurately predict the rice yields for 81 counties in the Guangxi Zhuang Autonomous Region, China, using a combination of time-series meteorology data and area data. <xref ref-type="bibr" rid="B32">Xiong et al. (2017)</xref> proposed a rice panicle segmentation algorithm called Panicle-SEG, which is based on the generation of simple linear iterative clustering super pixel regions, CNN classification, and entropy rate super pixel optimization. <xref ref-type="bibr" rid="B16">Kundu et al. (2021)</xref> develop the &#x201C;Automatic and Intelligent Data Collector and Classifier&#x201D; framework by integrating IoT and deep learning. The framework automatically collects the imagery and parametric data and automatically sends the collected data to the cloud server and the Raspberry Pi. It collaborates with the Raspberry Pi to precisely predict the blast and rust diseases in pearl millet. <xref ref-type="bibr" rid="B8">Dhaka et al. (2021)</xref> present a survey of the existing literature in applying deep CNNs to predict plant diseases from leaf images. This manuscript presents an exemplary comparison of the pre-processing techniques, CNN models, frameworks, and optimization techniques applied to detect and classify plant diseases using leaf images as a data set.</p>
<p>RSSR was initially calculated manually. However, <xref ref-type="bibr" rid="B15">Kong and Chen (2021)</xref> proposed a method based on a mask region convolutional neural network (Mask R-CNN) for feature extraction and three-dimensional (3-D) recognition in CT images of rice panicles, and then calculated the seed setting rate through the obtained three-dimensional image. However, due to the difficulty and high cost of CT image acquisition, this method lacks practicality.</p>
<p>In our research, we closely link deep learning with RSSR, making it a portable tool for the automatic and high-throughput study of RSSR. Through experimental verification, we have found that the correlation between our proposed RSSROA and the results from manual RSSR calculations is as high as 93.21%. In addition, through the verification of 10 randomly selected rice panicle images, our proposed method has been shown to be able to correctly distinguish between two kinds of rice grains. The average accuracy of the number of full grains per panicle is 97.69% and the average accuracy of the number of empty grains per panicle is 93.20%. Therefore, our proposed method can effectively detect two different grains in rice panicles and can accurately calculate RSSR. It can thus become an effective method for low-cost, high-throughput calculations of RSSR.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and Methods</title>
<p>An overview of the proposed method can be seen in <xref ref-type="fig" rid="F1">Figure 1</xref>. The input to our system consists of a sequence of images (across different days and times) of different rice varieties taken in a particular environment (<xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>). The collected images were first cropped to give them the best possible resolution for the network input, and then they were input into the deep learning network we adopted for training after calibration. The training results from each network were compared, and the best network was adopted as the method to calculate the RSSR.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Research flow diagram. <bold>(A)</bold> Original images <bold>(B)</bold> Segmentation images <bold>(C)</bold> Labelimg <bold>(D)</bold> Data integration and classification <bold>(E)</bold> Optional model selection <bold>(F)</bold> Calculation of rice seed setting rate.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g001.tif"/>
</fig>
<sec id="S2.SS1">
<title>Image Acquisition and Processing</title>
<p>Rice planting was carried out in both 2018 and 2019 at Northeast Agricultural University&#x2019;s experimental practice and demonstration base in Acheng, which is located at an east longitude of 127&#x00B0;22&#x2032;&#x223C;127&#x00B0;50&#x2032; and north latitude of 45&#x00B0;34&#x2032;&#x223C;45&#x00B0;46&#x2032;. The test soil was black soil, and there were protection and isolation rows around each 20 m<sup>2</sup> plot area. The seeds were sown on April 20, 2018 (April 17 for the 2019 crop) and transplanted on May 20, 2018 (May 24 for the 2019 crop). The transplanting size was 30 cm &#x00D7; 10 cm and the field management was the same as for the production field (<xref ref-type="bibr" rid="B36">Zhao et al., 2020</xref>).</p>
<p>In order to improve the generalization ability of the experiment and reduce the time required for the artificial labeling of rice grains, 56 varieties of rice were randomly selected from the experimental field and the rice panicle information was collected using a smartphone iPhone X. The image collection environment consisted of a cubed darkroom with a length, width, and height all measuring 80 cm. The top of the darkroom environment possessed a unique light source, while the other directions were all covered by all-black light-absorbing cloth. The shooting method was to artificially push the keys on the mobile phone from the oval entrance on the front of the cubed darkroom (a rectangle measuring 55 cm in length and 40 cm in width). The shooting equipment was kept about 30 cm from the top of the rice panicles (the shooting equipment is not fixed; it only needs to be held in place manually). The image collection cubed darkroom for the rice panicles is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Rice panicle image collection cubed darkroom. <bold>(A)</bold> Real map and <bold>(B)</bold> structural diagram.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g002.tif"/>
</fig>
<p>A total of 263 rice panicles and 298 images were obtained. Each panicle of rice was photographed in both its natural and artificially shaped states. Each image contains different rice panicles, with at least one and at most four panicles per image. The number of panicles per rice variety ranged from 2 to 11. Among them, 60 images were used as the data to calculate the RSSR, while the remaining images were divided into a training verification set and a test set by a ratio of 8:2.</p>
<p>We calibrated the obtained images by labeling with a target detection marking tool, and then used these images for training and prediction purposes. <xref ref-type="fig" rid="F3">Figure 3A</xref> shows the calibration difference between different data sets, and <xref ref-type="fig" rid="F3">Figure 3B</xref> shows the detailed differences between various categories in the image cutting process, where &#x201C;full&#x201D; represents a full rice grain, &#x201C;empty&#x201D; represents an empty rice grain, &#x201C;half&#x201D; represents a half rice grain, &#x201C;H-full&#x201D; and &#x201C;H-empty&#x201D; represent the full and empty grains detected in the half grain count after cropping.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Feature image for depth learning. <bold>(A)</bold> Comparison of local characteristics of rice grains, <bold>(B)</bold> comparison of grain characteristics of different rice varieties.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g003.tif"/>
</fig>
</sec>
<sec id="S2.SS2">
<title>Convolutional Neural Network</title>
<p>The CNN consists of several layers of neurons and computes a multidimensional function with several variables (<xref ref-type="bibr" rid="B4">Chen et al., 2014</xref>; <xref ref-type="bibr" rid="B28">Schmidhuber, 2015</xref>). The neurons in each layer, other than from the first layer, are connected with the neurons from the preceding layer. The first layer is called the input layer (<xref ref-type="bibr" rid="B35">Zhang et al., 2015</xref>; <xref ref-type="bibr" rid="B9">Dong et al., 2016</xref>), which is then followed by hidden layers, and the concluding layer. Each neuron connection has a weight that is adjusted during the learning process. Initially, the weights are taken at random. All neurons receive input values, which they then process and send out as output values. The input layer neurons&#x2019; input and output values are the values from the variables of the function. In the other layers meanwhile, a neuron receives at its input the weighted sum of the output values from the neurons with which the neuron in question is connected. The weights of the connections are used as the weights for the weighing process. Each neuron gives its function to an input value and these functions are called activation functions (<xref ref-type="bibr" rid="B17">LeCun et al., 2015</xref>; <xref ref-type="bibr" rid="B23">Mitra et al., 2017</xref>).</p>
<p>The motivation for building an Object Detection model is to provide solutions in the field of computer vision. The primary essence of object detection can be broken down into two parts: to locate objects in a scene (by drawing a bounding box around the object) and later to classify the objects (based on the classes it was trained on). There are two deep learning based approaches for object detection: one-stage methods (YOLO&#x2013;You Only Look Once, SSD&#x2013;Single Shot Detection) and two-stage approaches (Faster R-CNN) (<xref ref-type="bibr" rid="B25">Rajeshwari et al., 2019</xref>). In addition, we have added a newer one-stage object detector, EfficientDet. These will be our main research methods.</p>
<sec id="S2.SS2.SSS1">
<title>Faster Region Convolutional Neural Network</title>
<p>As a typical two-stage object detection algorithm, the faster region convolutional neural network (Faster R-CNN) has been widely applied in many fields since its proposal (<xref ref-type="bibr" rid="B27">Ren et al., 2016</xref>). As shown in <xref ref-type="fig" rid="F4">Figure 4A</xref>, a region proposal network (RPN) is constructed to generate confident proposals for multi-classification and bounding box refinement. More precisely, RPN first generates a dense grid of anchor regions (candidate bounding boxes) with specified sizes and aspect ratios over each spatial location of the feature maps. According to the intersection over union (IOU) ratio with the ground truth object bounding boxes, an anchor will be assigned a positive or negative label. On top of the feature maps, a shallow CNN is built to judge whether an anchor contains an object and to predict an offset for each anchor. Then anchors with high confidence are rectified by the offset predicted in RPN. Next, the corresponding features of each anchor will go through a RoI pooling layer, a convolution layer and a fully connected layer to predict a specific class as well as refined bounding boxes (<xref ref-type="bibr" rid="B38">Zou et al., 2020</xref>). In addition, it is worth noting that we use ResNet50 and VGG16 as the backbone networks for training.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Convolutional neural network. <bold>(A)</bold> Faster R-CNN, <bold>(B)</bold> SSD, <bold>(C)</bold> EfficientDet, <bold>(D)</bold> YOLO V3, and <bold>(E)</bold> YOLO V4.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g004.tif"/>
</fig>
</sec>
<sec id="S2.SS2.SSS2">
<title>Single Shot Detector</title>
<p>The single shot detector (SSD) (<xref ref-type="bibr" rid="B20">Liu et al., 2016</xref>) discretizes the bounding boxes&#x2019; output space into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box, after which, it makes the corresponding adjustments to the box to better match the object shape. Additionally, in order to naturally handle objects of various sizes, the network combines predictions from multiple feature maps with different resolutions. SSD is simple compared to methods that require object proposals, because it completely eliminates the need for proposal generations and the subsequent pixel or feature resampling stages, and encapsulates all the necessary computations in a single network. This makes SSD easily trainable and straightforward to integrate into systems requiring a detection component (see <xref ref-type="fig" rid="F4">Figure 4B</xref>).</p>
</sec>
<sec id="S2.SS2.SSS3">
<title>EfficientDet</title>
<p>EfficientDet proposes a weighted bi-directional feature pyramid network (BiFPN) and then uses it as the feature network. It takes level 3&#x2013;7 features (P3, P4, P5, P6, P7) from the backbone network and repeats the top-down and bottom-up bi-directional feature fusion. These fused features are fed to the class and box networks to generate object class and boundary box predictions, respectively. A composite scaling extension method is also proposed, which is able to uniformly scale the resolution, depth and width of all the backbone networks, feature networks and prediction networks. The network structure of EfficientDet is shown in <xref ref-type="fig" rid="F4">Figure 4C</xref> (<xref ref-type="bibr" rid="B29">Tan et al., 2020</xref>).</p>
</sec>
<sec id="S2.SS2.SSS4">
<title>You Only Look Once</title>
<p>YOLO V3 adopts a network structure called Darknet53. It draws on the practice of residual network, and sets up fast links between some layers to form a deeper network level and multi-scale detection, which improves the detection effect of mAP and small objects (<xref ref-type="bibr" rid="B26">Redmon and Farhadi, 2018</xref>). Its basic network structure is shown in <xref ref-type="fig" rid="F4">Figure 4D</xref>.</p>
<p>The real-time and high-precision target detection model, YOLO V4, allows anyone training and testing with a conventional GPU to achieve real-time, high quality and convincing object detection results. As an improved version of YOLO V3, YOLO V4 combines many of the techniques from YOLO V3. Among them, the feature extraction network, Darknet53, which was the backbone network for YOLO V3, has been changed to CSPDarknet53, the feature pyramid has become SPP and PAN, while the classification regression layer remains the same as in YOLO V3. In order to achieve better target detection accuracy without increasing inference costs, a method is used that either only changes the training strategy or only increases the training cost. This method is called the &#x201C;bag of freebies.&#x201D; A common method for target detection that meets the requirements of being a &#x201C;free bag&#x201D; in the &#x201C;bag of freebies&#x201D; method, is data enhancement. The purpose of data augmentation is to increase the variability of the input images, meaning that the designed object detection model will have higher robustness to images obtained in different environments. Another addition to this method, is known as the &#x201C;bag of specials.&#x201D; This bag consists of plugin modules and a post-processing method that can significantly improve the accuracy of object detection and only increase the inference cost by a small amount. Generally speaking, these plugin modules are used to enhance certain attributes in a model, such as enlarging the receptive field, introducing an attention mechanism, or strengthening feature integration capability. Post-processing meanwhile, consists in a method used for screening model prediction results. Its basic network structure is shown in <xref ref-type="fig" rid="F4">Figure 4E</xref> (<xref ref-type="bibr" rid="B2">Bochkovskiy et al., 2020</xref>).</p>
</sec>
</sec>
<sec id="S2.SS3">
<title>Hardware and Software</title>
<p>The CNNs were trained on the rice image dataset using a hardware solution from our computer. This was a personal desktop computer with Intel core i9-9900k CPU, NVIDIA Titan XP (12G) GPU, and 64G RAM. We used the desktop to train the six networks in Python language under a Windows operating system with a Pytorch framework.</p>
</sec>
<sec id="S2.SS4">
<title>Rice Seed Setting Rate Optimization Algorithm</title>
<p>Obtaining the RSSR is the ultimate goal of this research. According to the traditional RSSR calculation formula used in agriculture, the following formula was offered for adaption to our research results:</p>
<disp-formula id="S2.E1"><label>(1)</label><mml:math id="M1"><mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>S</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>S</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:msub><mml:mi>R</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:msub><mml:mi>F</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:msub><mml:mi>E</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<p>We put forward a novel method to calculate the RSSR, which is to segment the original rice images to form the third category &#x201C;half grain,&#x201D; and calculate the RSSR by finding the correlation among them. This method is called the rice seed setting rate optimization algorithm (RSSROA), the formula is as follows:</p>
<disp-formula id="S2.E2"><label>(2)</label><mml:math id="M2"><mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>S</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>S</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:msub><mml:mi>R</mml:mi><mml:mi>a</mml:mi></mml:msub></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>F</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>H</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">&#x00D7;</mml:mo><mml:mfrac><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>H</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>F</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>E</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mfrac><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>H</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E3"><label>(3)</label><mml:math id="M3"><mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>a</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:msub><mml:mi>o</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>F</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>E</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E4"><label>(4)</label><mml:math id="M4"><mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>a</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:msub><mml:mi>o</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>F</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>H</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>F</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>H</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>E</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>H</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<p>where <italic>R</italic><italic>S</italic><italic>S</italic><italic>R</italic><sub><italic>t</italic></sub> is a traditional measurement method used for calculating the RSSR in agronomy, <italic>N</italic><italic>F</italic><sub><italic>t</italic></sub> is the number of full grains obtained by traditional methods, <italic>N</italic><italic>E</italic><sub><italic>t</italic></sub> is the number of empty grains obtained by traditional methods, <italic>R</italic><italic>S</italic><italic>S</italic><italic>R</italic><sub><italic>a</italic></sub> is the RSSR result calculated by our rice seed setting rate optimization algorithm (RSSROA), <italic>N</italic><italic>F</italic>(<italic>N</italic><italic>U</italic><italic>M</italic><italic>B</italic><italic>E</italic><italic>R</italic><italic>O</italic><italic>F</italic><italic>F</italic><italic>U</italic><italic>L</italic><italic>L</italic><italic>G</italic><italic>R</italic><italic>A</italic><italic>I</italic><italic>N</italic>) is the number of full rice grains obtained by RSSROA, <italic>N</italic><italic>E</italic>(<italic>N</italic><italic>U</italic><italic>M</italic><italic>B</italic><italic>E</italic><italic>R</italic><italic>O</italic><italic>F</italic><italic>E</italic><italic>M</italic><italic>P</italic><italic>T</italic><italic>Y</italic><italic>G</italic><italic>R</italic><italic>A</italic><italic>I</italic><italic>N</italic>) is the number of empty grains obtained by RSSROA, <italic>N</italic><italic>H</italic>(<italic>N</italic><italic>U</italic><italic>M</italic><italic>B</italic><italic>E</italic><italic>R</italic><italic>O</italic><italic>F</italic><italic>H</italic><italic>A</italic><italic>L</italic><italic>F</italic><italic>G</italic><italic>R</italic><italic>A</italic><italic>I</italic><italic>N</italic>) is the number of half grains obtained by RSSROA, 
<italic>P</italic><italic>H</italic>(<italic>P</italic><italic>R</italic><italic>O</italic><italic>B</italic><italic>A</italic><italic>B</italic><italic>I</italic><italic>L</italic><italic>I</italic><italic>T</italic><italic>Y</italic><italic>O</italic><italic>F</italic><italic>F</italic><italic>U</italic><italic>L</italic><italic>L</italic><italic>H</italic><italic>A</italic><italic>L</italic><italic>F</italic><italic>S</italic><italic>E</italic><italic>E</italic><italic>D</italic>) is the prior probability of there being full grains of rice in the half grain count, <italic>N</italic><italic>F</italic><italic>H</italic>(<italic>N</italic><italic>U</italic><italic>M</italic><italic>B</italic><italic>E</italic><italic>R</italic><italic>O</italic><italic>F</italic><italic>F</italic><italic>U</italic><italic>L</italic><italic>L</italic><italic>G</italic><italic>R</italic><italic>A</italic><italic>I</italic><italic>N</italic><italic>I</italic><italic>N</italic><italic>H</italic><italic>A</italic><italic>L</italic><italic>F</italic><italic>G</italic><italic>R</italic><italic>A</italic><italic>I</italic><italic>N</italic>) is the number of full grains in the half grain count, and <italic>N</italic><italic>E</italic><italic>H</italic>(<italic>N</italic><italic>U</italic><italic>M</italic><italic>B</italic><italic>E</italic><italic>R</italic><italic>O</italic><italic>F</italic><italic>E</italic><italic>M</italic><italic>P</italic><italic>T</italic><italic>Y</italic><italic>G</italic><italic>R</italic><italic>A</italic><italic>I</italic><italic>N</italic><italic>I</italic><italic>N</italic><italic>H</italic><italic>A</italic><italic>L</italic><italic>F</italic><italic>G</italic><italic>R</italic><italic>A</italic><italic>I</italic><italic>N</italic>) is the number of empty grains in the half grain count.</p>
<p>Through our simulation study, it was found that there is a certain linear relationship between <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>1</sub> and <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>2</sub>. This can be seen in <xref ref-type="fig" rid="F5">Figure 5A</xref>, which shows the distribution density curves of <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>1</sub> and <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>2</sub>, where both curves follow a normal distribution and have a 99.89% probability of consistency by the Kolmogorov-Smirnov test (<xref ref-type="bibr" rid="B10">Frank, 1951</xref>). Therefore, we further explored and obtained the scatter diagram with <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>1</sub> as the <italic>X</italic>-axis and <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>2</sub> as the <italic>Y</italic>-axis, as shown in <xref ref-type="fig" rid="F5">Figure 5B</xref>. Through a correlation analysis, we then obtained a correlation coefficient of 0.8327 and the linear equation <italic>P</italic><italic>H</italic> = <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>2</sub> = 0.797<italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>1</sub>+0.1972. The result of this current method can be used as our <italic>PH</italic> coefficient.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>Research on the relationship of Ratio. <bold>(A)</bold> The proportion of cumulative frequency according to the change of ratio <bold>(B)</bold> relationship between <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>1</sub> and <italic>R</italic><italic>a</italic><italic>t</italic><italic>i</italic><italic>o</italic><sub>2</sub>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g005.tif"/>
</fig>
</sec>
<sec id="S2.SS5">
<title>Evaluation Standard</title>
<p>We evaluated the results from the different networks used on our data set. For the evaluation, a detected instance was considered a true positive if it had a Jaccard Index similarity coefficient, also known as an intersection-over-union (IOU) (<xref ref-type="bibr" rid="B13">He and Garcia, 2009</xref>; <xref ref-type="bibr" rid="B6">Csurka et al., 2013</xref>) of 0.5 or more, with a ground truth instance. The IOU is defined as the ratio of pixel number in the intersection to pixel number in the union. The instances of ground truth which did not overlap with any detected instance were considered false negatives. From these measures, the precision, recall, F1 score, AP, and mAP were calculated (<xref ref-type="bibr" rid="B1">Afonso et al., 2020</xref>):</p>
<disp-formula id="S2.E5"><label>(5)</label><mml:math id="M5"><mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>r</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>s</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>o</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>n</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>P</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>P</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E6"><label>(6)</label><mml:math id="M6"><mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>a</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>l</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>P</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>N</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E7"><label>(7)</label><mml:math id="M7"><mml:mrow><mml:mrow><mml:mi>F</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mn>1</mml:mn></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mo>&#x2062;</mml:mo><mml:mi>P</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>r</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>s</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>o</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>n</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">&#x00D7;</mml:mo><mml:mi>R</mml:mi></mml:mrow><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>a</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>r</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>s</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>o</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>n</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">+</mml:mo><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>a</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>l</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E8"><label>(8)</label><mml:math id="M8"><mml:mrow><mml:mrow><mml:mi>A</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>P</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mrow><mml:munderover><mml:mo largeop="true" movablelimits="false" symmetric="true">&#x2211;</mml:mo><mml:mrow><mml:mpadded width="+3.3pt"><mml:mi>k</mml:mi></mml:mpadded><mml:mo rspace="5.8pt">=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:mi>P</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>r</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>s</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>o</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>n</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mi>k</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2062;</mml:mo><mml:mi mathvariant="normal">&#x25B3;</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>e</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>a</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mi>k</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mrow></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E9"><label>(9)</label><mml:math id="M9"><mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>A</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>P</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mo largeop="true" symmetric="true">&#x2211;</mml:mo><mml:mi>i</mml:mi><mml:mi>M</mml:mi></mml:msubsup><mml:mrow><mml:mi>A</mml:mi><mml:mo>&#x2062;</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mrow><mml:mi>M</mml:mi></mml:mfrac></mml:mrow></mml:math></disp-formula>
<p>where <italic>TP</italic> = the number of true positives, <italic>FP</italic> = the number of false positives, and <italic>FN</italic> = the number of false negatives. Where <italic>N</italic> is the total number of images in the test dataset, <italic>M</italic> is the number of classes, <italic>P</italic><italic>r</italic><italic>e</italic><italic>c</italic><italic>i</italic><italic>s</italic><italic>i</italic><italic>o</italic><italic>n</italic>(<italic>k</italic>) is the precision value at <italic>k</italic> images, and &#x25B3;<italic>R</italic><italic>e</italic><italic>c</italic><italic>a</italic><italic>l</italic><italic>l</italic>(<italic>k</italic>) is the recall change between the <italic>k</italic> and <italic>k-1</italic> images.</p>
<p>In addition, the mean absolute error (<italic>MAE</italic>), the mean squared error (<italic>MSE</italic>), the root mean squared error (<italic>RMSE</italic>), and the correlation coefficient (<italic>R</italic>), were used as the evaluation metrics to assess the counting performance. They take the forms:</p>
<disp-formula id="S2.E10"><label>(10)</label><mml:math id="M10"><mml:mrow><mml:mrow><mml:mi>M</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>A</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>E</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mo>&#x2062;</mml:mo><mml:mrow><mml:munderover><mml:mo largeop="true" movablelimits="false" symmetric="true">&#x2211;</mml:mo><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:mrow></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E11"><label>(11)</label><mml:math id="M11"><mml:mrow><mml:mrow><mml:mi>M</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>S</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>E</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mo>&#x2062;</mml:mo><mml:mrow><mml:munderover><mml:mo largeop="true" movablelimits="false" symmetric="true">&#x2211;</mml:mo><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mrow></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E12"><label>(12)</label><mml:math id="M12"><mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>M</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mi>S</mml:mi><mml:mo>&#x2062;</mml:mo><mml:mpadded width="+3.3pt"><mml:mi>E</mml:mi></mml:mpadded></mml:mrow><mml:mo rspace="5.8pt">=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mo>&#x2062;</mml:mo><mml:mrow><mml:munderover><mml:mo largeop="true" movablelimits="false" symmetric="true">&#x2211;</mml:mo><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:math></disp-formula>
<disp-formula id="S2.E13"><label>(13)</label><mml:math id="M13"><mml:mrow><mml:mpadded width="+3.3pt"><mml:mi>R</mml:mi></mml:mpadded><mml:mo rspace="5.8pt">=</mml:mo><mml:msqrt><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mo largeop="true" symmetric="true">&#x2211;</mml:mo><mml:mrow><mml:mpadded width="+3.3pt"><mml:mi>i</mml:mi></mml:mpadded><mml:mo rspace="5.8pt">=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mrow><mml:msubsup><mml:mo largeop="true" symmetric="true">&#x2211;</mml:mo><mml:mrow><mml:mpadded width="+3.3pt"><mml:mi>i</mml:mi></mml:mpadded><mml:mo rspace="5.8pt">=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:mover accent="true"><mml:mi>t</mml:mi><mml:mo>&#x00AF;</mml:mo></mml:mover></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:msqrt></mml:mrow></mml:math></disp-formula>
<p>where <italic>N</italic> denotes the number of test images, <italic>t</italic><sub><italic>i</italic></sub> is the ground truth count for the <italic>i-th</italic> image, <italic>c<sub>i</sub></italic> is the inferred count for the <italic>i-th</italic> image, and <inline-formula><mml:math id="INEQ33"><mml:mover accent="true"><mml:mi>t</mml:mi><mml:mo>&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> is the arithmetic mean of <italic>t</italic><sub><italic>i</italic></sub>.</p>
</sec>
</sec>
<sec id="S3" sec-type="results">
<title>Results</title>
<sec id="S3.SS1">
<title>Rice Grain Detection</title>
<p>First, we evaluated the convergence between the YOLO series model (YOLO V3, YOLO V4) and its four alternatives [Faster R-CNN (ResNet50), Faster R-CNN (VGG16), SSD, and EfficientDet], as well as the number of iterations. The loss curves of the training and verification processes from the adopted six deep neural networks are shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. For the full six networks, the uniform batch size is 4 and the learning rate starts from 0.0001. In terms of iterations, 200 are used for Faster R-CNN (ResNet50) and Faster R-CNN (VGG16), while SSD, EfficientDet, YOLO V3 and YOLO V4 use 120. It can be seen that at the beginning of the training phase, the training loss drops sharply, and then after a certain number of iterations, the loss value slowly converges around an accurate value.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption><p>Loss curves of the different CNNs. <bold>(A)</bold> Faster R-CNN (ResNet50), <bold>(B)</bold> Faster R-CNN (VGG16), <bold>(C)</bold> SSD, <bold>(D)</bold> EfficientDet, <bold>(E)</bold> YOLO V3, and <bold>(F)</bold> YOLO V4.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g006.tif"/>
</fig>
<p><xref ref-type="bibr" rid="B21">Liu et al. (2021)</xref> proposed a self-attention negative feed-back network (SRAFBN) for realizing real-time image super-resolution (SR). The network model constrains the image mapping space and selects the key information of the image through the self-attention negative feedback model, so that higher quality images can be generated to meet human visual perception. Good methods exist for mapping low-resolution images to high-resolution ones, but methods for the reverse mapping, from high resolution to low resolution, are still lacking. Therefore, we propose the following idea: we cut the 190 images into 4,560 images, re-tagged them, and added the &#x201C;half&#x201D; category. Among these newly cut images, 2,705 were marked as foreground images and the 1,855 without annotations were treated as background images. We input the 2,705 foreground images into the six networks that we proposed as a data set, and obtained the precision-recall curve (<xref ref-type="supplementary-material" rid="FS1">Supplementary Figure 1</xref>). This greatly improved the recognition effect of all the networks (<xref ref-type="supplementary-material" rid="TS2">Supplementary Table 2</xref>). Among them, the mAP of the proposed YOLO V4 model in the training set reached 90.13%, which is the most effective.</p>
<p>The features of the full grains are that they are full and the middle of the grain presents a raised state (we believe that partially filled grains caused by abiotic stress are also full grains); empty grains, meanwhile, are flat and the whole grain presents a plane effect. The three-dimensional sense in an empty grain is weaker than in a full grain, and part of the empty grain is reflected by cracks and openings in its center. The fact that these differences are small results in a poor detection effect by the alternative models we proposed. The proposed YOLO V4 model uses a Mosaic data enhancing method to reduce training costs and CSPDarknet53 to reduce the number of parameters and FLOPS of the model, which not only ensures the speed and accuracy of reasoning, but also reduces the model size. At the same time, DropBlock regularization and class label smoothing are employed to avoid any overfitting due to small differences. Thus, our proposed YOLO V4 model performs much better than the other alternative models.</p>
<p>Following this, we tested the performance of different networks on the test set (<xref ref-type="table" rid="T1">Table 1</xref> and <xref ref-type="fig" rid="F7">Figure 7</xref>), where we plotted the precision and recall index graphs for full grain, empty grain, and half grain, with the <italic>X</italic>-axis corresponding to recall and the <italic>Y</italic>-axis corresponding to precision (<xref ref-type="fig" rid="F8">Figure 8</xref>). Each color corresponds to the test results of a network structure. For each color, the symbols &#x201C;&#x00B0;,&#x201D; &#x201C;&#x002A;,&#x201D; and &#x201C;&#x2033;&#x201D; represent the respective overlapping IoU thresholds of 0.25, 0.50, and 0.75. Since in an ideal situation, both indicators will be close to 1, the best approach will be shown as close to the upper right corner as possible. It is clear from <xref ref-type="fig" rid="F8">Figure 8</xref> that the results from the YOLO V4 model were significantly better than those from the other networks, regardless of their category. For all methods, we noted that both accuracy and recall measures were lower when the overlap threshold was 0.75, and highest when the overlap threshold was 0.25. This means that in the case of more stringent matching criteria (higher IoU thresholds), fewer detected rice grains were matched with instances from the ground truth, which resulted in lower indices for both. The network closest to the top right was YOLO V4, with an overlap threshold of 0.25 and 0.50, respectively.</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Detection performance of different models in the test set during the clipping stage.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Network name</td>
<td valign="top" align="center">Category</td>
<td valign="top" align="center">Precision</td>
<td valign="top" align="center">Recall</td>
<td valign="top" align="center">F1</td>
<td valign="top" align="center">AP</td>
<td valign="top" align="center">mAP</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Faster R-CNN (ResNet50)</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">74.24%</td>
<td valign="top" align="center">87.80%</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">84.10%</td>
<td valign="top" align="center">50.65%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">56.28%</td>
<td valign="top" align="center">56.21%</td>
<td valign="top" align="center">0.56</td>
<td valign="top" align="center">44.70%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Half grain</td>
<td valign="top" align="center">50.20%</td>
<td valign="top" align="center">32.95%</td>
<td valign="top" align="center">0.40</td>
<td valign="top" align="center">23.15%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Faster R-CNN (VGG16)</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">82.32%</td>
<td valign="top" align="center">88.43%</td>
<td valign="top" align="center">0.85</td>
<td valign="top" align="center">86.55%</td>
<td valign="top" align="center">59.70%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">61.07%</td>
<td valign="top" align="center">51.77%</td>
<td valign="top" align="center">0.56</td>
<td valign="top" align="center">46.10%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Half grain</td>
<td valign="top" align="center">69.35%</td>
<td valign="top" align="center">50.16%</td>
<td valign="top" align="center">0.58</td>
<td valign="top" align="center">46.45%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">SSD</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">36.43%</td>
<td valign="top" align="center">71.47%</td>
<td valign="top" align="center">0.48</td>
<td valign="top" align="center">66.09%</td>
<td valign="top" align="center">31.01%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">10.24%</td>
<td valign="top" align="center">60.05%</td>
<td valign="top" align="center">0.18</td>
<td valign="top" align="center">17.87%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Half grain</td>
<td valign="top" align="center">3.18%</td>
<td valign="top" align="center">56.91%</td>
<td valign="top" align="center">0.06</td>
<td valign="top" align="center">9.08%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">EfficientDet</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">79.43%</td>
<td valign="top" align="center">84.45%</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">86.99%</td>
<td valign="top" align="center">54.54%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">100.00%</td>
<td valign="top" align="center">0.02%</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">15.84%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Half grain</td>
<td valign="top" align="center">92.54%</td>
<td valign="top" align="center">27.26%</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">60.78%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">YOLO V3</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">81.00%</td>
<td valign="top" align="center">84.07%</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">88.29%</td>
<td valign="top" align="center">62.62%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">60.12%</td>
<td valign="top" align="center">35.19%</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">40.84%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Half grain</td>
<td valign="top" align="center">83.94%</td>
<td valign="top" align="center">44.54%</td>
<td valign="top" align="center">0.58</td>
<td valign="top" align="center">58.72%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">YOLO V4</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">89.79%</td>
<td valign="top" align="center">92.79%</td>
<td valign="top" align="center">0.91</td>
<td valign="top" align="center">94.78%</td>
<td valign="top" align="center">83.98%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">77.66%</td>
<td valign="top" align="center">74.68%</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">73.92%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Half grain</td>
<td valign="top" align="center">87.79%</td>
<td valign="top" align="center">75.83%</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">83.24%</td>
<td/>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption><p>Precision-recall curves of the different convolutional neural networks in test set. <bold>(A&#x2013;C)</bold> Are the Faster R-CNN (ResNet50) network Precision-Recall curves, where <bold>(A)</bold> is the full grain precision-recall curve obtained by the Faster R-CNN (ResNet50) network, <bold>(B)</bold> is the empty grain precision-recall curve obtained by the Faster R-CNN (ResNet50) network, and <bold>(C)</bold> is the half grain precision-recall curve obtained by the Faster R-CNN (ResNet50) network. <bold>(D&#x2013;F)</bold> Are the Faster R-CNN (VGG16) network Precision-Recall curves, where <bold>(D)</bold> is the full grain precision-recall curve obtained by the Faster R-CNN (VGG16) network, <bold>(E)</bold> is the empty grain precision-recall curve obtained by the Faster R-CNN (VGG16) network, and <bold>(F)</bold> is the half grain precision-recall curve obtained by the Faster R-CNN (VGG16) network. <bold>(G&#x2013;I)</bold> Are the SSD network precision-recall curves, where <bold>(G)</bold> is the full grain precision-recall curve obtained by the SSD network, <bold>(H)</bold> is the empty grain precision-recall curve obtained by the SSD network, and <bold>(I)</bold> is the half grain precision-recall curve obtained by the SSD network. <bold>(J&#x2013;L)</bold> Are the EfficientDet network precision-recall curves, where <bold>(J)</bold> is the full grain precision-recall curve obtained by the EfficientDet network, <bold>(K)</bold> is the empty grain precision-recall curve obtained by the EfficientDet network, and <bold>(L)</bold> is the half grain precision-recall curve obtained by the EfficientDet network. 
<bold>(M&#x2013;O)</bold> Are the YOLO V3 network precision-recall curves, where <bold>(M)</bold> is the full grain precision-recall curve obtained by the YOLO V3 network, <bold>(N)</bold> is the empty grain precision-recall curve obtained by the YOLO V3 network, and <bold>(O)</bold> is the half grain precision-recall curve obtained by the YOLO V3 network. <bold>(P&#x2013;R)</bold> Are the YOLO V4 network precision-recall curves, where <bold>(P)</bold> is the full grain precision-recall curve obtained by the YOLO V4 network, <bold>(Q)</bold> is the empty grain precision-recall curve obtained by the YOLO V4 network, and <bold>(R)</bold> is the half grain precision-recall curve obtained by the YOLO V4 network.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g007.tif"/>
</fig>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption><p>Each color corresponds to the test results from a different network model, while the symbols &#x201C;&#x00B0;,&#x201D; &#x201C;&#x002A;,&#x201D; and &#x201C;&#x2033;&#x201D; correspond to a 0.25, 0.5, and 0.75 overlap IOU, respectively. The results from each method and their use of these IOU thresholds are connected by dashed lines: <bold>(A)</bold> Test results in full grain, <bold>(B)</bold> test results in empty grain, and <bold>(C)</bold> test results in half grain.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g008.tif"/>
</fig>
</sec>
<sec id="S3.SS2">
<title>Calculation of Rice Seed Setting Rate</title>
<p>Through an analysis and comparison, YOLO V4 was finally selected as the main network to be used for RSSR predictions, due to its good partitioning effect on the rice grains. For the calculation of RSSR, the rice images were first input for automatic cropping, with the number of full grain, empty grain, and half grain in each cropped image predicted by the YOLO V4 network. Following this, all sub-images belonging to an image were automatically synthesized, and the RSSR was calculated according to the algorithm we provided.</p>
<p>The linear regression between the manual calculation result and the optimization algorithm&#x2019;s calculation result of 60 rice images is shown through (<xref ref-type="fig" rid="F9">Figures 9A&#x2013;C</xref>). It can be observed that YOLO V4 is the most efficient at identifying rice grains, and that its correlation coefficient <italic>R</italic> surpasses 90%.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption><p>The results calculated by the algorithm are in the form of a linear regression: <bold>(A)</bold> Linear regression of full grains in the optimization algorithm, <bold>(B)</bold> linear regression of empty grains in the optimization algorithm, and <bold>(C)</bold> linear regression of half grains in the optimization algorithm.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g009.tif"/>
</fig>
<p><xref ref-type="table" rid="T2">Table 2</xref> is a comparison of the results from the proposed method and those that were obtained manually. From <xref ref-type="table" rid="T2">Table 2</xref>, it can be seen that the proposed method&#x2019;s average accuracy for calculating the full grain number per panicle was 97.69%, for the empty grain number per panicle it was 93.20%, and for the RSSR it was 99.43%. This indicates that the proposed method offers high accuracy and stability. The deviations in a few cases can be attributed to identification errors for some small empty grains and half grains during the YOLO V4 model&#x2019;s testing process. The characteristics of some empty grains are not obvious, appearing highly similar to the full grains. Some half grains have a relatively complete shape, which is similar to the shape of full grains with their shielding, resulting in recognition difficulties.</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>Comparison of the proposed method&#x2019;s results and those obtained manually.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Sample label</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">10</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">No. of full grains per panicle determined manually</td>
<td valign="top" align="center">64</td>
<td valign="top" align="center">88</td>
<td valign="top" align="center">117</td>
<td valign="top" align="center">83</td>
<td valign="top" align="center">97</td>
<td valign="top" align="center">141</td>
<td valign="top" align="center">54</td>
<td valign="top" align="center">64</td>
<td valign="top" align="center">52</td>
<td valign="top" align="center">89</td>
</tr>
<tr>
<td valign="top" align="left">No. of full grains per panicle determined using proposed algorithm</td>
<td valign="top" align="center">64</td>
<td valign="top" align="center">86</td>
<td valign="top" align="center">119</td>
<td valign="top" align="center">82</td>
<td valign="top" align="center">99</td>
<td valign="top" align="center">146</td>
<td valign="top" align="center">55</td>
<td valign="top" align="center">66</td>
<td valign="top" align="center">55</td>
<td valign="top" align="center">91</td>
</tr>
<tr>
<td valign="top" align="left">No. of empty grains per panicle determined manually</td>
<td valign="top" align="center">35</td>
<td valign="top" align="center">39</td>
<td valign="top" align="center">27</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">12</td>
</tr>
<tr>
<td valign="top" align="left">No. of empty grains per panicle determined using proposed algorithm</td>
<td valign="top" align="center">34</td>
<td valign="top" align="center">40</td>
<td valign="top" align="center">27</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">11</td>
</tr>
<tr>
<td valign="top" align="left">RSSR determined manually, %</td>
<td valign="top" align="center">64.65</td>
<td valign="top" align="center">69.29</td>
<td valign="top" align="center">81.25</td>
<td valign="top" align="center">79.81</td>
<td valign="top" align="center">86.61</td>
<td valign="top" align="center">94.00</td>
<td valign="top" align="center">72.97</td>
<td valign="top" align="center">92.75</td>
<td valign="top" align="center">94.55</td>
<td valign="top" align="center">88.12</td>
</tr>
<tr>
<td valign="top" align="left">RSSR determined using proposed algorithm, %</td>
<td valign="top" align="center">64.89</td>
<td valign="top" align="center">68.53</td>
<td valign="top" align="center">81.55</td>
<td valign="top" align="center">80.23</td>
<td valign="top" align="center">86.18</td>
<td valign="top" align="center">93.65</td>
<td valign="top" align="center">73.08</td>
<td valign="top" align="center">92.69</td>
<td valign="top" align="center">95.79</td>
<td valign="top" align="center">88.98</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy of the full grain number per panicle, %</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">97.73</td>
<td valign="top" align="center">98.32</td>
<td valign="top" align="center">98.80</td>
<td valign="top" align="center">97.98</td>
<td valign="top" align="center">96.58</td>
<td valign="top" align="center">98.18</td>
<td valign="top" align="center">96.97</td>
<td valign="top" align="center">94.55</td>
<td valign="top" align="center">97.80</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy of the empty grain number per panicle, %</td>
<td valign="top" align="center">97.14</td>
<td valign="top" align="center">97.50</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">95.24</td>
<td valign="top" align="center">93.75</td>
<td valign="top" align="center">90.00</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">66.67</td>
<td valign="top" align="center">91.67</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy of the seed setting rate, %</td>
<td valign="top" align="center">99.63</td>
<td valign="top" align="center">98.90</td>
<td valign="top" align="center">99.63</td>
<td valign="top" align="center">99.48</td>
<td valign="top" align="center">99.50</td>
<td valign="top" align="center">99.63</td>
<td valign="top" align="center">99.85</td>
<td valign="top" align="center">99.94</td>
<td valign="top" align="center">98.71</td>
<td valign="top" align="center">99.03</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="S4" sec-type="discussion">
<title>Discussion</title>
<sec id="S4.SS1">
<title>Detection Effect of Different Data Sets</title>
<p>To better understand the performance of our proposed methods, we studied the network detection effects during different image states. First, however, it must be noted that the rice identification process is carried out using the initial image, which has 4,032 &#x00D7; 3,024 pixels.</p>
<p><xref ref-type="table" rid="T3">Table 3</xref> shows the detection performances of the six deep learning networks, all of which are clear as the high-resolution input images undergo the necessary resizing before going through the networks. However, in spite of the preservation of various network category characteristics, the minor differences between full and empty grains are still easily ignored. Therefore, although we adopted a variety of networks to train the data set, we were still unable to find a network with an accuracy as high as our own experimental results. Our proposed model, the YOLO V4 network, achieved the best accuracy among the six networks, with an mAP value of 17.97%; however, this is still far below our target expectations.</p>
<table-wrap position="float" id="T3">
<label>TABLE 3</label>
<caption><p>Detection performance of the different models during the training data set&#x2019;s untrimmed state.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Network name</td>
<td valign="top" align="center">Category</td>
<td valign="top" align="center">Precision</td>
<td valign="top" align="center">Recall</td>
<td valign="top" align="center">F1</td>
<td valign="top" align="center">AP</td>
<td valign="top" align="center">mAP</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Faster R-CNN (ResNet50)</td>
<td valign="top" align="left">Full grain</td>
<td valign="top" align="center">14.43%</td>
<td valign="top" align="center">3.01%</td>
<td valign="top" align="center">0.05</td>
<td valign="top" align="center">0.55%</td>
<td valign="top" align="center">0.30%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="left">Empty grain</td>
<td valign="top" align="center">6.61%</td>
<td valign="top" align="center">0.26%</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.05%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Faster R-CNN (VGG16)</td>
<td valign="top" align="left">Full grain</td>
<td valign="top" align="center">12.47%</td>
<td valign="top" align="center">2.40%</td>
<td valign="top" align="center">0.04</td>
<td valign="top" align="center">0.37%</td>
<td valign="top" align="center">0.21%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="left">Empty grain</td>
<td valign="top" align="center">7.63%</td>
<td valign="top" align="center">0.22%</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.04%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">SSD</td>
<td valign="top" align="left">Full grain</td>
<td valign="top" align="center">9.37%</td>
<td valign="top" align="center">9.95%</td>
<td valign="top" align="center">0.1</td>
<td valign="top" align="center">1.11%</td>
<td valign="top" align="center">0.67%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="left">Empty grain</td>
<td valign="top" align="center">2.14%</td>
<td valign="top" align="center">0.14%</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.22%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">EfficientDet</td>
<td valign="top" align="left">Full grain</td>
<td valign="top" align="center">0.01%</td>
<td valign="top" align="center">0.01%</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.26%</td>
<td valign="top" align="center">0.14%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="left">Empty grain</td>
<td valign="top" align="center">0.01%</td>
<td valign="top" align="center">0.01%</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0.01%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">YOLO V3</td>
<td valign="top" align="left">Full grain</td>
<td valign="top" align="center">45.53%</td>
<td valign="top" align="center">45.77%</td>
<td valign="top" align="center">0.46</td>
<td valign="top" align="center">29.82%</td>
<td valign="top" align="center">16.65%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="left">Empty grain</td>
<td valign="top" align="center">37.21%</td>
<td valign="top" align="center">4.39%</td>
<td valign="top" align="center">0.08</td>
<td valign="top" align="center">3.48%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">YOLO V4</td>
<td valign="top" align="left">Full grain</td>
<td valign="top" align="center">49.54%</td>
<td valign="top" align="center">40.30%</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">24.51%</td>
<td valign="top" align="center">17.97%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="left">Empty grain</td>
<td valign="top" align="center">43.69%</td>
<td valign="top" align="center">17.60%</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">11.43%</td>
<td/>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="table" rid="T4">Table 4</xref> shows the detection effect under precise division. 4,560 images were obtained by cropping 190 images, whereupon these were used as the data set. The cropping principle is that the size of the cropped images be as close as possible to the input size of each network, and that the categories of half-full grain and half-empty grain are added. H-full and H-empty represent the full and empty grains detected in the half grain count after cropping. It can be observed that the accuracy of all the networks and the recognition accuracy of some of the categories have been improved. These results accorded with our hypothesis and proved the effectiveness of the proposed method. However, the overall performance remains unsatisfactory.</p>
<table-wrap position="float" id="T4">
<label>TABLE 4</label>
<caption><p>Detection performance of various networks under precise division.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Network name</td>
<td valign="top" align="center">Category</td>
<td valign="top" align="center">Precision</td>
<td valign="top" align="center">Recall</td>
<td valign="top" align="center">F1</td>
<td valign="top" align="center">AP</td>
<td valign="top" align="center">mAP</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Faster R-CNN (ResNet50)</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">73.85%</td>
<td valign="top" align="center">86.68%</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">80.82%</td>
<td valign="top" align="center">37.04%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">59.84%</td>
<td valign="top" align="center">43.10%</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">36.48%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-full grain</td>
<td valign="top" align="center">51.31%</td>
<td valign="top" align="center">31.87%</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">25.12%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-empty grain</td>
<td valign="top" align="center">51.54%</td>
<td valign="top" align="center">4.35%</td>
<td valign="top" align="center">0.08</td>
<td valign="top" align="center">5.73%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Faster R-CNN (VGG16)</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">77.89%</td>
<td valign="top" align="center">90.01%</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">86.53%</td>
<td valign="top" align="center">43.91%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">59.51%</td>
<td valign="top" align="center">51.42%</td>
<td valign="top" align="center">0.55</td>
<td valign="top" align="center">43.66%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-full grain</td>
<td valign="top" align="center">75.34%</td>
<td valign="top" align="center">30.13%</td>
<td valign="top" align="center">0.43</td>
<td valign="top" align="center">36.66%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-empty grain</td>
<td valign="top" align="center">73.08%</td>
<td valign="top" align="center">3.70%</td>
<td valign="top" align="center">0.07</td>
<td valign="top" align="center">8.77%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">SSD</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">70.67%</td>
<td valign="top" align="center">75.72%</td>
<td valign="top" align="center">0.73</td>
<td valign="top" align="center">71.24%</td>
<td valign="top" align="center">37.75%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">38.80%</td>
<td valign="top" align="center">50.25%</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">38.99%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-full grain</td>
<td valign="top" align="center">16.15%</td>
<td valign="top" align="center">55.43%</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">28.89%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-empty grain</td>
<td valign="top" align="center">34.02%</td>
<td valign="top" align="center">10.64%</td>
<td valign="top" align="center">0.16</td>
<td valign="top" align="center">11.87%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">EfficientDet</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">80.89%</td>
<td valign="top" align="center">80.01%</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">86.01%</td>
<td valign="top" align="center">44.38%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">80.14%</td>
<td valign="top" align="center">1.80%</td>
<td valign="top" align="center">0.04</td>
<td valign="top" align="center">32.36%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-full grain</td>
<td valign="top" align="center">83.19%</td>
<td valign="top" align="center">25.71%</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">58.46%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-empty grain</td>
<td valign="top" align="center">0.00%</td>
<td valign="top" align="center">0.00%</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">0.69%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">YOLO V3</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">82.93%</td>
<td valign="top" align="center">83.06%</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">87.72%</td>
<td valign="top" align="center">46.78%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">65.59%</td>
<td valign="top" align="center">27.47%</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">35.51%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-full grain</td>
<td valign="top" align="center">80.04%</td>
<td valign="top" align="center">39.53%</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">56.16%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-empty grain</td>
<td valign="top" align="center">80.00%</td>
<td valign="top" align="center">1.16%</td>
<td valign="top" align="center">0.02</td>
<td valign="top" align="center">7.74%</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">YOLO V4</td>
<td valign="top" align="center">Full grain</td>
<td valign="top" align="center">86.87%</td>
<td valign="top" align="center">93.17%</td>
<td valign="top" align="center">0.9</td>
<td valign="top" align="center">94.27%</td>
<td valign="top" align="center">66.57%</td>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">Empty grain</td>
<td valign="top" align="center">79.30%</td>
<td valign="top" align="center">76.37%</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">78.44%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-full grain</td>
<td valign="top" align="center">86.73%</td>
<td valign="top" align="center">51.07%</td>
<td valign="top" align="center">0.64</td>
<td valign="top" align="center">64.38%</td>
<td/>
</tr>
<tr>
<td valign="top" align="justify"/><td valign="top" align="center">H-empty grain</td>
<td valign="top" align="center">79.93%</td>
<td valign="top" align="center">14.99%</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">29.19%</td>
<td/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="S4.SS2">
<title>Prediction Effect of Different Convolution Neural Networks</title>
<p><xref ref-type="fig" rid="F10">Figure 10</xref> shows the predictive effects of our six network architectures: Faster R-CNN (ResNet50), Faster R-CNN (VGG16), SSD, EfficientDet, YOLO V3, and YOLO V4. Through this, it can be seen that most of the target detection methods greatly improve the detection effect once image segmentation has been completed. Faster R-CNN (ResNet50), Faster R-CNN (VGG16), EfficientDet, and YOLO V3 in particular, showed significant improvements when working with the proposed method, and performed well when detecting full grain. Almost all the full grain samples were detected, but empty and half grain samples were not detected as efficiently. YOLO V4 on the other hand, was not only the best at detecting full grains, but also at detecting the empty and half grains, as well as many categories that the other networks were unable to detect.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption><p>Comparison between the prediction results and the actual results from the different networks.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g010.tif"/>
</fig>
</sec>
<sec id="S4.SS3">
<title>Performance vs. Speed</title>
<p><xref ref-type="fig" rid="F11">Figure 11A</xref> shows that as the number of predicted images increased, so did the prediction time, with a roughly linear increase. We calculated that one image&#x2019;s average running time is about 2.65 s, which is much less than the time required for manual counting.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption><p>Performance: <bold>(A)</bold> Relationship between the number of different prediction images and prediction time, <bold>(B)</bold> the error in terms of mAP vs. speed (FPS) on the test set.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-770916-g011.tif"/>
</fig>
<p>We also considered the inference speed of various networks. <xref ref-type="fig" rid="F11">Figure 11B</xref> shows the error terms for mAP and speed (FPS) on the test data set. Faster R-CNN (ResNet50), Faster R-CNN (VGG16), SSD, EfficientDet, YOLO V3, and YOLO V4 were all implemented using the same Pytorch framework and used the same input image size. We measured the speed of all the methods on a single Nvidia GeForce GTX TITAN XP GPU (12G) computer. According to <xref ref-type="fig" rid="F11">Figure 11B</xref>, YOLO V4 is superior to all of the other methods, with the exception of YOLO V3, in both its speed (FPS) and mAP (the higher the better). YOLO V4 is significantly better than YOLO V3 in mAP, but the detection speed (FPS) is slightly inferior. Considering the overall situation, we think that the importance of mAP is higher than the detection speed (FPS). Therefore, we think that the performance of YOLO V4 is stronger. Faster R-CNN (ResNet50), Faster R-CNN (VGG16), and EfficientDet meanwhile, show less of a difference in their performance and speed. The SSD&#x2019;s speed was similar to Faster R-CNN (ResNet50), Faster R-CNN (VGG16), and EfficientDet, but its performance was far below that of the other networks, with a poor detection of small features being the main issue.</p>
</sec>
<sec id="S4.SS4">
<title>Error Analysis</title>
<p>Through the identification of the grains of 60 rice images, we found that the average error number of full grains was 5.78 grains, the average error number of empty grains was 2.76 grains, and the final RSSR error was 2.84%. In addition, the results of MAE, MSE, and RMSE for full grains, empty grains, and seed setting rates can be obtained from <xref ref-type="fig" rid="F9">Figures 9A&#x2013;C</xref>, which shows that although our results have certain errors, they are acceptable.</p>
<p>In future work, we plan to continue improving the detection accuracy of full rice grains and empty grains, and to eliminate the impact of full half grains on RSSR as much as possible. Considering the high efficiency of the program, we will also improve the RSSR calculation speed.</p>
</sec>
</sec>
<sec id="S5" sec-type="conclusion">
<title>Conclusion</title>
<p>In this paper, a RSSR calculation method based on deep learning for high-resolution images of rice panicles is proposed for the realization of the automatic calculation of RSSR. The calculation method is composed of both deep learning and RSSROA. Deep learning is used to identify the grain category characteristics of rice, and the RSSROA is used to calculate the RSSR.</p>
<p>In this study, a rice panicle data set composed of 4,560 cut images was established. These images were taken from multiple rice varieties which had been grown under the same environment and had been processed based on image segmentation. Through the identification and comparison of data sets, we chose YOLO V4 with the best comprehensive performance as our network for calculating RSSR. In addition, the detection accuracy for full grain, empty grain, and RSSR in 10 randomly selected rice images were 97.69, 93.20, and 99.43%, respectively. The calculation time for the RSSR in each image was 2.65 s, which meets the needs for automatic calculation. In cooperation with rice research institutions, because this method is a non-destructive operation when collecting rice panicle information, it is more convenient for rice researchers to reserve seeds, and the simple operation method enables rice researchers to obtain RSSR information more efficiently and accurately, which will be a reliable method for further estimating rice yield.</p>
</sec>
<sec id="S6" sec-type="data-availability">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/soberguo/riceseedsettingrate">https://www.kaggle.com/soberguo/riceseedsettingrate</ext-link>.</p>
</sec>
<sec id="S7">
<title>Author Contributions</title>
<p>YG: formal analysis, investigation, methodology, visualization, and writing&#x2014;original draft. SL: supervision and validation. YL, ZH, and ZZ: project administration and resources. DX: writing&#x2014;review and editing and funding acquisition. QC: writing&#x2014;review and editing, funding acquisition, and resources. JW: writing&#x2014;review and editing and resources. RZ: designed the research the article, conceptualization, data curation, funding acquisition, resources, and writing&#x2014;review and editing. All authors agreed to be accountable for all aspects of their work to ensure that the questions related to the accuracy or integrity of any part is appropriately investigated and resolved, and approved for the final version to be published.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="pudiscl1" sec-type="disclaimer">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<sec id="S8" sec-type="funding-information">
<title>Funding</title>
<p>This work was supported by the National Natural Science Foundation of China (Grant nos. 31400074, 31471516, 31271747, and 30971809), the Natural Science Foundation of Heilongjiang Province of China (LH2021C021), and the Heilongjiang Postdoctoral Science Foundation (LBH-Q18025).</p>
</sec>
<sec id="S9" sec-type="supplementary-material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2021.770916/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2021.770916/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Image_1.TIFF" id="FS1" mimetype="image/tiff" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Figure 1</label>
<caption><p>Precision-recall curves of the different convolutional neural networks in training set. <bold>(A&#x2013;C)</bold> Are the Faster R-CNN (ResNet50) network Precision-Recall curves, where <bold>(A)</bold> is the full grain precision-recall curve obtained by the Faster R-CNN (ResNet50) network, <bold>(B)</bold> is the empty grain precision-recall curve obtained by the Faster R-CNN (ResNet50) network, and <bold>(C)</bold> is the half grain precision-recall curve obtained by the Faster R-CNN (ResNet50) network. <bold>(D&#x2013;F)</bold> Are the Faster R-CNN (VGG16) network Precision-Recall curves, where <bold>(D)</bold> is the full grain precision-recall curve obtained by the Faster R-CNN (VGG16) network, <bold>(E)</bold> is the empty grain precision-recall curve obtained by the Faster R-CNN (VGG16) network, and <bold>(F)</bold> is the half grain precision-recall curve obtained by the Faster R-CNN (VGG16) network. <bold>(G&#x2013;I)</bold> Are the SSD network precision-recall curves, where <bold>(G)</bold> is the full grain precision-recall curve obtained by the SSD network, <bold>(H)</bold> is the empty grain precision-recall curve obtained by the SSD network, and <bold>(I)</bold> is the half grain precision-recall curve obtained by the SSD network. <bold>(J&#x2013;L)</bold> Are the EfficientDet network precision-recall curves, where <bold>(J)</bold> is the full grain precision-recall curve obtained by the EfficientDet network, <bold>(K)</bold> is the empty grain precision-recall curve obtained by the EfficientDet network, and <bold>(L)</bold> is the half grain precision-recall curve obtained by the EfficientDet network. 
<bold>(M&#x2013;O)</bold> Are the YOLO V3 network precision-recall curves, where <bold>(M)</bold> is the full grain precision-recall curve obtained by the YOLO V3 network, <bold>(N)</bold> is the empty grain precision-recall curve obtained by the YOLO V3 network, and <bold>(O)</bold> is the half grain precision-recall curve obtained by the YOLO V3 network. <bold>(P&#x2013;R)</bold> Are the YOLO V4 network precision-recall curves, where <bold>(P)</bold> is the full grain precision-recall curve obtained by the YOLO V4 network, <bold>(Q)</bold> is the empty grain precision-recall curve obtained by the YOLO V4 network, and <bold>(R)</bold> is the half grain precision-recall curve obtained by the YOLO V4 network.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Table_1.DOCX" id="TS1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.docx" id="TS2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Afonso</surname> <given-names>M.</given-names></name> <name><surname>Fonteijn</surname> <given-names>H.</given-names></name> <name><surname>Fiorentin</surname> <given-names>F.</given-names></name> <name><surname>Lensink</surname> <given-names>D.</given-names></name> <name><surname>Mooij</surname> <given-names>M.</given-names></name> <name><surname>Faber</surname> <given-names>N.</given-names></name></person-group> (<year>2020</year>). <article-title>Tomato fruit detection and counting in greenhouses using deep learning.</article-title> <source><italic>Front. Plant Sci.</italic></source> <volume>11</volume>:<fpage>571299</fpage>. <pub-id pub-id-type="doi">10.3389/fpls.2020.571299</pub-id> <pub-id pub-id-type="pmid">33329628</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bochkovskiy</surname> <given-names>A.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Mark Liao</surname> <given-names>H.</given-names></name></person-group> (<year>2020</year>). <article-title>YOLOv4: optimal speed and accuracy of object detection.</article-title> <source><italic>arXiv</italic></source> [<comment>Preprint</comment>]. <volume>arXiv</volume>:<fpage>2004.10934</fpage>.</citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chatnuntawech</surname> <given-names>I.</given-names></name> <name><surname>Tantisantisom</surname> <given-names>K.</given-names></name> <name><surname>Khanchaitit</surname> <given-names>P.</given-names></name> <name><surname>Boonkoom</surname> <given-names>T.</given-names></name> <name><surname>Bilgic</surname> <given-names>B.</given-names></name> <name><surname>Chuangsuwanich</surname> <given-names>E.</given-names></name></person-group> (<year>2018</year>). <article-title>Rice classification using spatio-spectral deep convolutional neural network.</article-title> <source><italic>arXiv</italic></source> [<comment>Preprint</comment>]. <volume>arXiv</volume>:<fpage>1805.11491</fpage>.</citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Xiang</surname> <given-names>S.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Pan</surname> <given-names>C.</given-names></name></person-group> (<year>2014</year>). <article-title>Vehicle detection in satellite images by hybrid deep convolutional neural networks.</article-title> <source><italic>IEEE Geosci. Remote Sens. Lett.</italic></source> <volume>11</volume> <fpage>1797</fpage>&#x2013;<lpage>1801</lpage>. <pub-id pub-id-type="doi">10.1109/ACPR.2013.33</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chu</surname> <given-names>Z.</given-names></name> <name><surname>Yu</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>An end-to-end model for rice yield prediction using deep learning fusion.</article-title> <source><italic>Comput. Electron. Agric.</italic></source> <volume>174</volume>:<fpage>105471</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2020.105471</pub-id></citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Csurka</surname> <given-names>G.</given-names></name> <name><surname>Larlus</surname> <given-names>D.</given-names></name> <name><surname>Perronnin</surname> <given-names>F.</given-names></name></person-group> (<year>2013</year>). &#x201C;<article-title>What is a good evaluation measure for semantic segmentation?</article-title>,&#x201D; in <source><italic>Proceedings of the British Machine Vision Conference</italic></source>, (<publisher-loc>Bristol</publisher-loc>: <publisher-name>BMV Press</publisher-name>).</citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Desai</surname> <given-names>S. V.</given-names></name> <name><surname>Balasubramanian</surname> <given-names>V. N.</given-names></name> <name><surname>Fukatsu</surname> <given-names>T.</given-names></name> <name><surname>Ninomiya</surname> <given-names>S.</given-names></name> <name><surname>Guo</surname> <given-names>W.</given-names></name></person-group> (<year>2019</year>). <article-title>Automatic estimation of heading date of paddy rice using deep learning.</article-title> <source><italic>Plant Methods.</italic></source> <volume>15</volume>:<fpage>76</fpage>. <pub-id pub-id-type="doi">10.1186/s13007-019-0457-1</pub-id> <pub-id pub-id-type="pmid">31338116</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dhaka</surname> <given-names>V. S.</given-names></name> <name><surname>Meena</surname> <given-names>S. V.</given-names></name> <name><surname>Rani</surname> <given-names>G.</given-names></name> <name><surname>Sinwar</surname> <given-names>D. K.</given-names></name> <name><surname>Ijaz</surname> <given-names>M. F.</given-names></name></person-group> (<year>2021</year>). <article-title>A survey of deep convolutional neural networks applied for prediction of plant leaf diseases.</article-title> <source><italic>Sensors</italic></source> <volume>21</volume>:<fpage>4749</fpage>. <pub-id pub-id-type="doi">10.3390/s21144749</pub-id> <pub-id pub-id-type="pmid">34300489</pub-id></citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dong</surname> <given-names>C.</given-names></name> <name><surname>Loy</surname> <given-names>C.</given-names></name> <name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Tang</surname> <given-names>X.</given-names></name></person-group> (<year>2016</year>). <article-title>Image super-resolution using deep convolutional networks.</article-title> <source><italic>IEEE Trans. Pattern Anal. Mach. Intell.</italic></source> <volume>38</volume> <fpage>295</fpage>&#x2013;<lpage>307</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2015.2439281</pub-id> <pub-id pub-id-type="pmid">26761735</pub-id></citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Frank</surname> <given-names>J.</given-names></name></person-group> (<year>1951</year>). <article-title>The kolmogorov-smirnov test for goodness of fit.</article-title> <source><italic>Am. Stat. Assoc.</italic></source> <volume>46</volume> <fpage>68</fpage>&#x2013;<lpage>78</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1951.10500769</pub-id></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghadirnezhad</surname> <given-names>R.</given-names></name> <name><surname>Fallah</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>Temperature effect on yield and yield components of different rice cultivars in flowering stage.</article-title> <source><italic>Int. J. Agron.</italic></source> <volume>2014</volume>:<fpage>846707</fpage>. <pub-id pub-id-type="doi">10.1155/2014/846707</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gong</surname> <given-names>L.</given-names></name> <name><surname>Lin</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>T.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Yuan</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>D.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>Image-based on-panicle rice [<italic>Oryza sativa L</italic>.] grain counting with a prior edge wavelet correction model.</article-title> <source><italic>Agronomy</italic></source> <volume>8</volume>:<fpage>91</fpage>. <pub-id pub-id-type="doi">10.3390/agronomy8060091</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>H.</given-names></name> <name><surname>Garcia</surname> <given-names>E. A.</given-names></name></person-group> (<year>2009</year>). <article-title>Learning from imbalanced data.</article-title> <source><italic>IEEE Trans. Knowl. Data Eng.</italic></source> <volume>21</volume> <fpage>1263</fpage>&#x2013;<lpage>1284</lpage>. <pub-id pub-id-type="doi">10.1109/TKDE.2008.239</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hong Son</surname> <given-names>N.</given-names></name> <name><surname>Thai-Nghe</surname> <given-names>N.</given-names></name></person-group> (<year>2019</year>). &#x201C;<article-title>Deep learning for rice quality classification</article-title>,&#x201D; in <source><italic>Proceedings of the International Conference on Advanced Computing and Applications (ACOMP)</italic></source>, (<publisher-loc>Nha Trang</publisher-loc>: <publisher-name>Institute of Electrical and Electronics Engineers</publisher-name>), <fpage>92</fpage>&#x2013;<lpage>96</lpage>.</citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kong</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>Mask R-CNN-based feature extraction and three-dimensional recognition of rice panicle CT images.</article-title> <source><italic>Plant Direct.</italic></source> <volume>5</volume>:<fpage>e00323</fpage>. <pub-id pub-id-type="doi">10.1002/pld3.323</pub-id> <pub-id pub-id-type="pmid">33981945</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kundu</surname> <given-names>N.</given-names></name> <name><surname>Rani</surname> <given-names>G.</given-names></name> <name><surname>Dhaka</surname> <given-names>V. S.</given-names></name> <name><surname>Gupta</surname> <given-names>K.</given-names></name> <name><surname>Nayak</surname> <given-names>S. C.</given-names></name> <name><surname>Verma</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>IoT and interpretable machine learning based framework for disease prediction in pearl millet.</article-title> <source><italic>Sensors</italic></source> <volume>21</volume>:<fpage>5386</fpage>. <pub-id pub-id-type="doi">10.3390/s21165386</pub-id> <pub-id pub-id-type="pmid">34450827</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>LeCun</surname> <given-names>Y.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name> <name><surname>Hinton</surname> <given-names>G.</given-names></name></person-group> (<year>2015</year>). <article-title>Deep learning.</article-title> <source><italic>Nature</italic></source> <volume>521</volume> <fpage>436</fpage>&#x2013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id> <pub-id pub-id-type="pmid">26017442</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Huang</surname> <given-names>B.</given-names></name> <name><surname>Cao</surname> <given-names>X.</given-names></name> <name><surname>Zhou</surname> <given-names>X.</given-names></name> <name><surname>Ye</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Natural variation in PTB1 regulates rice seed setting rate by controlling pollen tube growth.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>4</volume>:<fpage>2793</fpage>. <pub-id pub-id-type="doi">10.1038/ncomms3793</pub-id> <pub-id pub-id-type="pmid">24240868</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>P.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>A deep convolutional neural network architecture for boosting image discrimination accuracy of rice species.</article-title> <source><italic>Food Bioprocess Technol.</italic></source> <volume>11</volume> <fpage>765</fpage>&#x2013;<lpage>773</lpage>. <pub-id pub-id-type="doi">10.1007/s11947-017-2050-9</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>W.</given-names></name> <name><surname>Anguelov</surname> <given-names>D.</given-names></name> <name><surname>Erhan</surname> <given-names>D.</given-names></name> <name><surname>Szegedy</surname> <given-names>C.</given-names></name> <name><surname>Reed</surname> <given-names>S.</given-names></name> <name><surname>Fu</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>SSD: single shot multibox detector.</article-title> <source><italic>arXiv</italic></source> [<comment>Preprint</comment>]. <volume>arXiv</volume>:<fpage>1512.02325v5</fpage>.</citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Song</surname> <given-names>L.</given-names></name> <name><surname>Wo&#x017A;niak</surname> <given-names>M.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>Self-attention negative feedback network for real-time image super-resolution.</article-title> <source><italic>J. King Saud Univ. Comput. Inf. Sci.</italic></source> <pub-id pub-id-type="doi">10.1016/j.jksuci.2021.07.014</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>Y.</given-names></name> <name><surname>Yi</surname> <given-names>S.</given-names></name> <name><surname>Zeng</surname> <given-names>N.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name></person-group> (<year>2017</year>). <article-title>Identification of rice diseases using deep convolutional neural networks.</article-title> <source><italic>Neurocomputing</italic></source> <volume>267</volume> <fpage>378</fpage>&#x2013;<lpage>384</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2017.06.023</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mitra</surname> <given-names>V.</given-names></name> <name><surname>Sivaraman</surname> <given-names>G.</given-names></name> <name><surname>Nam</surname> <given-names>H.</given-names></name> <name><surname>Espy-Wilson</surname> <given-names>C.</given-names></name> <name><surname>Saltzman</surname> <given-names>E.</given-names></name> <name><surname>Tiede</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Hybrid convolutional neural networks for articulatory and acoustic information based speech recognition.</article-title> <source><italic>Speech Commun.</italic></source> <volume>89</volume> <fpage>103</fpage>&#x2013;<lpage>112</lpage>. <pub-id pub-id-type="doi">10.1016/j.specom.2017.03.003</pub-id></citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oosterom</surname> <given-names>E. J. V.</given-names></name> <name><surname>Hammer</surname> <given-names>G. L.</given-names></name></person-group> (<year>2008</year>). <article-title>Determination of grain number in sorghum.</article-title> <source><italic>Field Crops Res.</italic></source> <volume>108</volume> <fpage>259</fpage>&#x2013;<lpage>268</lpage>. <pub-id pub-id-type="doi">10.1016/j.fcr.2008.06.001</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rajeshwari</surname> <given-names>P.</given-names></name> <name><surname>Abhishek</surname> <given-names>P.</given-names></name> <name><surname>Srikanth</surname> <given-names>P.</given-names></name> <name><surname>Vinod</surname> <given-names>T.</given-names></name></person-group> (<year>2019</year>). <article-title>Object detection: an overview.</article-title> <source><italic>Int. J. Trend Sci. Res. Dev.</italic></source> <volume>3</volume> <fpage>1663</fpage>&#x2013;<lpage>1665</lpage>.</citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Redmon</surname> <given-names>J.</given-names></name> <name><surname>Farhadi</surname> <given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>YOLOv3: an incremental improvement.</article-title> <source><italic>arXiv</italic></source> [<comment>Preprint</comment>]. <volume>arXiv</volume>:<fpage>1804.02767</fpage>.</citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ren</surname> <given-names>S.</given-names></name> <name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Girshick</surname> <given-names>R.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Faster R-CNN: towards real-time object detection with region proposal networks.</article-title> <source><italic>arXiv</italic></source> [<comment>Preprint</comment>]. <volume>arXiv</volume>:<fpage>1506.01497v3</fpage>.</citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schmidhuber</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Deep learning in neural networks: an overview.</article-title> <source><italic>Neural Netw.</italic></source> <volume>2015</volume> <fpage>85</fpage>&#x2013;<lpage>117</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2014.09.003</pub-id> <pub-id pub-id-type="pmid">25462637</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname> <given-names>M.</given-names></name> <name><surname>Pang</surname> <given-names>R.</given-names></name> <name><surname>Le</surname> <given-names>V. Q.</given-names></name></person-group> (<year>2020</year>). <article-title>EfficientDet: scalable and efficient object detection.</article-title> <source><italic>arXiv</italic></source> [<comment>Preprint</comment>]. <volume>arXiv</volume>:<fpage>1911.09070v7</fpage>.</citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>W.</given-names></name> <name><surname>Liu</surname> <given-names>T.</given-names></name> <name><surname>Zhou</surname> <given-names>P.</given-names></name> <name><surname>Yang</surname> <given-names>T.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Zhong</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Image analysis-based recognition and quantification of grain number per panicle in rice.</article-title> <source><italic>Plant Methods</italic></source> <volume>15</volume>:<fpage>122</fpage>. <pub-id pub-id-type="doi">10.1186/s13007-019-0510-0</pub-id> <pub-id pub-id-type="pmid">31695727</pub-id></citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xiang</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>P.</given-names></name> <name><surname>Yu</surname> <given-names>P.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Sun</surname> <given-names>L.</given-names></name> <name><surname>Wu</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>LSSR1 facilitates seed setting rate by promoting fertilization in rice.</article-title> <source><italic>Rice</italic></source> <volume>12</volume>:<fpage>31</fpage>. <pub-id pub-id-type="doi">10.1186/s12284-019-0280-3</pub-id> <pub-id pub-id-type="pmid">31073866</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xiong</surname> <given-names>X.</given-names></name> <name><surname>Duan</surname> <given-names>L.</given-names></name> <name><surname>Liu</surname> <given-names>L.</given-names></name> <name><surname>Tu</surname> <given-names>H.</given-names></name> <name><surname>Yang</surname> <given-names>P.</given-names></name> <name><surname>Wu</surname> <given-names>D.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Panicle-SEG: a robust image segmentation method for rice panicles in the field based on deep learning and superpixel optimization.</article-title> <source><italic>Plant Methods</italic></source> <volume>13</volume>:<fpage>104</fpage>. <pub-id pub-id-type="doi">10.1186/s13007-017-0254-7</pub-id> <pub-id pub-id-type="pmid">29209408</pub-id></citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>C.</given-names></name> <name><surname>Jiang</surname> <given-names>H.</given-names></name> <name><surname>Yuen</surname> <given-names>P.</given-names></name> <name><surname>Zaki Ahmad</surname> <given-names>K.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>MHW-PD: a robust rice panicles counting algorithm based on deep learning and multi-scale hybrid window.</article-title> <source><italic>Comput. Electron. Agric.</italic></source> <volume>173</volume>:<fpage>105375</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2020.105375</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Yu</surname> <given-names>Y.</given-names></name> <name><surname>Long</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>OsCNGC13 promotes seed-setting rate by facilitating pollen tube growth in stylar tissues.</article-title> <source><italic>PLoS Genet.</italic></source> <volume>13</volume>:<fpage>e1006906</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1006906</pub-id> <pub-id pub-id-type="pmid">28708858</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Deng</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Lin</surname> <given-names>W.</given-names></name> <name><surname>Ji</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Deep convolutional neural networks for multi-modality isointense infant brain image segmentation.</article-title> <source><italic>Neuroimage</italic></source> <volume>108</volume> <fpage>214</fpage>&#x2013;<lpage>224</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuroimage.2014.12.061</pub-id> <pub-id pub-id-type="pmid">25562829</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>H.</given-names></name> <name><surname>Sun</surname> <given-names>L.</given-names></name> <name><surname>Jia</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>C.</given-names></name> <name><surname>Fu</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Effect of nitrogen, phosphorus and potassium fertilizer combined application on japonica rice growth and yield in cold areas.</article-title> <source><italic>J. Northeast Agric. Univ.</italic></source> <volume>51</volume> <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.19720/j.cnki.issn.1005-9369.2020.12.001</pub-id></citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>C.</given-names></name> <name><surname>Ye</surname> <given-names>H.</given-names></name> <name><surname>Hu</surname> <given-names>J.</given-names></name> <name><surname>Shi</surname> <given-names>X.</given-names></name> <name><surname>Hua</surname> <given-names>S.</given-names></name> <name><surname>Yue</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Automated counting of rice panicle by applying deep learning model to images from unmanned aerial vehicle platform.</article-title> <source><italic>Sensors</italic></source> <volume>19</volume>:<fpage>3106</fpage>. <pub-id pub-id-type="doi">10.3390/s19143106</pub-id> <pub-id pub-id-type="pmid">31337086</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname> <given-names>H.</given-names></name> <name><surname>Lu</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>L.</given-names></name> <name><surname>Cao</surname> <given-names>Z.</given-names></name></person-group> (<year>2020</year>). <article-title>Maize tassels detection: a benchmark of the state of the art.</article-title> <source><italic>Plant Methods</italic></source> <volume>16</volume>:<fpage>108</fpage>. <pub-id pub-id-type="doi">10.1186/s13007-020-00651-z</pub-id> <pub-id pub-id-type="pmid">32782455</pub-id></citation></ref>
</ref-list>
</back>
</article>
