<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Phys.</journal-id>
<journal-title>Frontiers in Physics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Phys.</abbrev-journal-title>
<issn pub-type="epub">2296-424X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1371320</article-id>
<article-id pub-id-type="doi">10.3389/fphy.2024.1371320</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Physics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhanced YOLOv5s &#x2b; DeepSORT method for highway vehicle speed detection and multi-sensor verification</article-title>
<alt-title alt-title-type="left-running-head">Luo et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fphy.2024.1371320">10.3389/fphy.2024.1371320</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Luo</surname>
<given-names>Zhongbin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2630472/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Bi</surname>
<given-names>Yanqiu</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Xun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2299064/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Yong</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2290657/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yu</surname>
<given-names>Shanchuan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Mengjun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ye</surname>
<given-names>Qing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>China Merchants Chongqing Communications Research and Design Institute Co., Ltd.</institution>, <addr-line>Chongqing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Research and Development Center of Transport Industry of Self-Driving Technology</institution>, <addr-line>Chongqing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>National and Local Joint Engineering Research Center of Transportation Civil Engineering Materials</institution>, <institution>Chongqing Jiaotong University</institution>, <addr-line>Chongqing</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>School of Civil Engineering</institution>, <institution>Chongqing Jiaotong University</institution>, <addr-line>Chongqing</addr-line>, <addr-line>Shandong</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>College of Computer Science</institution>, <institution>Chongqing University</institution>, <addr-line>Chongqing</addr-line>, <country>China</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Key Laboratory of Dependable Service Computing in Cyber Physical Society</institution>, <institution>Ministry of Education</institution>, <institution>Chongqing University</institution>, <addr-line>Chongqing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1664211/overview">Guanqiu Qi</ext-link>, Buffalo State College, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2632083/overview">Yunze Wang</ext-link>, Shijiazhuang Tiedao University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2632580/overview">Zhe Li</ext-link>, Hunan University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1394463/overview">Ye Li</ext-link>, Central South University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Yanqiu Bi, <email>biyanqiu@cqjtu.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>02</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1371320</elocation-id>
<history>
<date date-type="received">
<day>16</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>02</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Luo, Bi, Yang, Li, Yu, Wu and Ye.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Luo, Bi, Yang, Li, Yu, Wu and Ye</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Addressing the need for vehicle speed measurement in traffic surveillance, this study introduces an enhanced scheme combining YOLOv5s detection with Deep SORT tracking. Tailored to the characteristics of highway traffic and vehicle features, the dataset data augmentation process was initially optimized. To improve the detector&#x2019;s recognition capabilities, the Swin Transformer Block module was incorporated, enhancing the model&#x2019;s ability to capture local regions of interest. <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss was employed as the loss function for the vehicle detection network, accelerating model convergence and achieving higher regression accuracy. The Mish activation function was utilized to reduce computational overhead and enhance convergence speed. The structure of the Deep SORT appearance feature extraction network was modified, and it was retrained on a vehicle re-identification dataset to mitigate identity switches due to obstructions. Subsequently, using known references in the image such as lane markers and contour labels, the transformation from image pixel coordinates to actual coordinates was accomplished. Finally, vehicle speed was measured by computing the average of instantaneous speeds across multiple frames. Through radar and video Multi-Sensor Verification, the experimental results show that the mean Average Precision (mAP) for target detection consistently exceeds 90%. The effective measurement distance for speed measurement is around 140&#xa0;m, with the absolute speed error generally within 1&#x2013;8&#xa0;km/h, meeting the accuracy requirements for speed measurement. The proposed model is reliable and fully applicable to highway scenarios.</p>
</abstract>
<kwd-group>
<kwd>YOLOv5s</kwd>
<kwd>Deep SORT</kwd>
<kwd>swin transformer</kwd>
<kwd>vehicle speed</kwd>
<kwd>traffic monitoring</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Radiation Detectors and Imaging</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Intelligent Transportation Systems (ITS) have been widely applied to practical traffic scenarios such as highways, urban roads, tunnels, and bridges. This integration owes much to the convergence of various technologies, including pattern recognition, video image processing, and network communication [<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>]. Vehicle speed is a crucial parameter that directly reflects the state of traffic [<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>]. Meanwhile, in highly complex traffic monitoring scenarios and under special weather conditions, intelligent transportation monitoring systems face numerous significant challenges. In addressing the issue of vehicle speeding, the measurement of vehicle speed can provide vital data for traffic management authorities. Accurate measurement of vehicle target speed is one of the challenges faced by traffic monitoring systems.</p>
<p>Traditional vehicle speed detection primarily utilizes inductive loop detection, laser detection, and radar detection. These methods are well-developed and commonly used in traffic systems. However, traditional detection methods have the following disadvantages: (1) the required equipment is expensive; (2) the equipment is installed under the road surface, leading to high subsequent maintenance costs, and maintenance work not only affects traffic but also damages the road structure. Video-based vehicle speed detection leverages numerous traffic video monitoring devices, significantly overcoming the high costs and difficult maintenance issues associated with traditional speed detection methods. Vehicle speed detection systems can be categorized into two types: one type focuses on accurate speed monitoring systems (such as speed camera applications) [<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B6">6</xref>], and the other type, though less precise, can be used to estimate traffic speed (such as traffic camera application scenarios) [<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B8">8</xref>]. This classification system takes into account the intrinsic parameters of the camera (such as sensor size and resolution, focal length), as well as extrinsic parameters (such as the camera&#x2019;s position relative to the road surface, drone-based cameras, etc.), and the number of cameras (monocular, stereo, or multiple cameras).</p>
<p>Through these parameters, the actual scene on the image plane can represent one or multiple lanes, as well as the relative position of vehicles to the camera, ultimately yielding one of the most critical variables: the ratio of pixels to road segment length, i.e., the road length each pixel represents. Due to the perspective projection model, this ratio is directly proportional to the square of the camera&#x2019;s distance, implying that measurements over long distances have poor accuracy. Accurate estimation of the camera&#x2019;s intrinsic and extrinsic parameters is required to provide measurements in the actual coordinate system. The most common approach is soft calibration, which involves calibrating intrinsic parameters in a verification laboratory or using sensor and lens characteristics, and estimating the rigid transformation between the camera and the road surface using manual [<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>] or automatic [<xref ref-type="bibr" rid="B11">11</xref>] methods.</p>
<p>Hard calibration involves estimating both the intrinsic and extrinsic parameters of the camera, which can be done either manually [<xref ref-type="bibr" rid="B12">12</xref>] or automatically [<xref ref-type="bibr" rid="B13">13</xref>&#x2013;<xref ref-type="bibr" rid="B15">15</xref>]. In certain limited scenarios, some details of camera calibration may be overlooked, such as the exact position of the camera, anchoring systems, and gantries. Since cameras are mostly static (except for drone cameras), vehicle detection is most often addressed by modeling the background [<xref ref-type="bibr" rid="B16">16</xref>&#x2013;<xref ref-type="bibr" rid="B18">18</xref>]. Other methods are feature-based, such as detecting vehicle license plates [<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B20">20</xref>] or other characteristics [<xref ref-type="bibr" rid="B21">21</xref>&#x2013;<xref ref-type="bibr" rid="B23">23</xref>].</p>
<p>Recently, learning-based approaches have become increasingly popular for recognizing vehicles in images [<xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>]. The ability to track vehicles with smooth and stable trajectories is a key issue in handling vehicle speed detection. Vehicle tracking can be divided into three different categories: The first category is feature-based [<xref ref-type="bibr" rid="B26">26</xref>&#x2013;<xref ref-type="bibr" rid="B28">28</xref>], where tracking originates from a set of features of the vehicle (such as optical flow). The second category focuses on tracking the centroid of a vehicle&#x2019;s blob or bounding box [<xref ref-type="bibr" rid="B29">29</xref>, <xref ref-type="bibr" rid="B30">30</xref>]. The third category concentrates on tracking the entire vehicle [<xref ref-type="bibr" rid="B31">31</xref>, <xref ref-type="bibr" rid="B32">32</xref>] or its specific parts (such as the license plate [<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>]).</p>
<p>The prerequisite for speed measurement is the effective assessment of distance. In monocular vision systems, the estimation of vehicle distance typically relies on specific constraints and methods. These include: (1) Flat road assumption and homography-based methods, which assume that the road is flat and apply a mathematical transformation known as homography [<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B36">36</xref>], helping in mapping the view of a scene from one perspective to another, which is crucial for estimating distances in 2D images; (2) Detection of lines and specific areas [<xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B38">38</xref>]. By detecting lines and specific areas, designed detection lines and areas can be overlaid on the real-world view, providing a reference scale for measuring distances; (3) Use of prior knowledge about object dimensions, utilizing the known dimensions of certain objects to estimate distances. For instance, knowing the standard sizes of license plates [<xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B40">40</xref>] or the average dimensions of vehicles [<xref ref-type="bibr" rid="B41">41</xref>] can assist in calibrating distance measurements. However, these monocular methods have limitations, which are addressed in stereo vision systems. In stereo vision systems [<xref ref-type="bibr" rid="B42">42</xref>], two cameras are used to capture the same scene from slightly different angles, similar to human binocular vision. This setup allows for more accurate depth perception and distance estimation, as it mimics the way human eyes perceive depth.</p>
<p>Currently, speed detection is primarily divided into macroscopic traffic flow speed and individual vehicle speed. Macroscopic traffic flow speed detection is based on a specific road section, using the length of the section and travel time to estimate the average speed of the segment [<xref ref-type="bibr" rid="B43">43</xref>, <xref ref-type="bibr" rid="B44">44</xref>]. Individual vehicle speed detection focuses on the micro-level speed of the vehicle itself, presenting greater technical challenges. This process requires prior knowledge of the camera&#x2019;s frame rate or accurate timestamps for each image to calculate the time between measurements. Utilizing consecutive or non-consecutive [<xref ref-type="bibr" rid="B45">45</xref>] images to estimate speed is a key factor impacting accuracy. In summary, whether in traffic flow speed or individual vehicle speed detection, factors such as the method of image capture (continuous or non-continuous), frame rate, timestamp accuracy, and the integration of various measurement data need to be carefully considered. The selection method and precision of these factors directly affect the accuracy of speed estimation.</p>
<p>In summary, vision-based vehicle speed detection involves the entire process of camera calibration, distance estimation, and speed estimation. However, the calibration process for monocular vision cameras is complex, the accuracy of distance estimation is relatively poor, and the precision of individual vehicle speed estimation needs improvement. Currently, there are few instances of rapidly detecting and stably tracking vehicle instantaneous speeds solely through video recognition technology, which limits the broader application of video recognition technologies in the field of traffic safety. Therefore, this study introduces an enhanced scheme that combines YOLOv5s detection with Deep SORT tracking, targeting the need for vehicle speed measurement in traffic monitoring. The dataset data expansion process is preliminarily optimized based on the characteristics of highway traffic and vehicle features. The Swin Transformer Block module is introduced to improve the detector&#x2019;s recognition capabilities and enhance the model&#x2019;s ability to capture areas of interest. The <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss is employed as the loss function for the vehicle detection network to accelerate model convergence and achieve higher regression precision. The Mish activation function is used to reduce computational costs and improve convergence speed. Modifications are made to the structure of the Deep SORT appearance feature extraction network, and it is retrained on the vehicle re-identification dataset to mitigate identity switches caused by obstacles. Subsequently, known references in the image, such as lane markings and contour labels, are used to complete the conversion from image pixel coordinates to actual coordinates through maximum likelihood estimation, maximum a posteriori estimation, and non-linear least squares methods. Finally, vehicle speed is measured by calculating the average of instantaneous speeds over multiple frames. The algorithm can detect and track vehicle targets without prior camera parameters and calibration, extract known reference information such as lane lines and contour labels, and automatically convert pixel coordinates to actual coordinates in traffic monitoring scenes, as well as automatically measure vehicle speeds. The algorithm framework is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. Accurate estimation of vehicle speed can support the detection of traffic accidents and incidents, offering scientific technical means for active safety management in intelligent transportation systems.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Algorithm framework diagram.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g001.tif"/>
</fig>
</sec>
<sec id="s2">
<title>2 Improved YOLOv5s &#x2b; DeepSORT algorithm for highway vehicle detection and tracking</title>
<sec id="s2-1">
<title>2.1 Construction of vehicle target dataset</title>
<sec id="s2-1-1">
<title>2.1.1 Characteristics of highway traffic scenarios</title>
<p>There are typically four categories of common highway traffic scenarios, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.<list list-type="simple">
<list-item>
<p>(a) Scene variations, as the setup of traffic monitoring varies, so do the monitoring angles and heights. For instance, the monitoring angle and scene characteristics inside a tunnel differ greatly from those on a highway, leading to significantly reduced detection accuracy and numerous false detections of vehicle targets, as shown in <xref ref-type="fig" rid="F2">Figure 2A</xref>.</p>
</list-item>
<list-item>
<p>(b) The same scene at different times also exhibits significant differences. With changes in time, the brightness and visibility of scene images vary. The characteristics of vehicle targets at night are particularly difficult to capture due to the substantial interference from vehicle lights at night, making it hard to accurately obtain the body contours of target vehicles. If the dataset does not include such special night scene data, the detection results are not ideal, as shown in <xref ref-type="fig" rid="F2">Figure 2B</xref>.</p>
</list-item>
<list-item>
<p>(c) Vehicle targets at different positions in the image will have obvious deformation. The same vehicle target will undergo significant size deformation from distant to closer positions in the image, affecting the detection accuracy of small targets. The red boxes in <xref ref-type="fig" rid="F2">Figure 2C</xref> indicate significant deformations of the same vehicle target at different locations.</p>
</list-item>
<list-item>
<p>(d) On actual roads, there is a widespread occurrence of vehicle occlusion, which can lead to multiple targets being detected as one, resulting in missed and false detections. The red boxes in <xref ref-type="fig" rid="F2">Figure 2D</xref> represent situations where vehicles are obstructing each other.</p>
</list-item>
</list>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Common issues in target vehicle detection.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g002.tif"/>
</fig>
<p>The existence of these four types of issues makes large public datasets such as COCO and VOC unsuitable for the perspectives captured by highway cameras, leading to a large number of false positives and missed detections of target vehicles.</p>
</sec>
<sec id="s2-1-2">
<title>2.1.2 Data preparation</title>
<p>Given the relatively uniform types of motor vehicles in highway scenarios, vehicles are generally classified into three categories: Car, Bus, and Truck. Car mainly refers to passenger vehicles with seating for fewer than seven people; Bus mainly includes commercial buses, public transport buses, etc.; Truck primarily refers to small, medium, and large trucks, trailers, and various types of special-purpose vehicles, as shown in <xref ref-type="table" rid="T1">Table 1</xref>. By collecting datasets from different scenes on highways and manually labeling them using the labelImg tool, a dataset in YOLO format was ultimately created.</p>
<p>The specific process includes: (1) Data Collection: Collect representative image data covering various scenes and angles of target categories. (2) Data Division: Divide the dataset into training, validation, and test sets, typically in a certain ratio, to ensure the independence and generalizability of the data. (3) Bounding Box Annotation: Annotate each target object with a bounding box, usually represented by a rectangle, including the coordinates of the top-left and bottom-right corners. (4) Category Labeling: Assign corresponding category labels to each target object, identifying the category to which the object belongs. During dataset annotation, rectangular bounding boxes encompassing the entire vehicle are marked, with each side fitting closely to the vehicle. Annotation is not performed when the occlusion exceeds 50%, the vehicle type is indistinguishable, or the size is below 10&#x2a;10 pixels. Furthermore, in cases where vehicles are truncated, the truncation is not considered to affect the overall annotation. Trucks used for transportation are uniformly annotated, without separately marking the vehicles on them.</p>
</sec>
<sec id="s2-1-3">
<title>2.1.3 Data augmentation</title>
<p>To enhance the accuracy and generalization capability of model training, data augmentation techniques are employed, tailored to the characteristics of highway traffic environments and vehicle features. These techniques include Mosaic, Random_perspective, Mixup, HSV, Flipud, and Fliplr, as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Data augmentation flowchart.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g003.tif"/>
</fig>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 Optimization of object detection network</title>
<p>In response to the identified issues with YOLOv5 in highway vehicle detection, the following optimizations were made to enhance the accuracy of vehicle detection: (1) Incorporating the Swin Transformer Block module to improve the model&#x2019;s ability to capture information from local areas of interest; (2) Utilizing <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss as the loss function for the vehicle detection network to accelerate model convergence and achieve higher regression accuracy; (3) Adopting the Mish activation function to reduce computational overhead and increase convergence speed.</p>
<sec id="s2-2-1">
<title>2.2.1 Introduction of swin transformer block</title>
<p>To address the shortcomings of traditional YOLOv5 in traffic object detection, the Swin Transformer Block module is introduced for optimization.</p>
<p>The Swin Transformer network [<xref ref-type="bibr" rid="B46">46</xref>], proposed in 2021, is a Transformer network enhanced with a local self-attention mechanism. It has stronger dynamic computation capabilities compared to convolutional neural networks, with enhanced modeling capacity, and can adaptively compute both local and global pixel relationships, making it highly valuable for widespread use.</p>
<p>The core modules of the Transformer Block overall architecture are the Window-based Multi-Head Self-Attention layer (W-MSA) and the Shifted Window-based Multi-Head Self-Attention layer (SW-MSA). By restricting attention computation within a window, the network not only introduces the locality of convolution operations but also saves computational resources, resulting in good performance.</p>
<p>This article proposes the integration of the Swin Transformer Block structure into the backbone feature extraction network and neck feature fusion, utilizing the efficient self-attention mechanism module to fully explore the potential of feature representation. The improved YOLOv5 network incorporating the Swin Transformer Block module is shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, named SwinTransYOLOv5 network.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>SwinTransYOLOv5 network structure diagram.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g004.tif"/>
</fig>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Improvement of loss function</title>
<p>YOLOv5s employs <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss as the bounding box regression loss function to evaluate the distance between the predicted bounding box (PB) and the ground truth bounding box (GT), as shown in Eq. <xref ref-type="disp-formula" rid="e1">1</xref>.<disp-formula id="e1">
<mml:math id="m5">
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi mathvariant="bold-italic">G</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:msup>
</mml:mfrac>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">L</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">G</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">G</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>In the formula, <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the intersection over union of PB and GT, <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the area of the smallest rectangular box containing both PB and GT, <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the union of PB and GT, and <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss. The advantage of <inline-formula id="inf10">
<mml:math id="m11">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss is its scale invariance, meaning the similarity between PB and GT is independent of their spatial scale. The problem with <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss is that when either PB or GT completely encompasses the other, <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss degenerates entirely into <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss. Because it heavily relies on the <inline-formula id="inf14">
<mml:math id="m15">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> term, this results in slow convergence during actual training and lower accuracy of the predicted bounding boxes. To address these issues, <inline-formula id="inf15">
<mml:math id="m16">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss also considers the overlapping area of PB and GT, the distance between their centroids, and their aspect ratios, as shown in Eq. <xref ref-type="disp-formula" rid="e2">2</xref>.<disp-formula id="e2">
<mml:math id="m17">
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi mathvariant="bold-italic">C</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">&#x3c1;</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">g</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mfrac>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">L</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">C</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">C</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>In the formula, <inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf17">
<mml:math id="m19">
<mml:mrow>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the centroids of PB and GT, <inline-formula id="inf18">
<mml:math id="m20">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the squared Euclidean distance between the two centroids, <inline-formula id="inf19">
<mml:math id="m21">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the diagonal length of the smallest enclosing box of PB and GT, <inline-formula id="inf20">
<mml:math id="m22">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents a positive balance parameter, and <inline-formula id="inf21">
<mml:math id="m23">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> indicates the consistency of the aspect ratio of PB and GT. The definitions of <inline-formula id="inf22">
<mml:math id="m24">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf23">
<mml:math id="m25">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are as follows in Eq. <xref ref-type="disp-formula" rid="e3">3</xref>.<disp-formula id="e3">
<mml:math id="m26">
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">4</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">&#x3c0;</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">arctan</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">g</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">h</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">g</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">arctan</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">h</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>In the formula, <inline-formula id="inf24">
<mml:math id="m27">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf25">
<mml:math id="m28">
<mml:mrow>
<mml:msup>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf26">
<mml:math id="m29">
<mml:mrow>
<mml:mi>&#x3c9;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf27">
<mml:math id="m30">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> respectively represent the width and height of GT and PB.</p>
<p>Compared to the <inline-formula id="inf28">
<mml:math id="m31">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss used in YOLOv5s, <inline-formula id="inf29">
<mml:math id="m32">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss incorporates penalty terms for the distance between the centers of PB and GT, as well as their aspect ratios in the loss function. This ensures faster convergence of the predicted bounding boxes during training and yields higher regression localization accuracy. In this article, <inline-formula id="inf30">
<mml:math id="m33">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss is adopted as the loss function for the vehicle detection network.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Activation function</title>
<p>Changing the activation function can significantly enhance recognition performance. Activation functions are categorized into saturated and non-saturated types. The primary advantages of using non-saturated activation functions are twofold [<xref ref-type="bibr" rid="B47">47</xref>]: firstly, they effectively address the vanishing gradient problem, which becomes more severe with saturated activation functions; secondly, they can accelerate the convergence speed. After comparing the pros, cons, and characteristics of various activation functions, as shown in <xref ref-type="table" rid="T2">Table 2</xref>, the Leaky ReLU activation function in YOLOv5 was replaced with the Mish activation function, a choice that does not significantly increase the computational load.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Dataset categorization.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Type</th>
<th align="center">Example</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Car</td>
<td align="center">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx1.tif"/>
</td>
</tr>
<tr>
<td align="center">Bus</td>
<td align="center">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx2.tif"/>
</td>
</tr>
<tr>
<td align="center">Truck</td>
<td align="center">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx3.tif"/>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison of common activation functions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Sigmoid</th>
<th align="center">tanh</th>
<th align="center">ReLU</th>
<th align="center">Leaky ReLU</th>
<th align="center">Mish</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Function graphs</td>
<td align="left">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx4.tif"/>
</td>
<td align="center">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx5.tif"/>
</td>
<td align="center">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx6.tif"/>
</td>
<td align="center">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx7.tif"/>
</td>
<td align="center">
<inline-graphic xlink:href="FPHY_fphy-2024-1371320_wc_tfx8.tif"/>
</td>
</tr>
<tr>
<td align="left">Function Formula</td>
<td align="center">
<inline-formula id="inf31">
<mml:math id="m34">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf32">
<mml:math id="m35">
<mml:mrow>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf33">
<mml:math id="m36">
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf34">
<mml:math id="m37">
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0.1</mml:mn>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf35">
<mml:math id="m38">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td rowspan="3" align="left">Advantages</td>
<td rowspan="3" align="left">Can restrict the output to be between (0, 1), facilitating the completion of classification tasks</td>
<td align="left">&#x2460;Can restrict the output to be between (&#x2212;1, 1), facilitating the completion of classification tasks</td>
<td rowspan="3" align="left">Linear: Saves computational resources and shortens convergence time</td>
<td align="left">&#x2460;Linear</td>
<td align="left">&#x2460;Linear</td>
</tr>
<tr>
<td align="left">&#x2461;Zero-Centered</td>
<td align="left">&#x2461;Gradient non-saturation, no neuron death</td>
<td align="left">&#x2461;Gradient non-saturation, no neuron death</td>
</tr>
<tr>
<td align="left"/>
<td align="left"/>
<td align="left">&#x2462;The network&#x2019;s convergence is the best among the five activation functions</td>
</tr>
<tr>
<td rowspan="3" align="left">Disadvantages</td>
<td align="left">&#x2460;The output is not zero-centered, leading to a zigzag pattern in gradient descent</td>
<td align="left">&#x2461;Gradient saturation, Gradient vanishing</td>
<td rowspan="3" align="left">Neuron Death: The left side of the ReLU function is completely flat. When the neuron&#x2019;s <inline-formula id="inf36">
<mml:math id="m39">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-value is negative, the output <inline-formula id="inf37">
<mml:math id="m40">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is 0, and the gradient is also 0, making it impossible to alter the weight value <inline-formula id="inf38">
<mml:math id="m41">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> through the gradient, leaving <inline-formula id="inf39">
<mml:math id="m42">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> unchanged</td>
<td rowspan="3" align="left">The network&#x2019;s convergence is not advantageous compared to the latest networks</td>
<td rowspan="3" align="left">Relatively higher computational cost</td>
</tr>
<tr>
<td align="left">&#x2461;Gradient saturation, Gradient vanishing</td>
<td align="left">&#x2462;Non-linear</td>
</tr>
<tr>
<td align="left">&#x2462;Non-linear, involves exponential operations, consuming more resources during computation</td>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Optimization of deep SORT for vehicle tracking</title>
<p>The multi-object online tracking algorithm SORT [<xref ref-type="bibr" rid="B48">48</xref>] (Simple Online and Realtime Tracking) utilizes Kalman filtering and Hungarian matching, using the <inline-formula id="inf40">
<mml:math id="m43">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> between tracking and detection results as the cost matrix, to implement a simple, efficient, and practical tracking paradigm. However, the SORT algorithm&#x2019;s limitation lies in its association metric being effective only when the uncertainty in state estimation is low, leading to numerous identity switches and tracking failures when the target is occluded. To address this issue, Deep SORT [<xref ref-type="bibr" rid="B49">49</xref>] combines both motion and appearance information of the target as the association metric, reducing tracking failures caused by the target&#x2019;s disappearance and reappearance.</p>
<sec id="s2-3-1">
<title>2.3.1 Tracking processing and state estimation</title>
<p>Deep SORT uses an 8-dimensional state space <inline-formula id="inf41">
<mml:math id="m44">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>,</mml:mo>
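<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>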
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> to describe the target&#x2019;s state and motion information in the image coordinate system. <inline-formula id="inf42">
<mml:math id="m45">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf43">
<mml:math id="m46">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represent the center coordinates of the target detection box, <inline-formula id="inf44">
<mml:math id="m47">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf45">
<mml:math id="m48">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> respectively represent the aspect ratio and height of the detection box, and <inline-formula id="inf46">
<mml:math id="m49">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
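<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>&#x2d9;</mml:mo>
</mml:mover>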
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> represent the relative velocity of the previous four parameters in the image coordinates. The algorithm employs a standard Kalman filter with a constant velocity model and a linear observation model, using the detection box parameters <inline-formula id="inf47">
<mml:math id="m50">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> as direct observations of the object state. By combining motion and appearance information, the Hungarian algorithm is used to match predicted and tracked boxes, and cascaded matching is integrated to enhance accuracy.<list list-type="simple">
<list-item>
<p>(1) Mahalanobis Distance</p>
</list-item>
</list>
</p>
<p>The Mahalanobis distance is used to measure the discrepancy between the predicted Kalman state and the newly arrived detection, as shown in Eq. <xref ref-type="disp-formula" rid="e4">4</xref>.<disp-formula id="e4">
<mml:math id="m51">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>
<inline-formula id="inf48">
<mml:math id="m52">
<mml:mrow>
<mml:msup>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the motion matching degree between the <italic>j</italic>-th detection and the <italic>i</italic>-th trajectory, where <inline-formula id="inf49">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the covariance matrix of the observation space at the current moment predicted by the Kalman filter for the trajectory, <inline-formula id="inf50">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted observation of the trajectory at the current moment, and <inline-formula id="inf51">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the state of the <italic>j</italic>-th detection.</p>
<p>Considering the continuity of motion, detections are filtered using this Mahalanobis distance, with the 0.95 quantile of the chi-square distribution as the threshold value, defining a threshold function, as shown in Eq. <xref ref-type="disp-formula" rid="e5">5</xref>.<disp-formula id="e5">
<mml:math id="m56">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2264;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<list list-type="simple">
<list-item>
<p>(2) Appearance features</p>
</list-item>
</list>
</p>
<p>While Mahalanobis distance is a good measure of association when the target&#x2019;s motion uncertainty is low, it becomes ineffective in practical situations like camera movement, leading to a large number of mismatches. Therefore, we integrate a second metric. For each BBox detection, we compute an appearance feature descriptor. For each trajectory, we keep a gallery storing its latest 100 associated descriptors and then use the minimum cosine distance between the <italic>i</italic>-th trajectory and the <italic>j</italic>-th detection as the second measure, as shown in Eq. <xref ref-type="disp-formula" rid="e6">6</xref>.<disp-formula id="e6">
<mml:math id="m57">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold-italic">min</mml:mi>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2223;</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-script">R</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>This appearance metric can likewise be represented using a threshold function, as shown in Eq. <xref ref-type="disp-formula" rid="e7">7</xref>.<disp-formula id="e7">
<mml:math id="m58">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2264;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>Mahalanobis distance can provide reliable target location information in short-term predictions, and the cosine similarity of appearance features can recover the target ID when the target is occluded and reappears. To make the advantages of both measures complementary, a linear weighting approach is used for their combination, as shown in Eqs <xref ref-type="disp-formula" rid="e8">8</xref>, <xref ref-type="disp-formula" rid="e9">9</xref>.<disp-formula id="e8">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bb;</mml:mi>
<mml:msup>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x220f;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mtext>&#x200a;</mml:mtext>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>In summary, the Mahalanobis distance is effective for short-term prediction and matching, while appearance information is more effective for matching long-lost trajectories. The choice of the weighting hyperparameter <italic>&#x3bb;</italic> depends on the specific dataset. For datasets with significant camera movement, the degree of motion matching is not considered (i.e., <italic>&#x3bb;</italic> &#x3d; 0).<list list-type="simple">
<list-item>
<p>(3) Cascaded matching</p>
</list-item>
</list>
</p>
<p>The strategy of cascaded matching is used to improve matching accuracy, mainly because when a target is occluded for a long time, the uncertainty of Kalman filtering greatly increases, leading to a dispersion of continuous prediction probabilities. Assuming the original covariance matrix is normally distributed, continuous predictions without updates will increase the variance of this normal distribution, so points far from the mean in Euclidean distance may obtain the same Mahalanobis distance value as points closer in the previous distribution. In the final stage, the authors use <italic>IoU</italic> association from the previous SORT algorithm to match unconfirmed and unmatched trajectories of age <inline-formula id="inf52">
<mml:math id="m61">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. This can alleviate significant changes caused by abrupt appearance shifts or partial occlusions. However, this approach may also connect some newly generated trajectories to older ones.</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Deep appearance features</title>
<p>The original algorithm uses a residual convolutional neural network to extract the appearance features of the target, training the model on a large-scale pedestrian re-identification dataset for pedestrian detection and tracking. Since the original algorithm was only used for the pedestrian category and the input images were scaled to 128 &#xd7; 64, which does not match the aspect ratio of vehicle targets, this article improves the network model by adjusting the input image size to 128 &#xd7; 128, as shown in <xref ref-type="table" rid="T3">Table 3</xref>. The adjusted network is then trained on the vehicle re-identification dataset VeRi [<xref ref-type="bibr" rid="B50">50</xref>].</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Adjusted reconstruction network.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Network layer</th>
<th align="center">Convolutional kernel parameters</th>
<th align="center">Output size</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Conv 1</td>
<td align="center">3 &#xd7; 3/1</td>
<td align="center">32 &#xd7; 128&#xd7;128</td>
</tr>
<tr>
<td align="left">Conv 2</td>
<td align="center">3 &#xd7; 3/1</td>
<td align="center">32 &#xd7; 128&#xd7;128</td>
</tr>
<tr>
<td align="left">Max Pool 3</td>
<td align="center">3 &#xd7; 3/2</td>
<td align="center">32 &#xd7; 64&#xd7;64</td>
</tr>
<tr>
<td align="left">Residual 4</td>
<td align="center">3 &#xd7; 3/1</td>
<td align="center">32 &#xd7; 64&#xd7;64</td>
</tr>
<tr>
<td align="left">Residual 5</td>
<td align="center">3 &#xd7; 3/1</td>
<td align="center">32 &#xd7; 64&#xd7;64</td>
</tr>
<tr>
<td align="left">Residual 6</td>
<td align="center">3 &#xd7; 3/2</td>
<td align="center">64 &#xd7; 32&#xd7;32</td>
</tr>
<tr>
<td align="left">Residual 7</td>
<td align="center">3 &#xd7; 3/1</td>
<td align="center">64 &#xd7; 32&#xd7;32</td>
</tr>
<tr>
<td align="left">Residual 8</td>
<td align="center">3 &#xd7; 3/2</td>
<td align="center">128 &#xd7; 16&#xd7;16</td>
</tr>
<tr>
<td align="left">Residual 9</td>
<td align="center">3 &#xd7; 3/1</td>
<td align="center">128 &#xd7; 16&#xd7;16</td>
</tr>
<tr>
<td align="left">Dense 10</td>
<td align="center">-</td>
<td align="center">128</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf53">
<mml:math id="m62">
<mml:mrow>
<mml:mtext>Batch&#x2009;and&#x2009;</mml:mtext>
<mml:msub>
<mml:mi mathvariant="script">l</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mtext>&#x2009;Norm</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">-</td>
<td align="center">128</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 Vehicle speed measurement</title>
<sec id="s3-1">
<title>3.1 Model assumptions</title>
<p>All locations in road monitoring images can be mapped to the <inline-formula id="inf54">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> plane of the world coordinate system through camera calibration, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. However, the precise measurement of vehicle speed depends not only on camera calibration but also significantly on the vehicle&#x2019;s trajectory. To better implement vehicle speed measurement, the speed model assumes the following: (1) In highway scenarios, the road is relatively flat without significant undulations, meeting the condition of <inline-formula id="inf55">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>; (2) In highway monitoring scenarios, the movement of vehicles between each frame is linear, allowing for the measurement of vehicles moving in both straight and non-straight paths using the proposed speed measurement method; (3) In highway video surveillance, the time interval between each frame is the same, facilitating the calculation of vehicle speed after obtaining the exact vehicle position using the interval between frames.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Pixel coordinate conversion diagram.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g005.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>3.2 Model design and implementation</title>
<p>Based on the assumptions and establishment of the aforementioned speed model, the specific process of speed detection is implemented. Firstly, using the YOLO object detection algorithm, the coordinates of the top-left corner of the image detection box are obtained. By determining the length and width of the detection box, the coordinates of the center of the bottom edge of the box can be obtained. This ensures that the measured vehicle speed is closer to the actual speed. For every target vehicle in each frame of the video stream, a set of vector relations can be obtained, as shown in Eq. <xref ref-type="disp-formula" rid="e10">10</xref>.<disp-formula id="e10">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf56">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the center coordinates of the bottom edge of the vehicle target detection box in the current video frame; <inline-formula id="inf57">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2206;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the center coordinates of the bottom edge of the vehicle target detection box in the previous frame; <inline-formula id="inf58">
<mml:math id="m68">
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the time interval between the two frames; <inline-formula id="inf59">
<mml:math id="m69">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the tracked trajectory points.</p>
<p>
<inline-formula id="inf60">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the pixel distance between adjacent frames, and calculating the speed requires mapping the pixel coordinates to world coordinates. The current common method involves camera calibration, but camera calibration requires knowledge of the camera&#x2019;s focal length, height, internal parameters, etc., and the calibration process can be cumbersome.</p>
<p>In this article, state estimation is performed using the popular methods of maximum likelihood estimation, maximum <italic>a posteriori</italic> estimation, and non-linear least squares, selecting the best estimation parameters based on the loss in state estimation.<list list-type="simple">
<list-item>
<p>(1) Maximum Likelihood Estimation</p>
</list-item>
</list>
</p>
<p>Maximum Likelihood Estimation (MLE) is an important and widely used method for estimating quantities. MLE explicitly uses a probability model with the goal of finding a phylogenetic tree that can produce the observed data with a high probability. MLE is a representative of a class of phylogenetic tree reconstruction methods based entirely on statistics. More generally, MLE applies when a set of data is known to be randomly drawn from a certain distribution whose specific parameters are unknown, that is, &#x201c;the model is determined, but the parameters are unknown.&#x201d; For example, we know the distribution is a normal distribution, but we don&#x2019;t know the mean and variance; or it&#x2019;s a binomial distribution, but we don&#x2019;t know the mean. MLE can be used to estimate the parameters of the model. The objective of MLE is to find a set of parameters that maximize the probability of the model producing the observed data, as shown in Eq. <xref ref-type="disp-formula" rid="e11">11</xref>.<disp-formula id="e11">
<mml:math id="m71">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mi mathvariant="bold">argmax</mml:mi>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf61">
<mml:math id="m72">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the observed sequence data, and <inline-formula id="inf62">
<mml:math id="m73">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the likelihood function, which denotes the probability of the observed data occurring under the parameter <inline-formula id="inf63">
<mml:math id="m74">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Assuming each observation is independent, the likelihood factorizes into a product over the individual observations, as shown in Eq. <xref ref-type="disp-formula" rid="e12">12</xref>.<disp-formula id="e12">
<mml:math id="m75">
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msub>
<mml:mo>;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x220f;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>To facilitate differentiation, the logarithm of the objective is generally taken. Because the logarithm is monotonically increasing, optimizing the likelihood function is equivalent to optimizing the log-likelihood function, as shown in Eqs <xref ref-type="disp-formula" rid="e13">13</xref>, <xref ref-type="disp-formula" rid="e14">14</xref>.<disp-formula id="e13">
<mml:math id="m76">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold">max</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold">max</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mi mathvariant="bold">log</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
<disp-formula id="e14">
<mml:math id="m77">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">M</mml:mi>
<mml:mi mathvariant="bold-italic">L</mml:mi>
<mml:mi mathvariant="bold-italic">E</mml:mi>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold">max</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
<list list-type="simple">
<list-item>
<p>(2) Maximum A Posteriori Estimation</p>
</list-item>
</list>
</p>
<p>In Bayesian statistics, Maximum A Posteriori (MAP) Estimation refers to the mode of the posterior probability distribution. MAP estimation is used to estimate the values of quantities that cannot be directly observed in experimental data. It is closely related to the classical method of Maximum Likelihood Estimation (MLE), but it uses an augmented optimization objective that further considers the prior probability distribution of the quantity being estimated. Therefore, MAP estimation can be seen as a regularized form of MLE, as shown in Eqs <xref ref-type="disp-formula" rid="e15">15</xref>, <xref ref-type="disp-formula" rid="e16">16</xref>.<disp-formula id="e15">
<mml:math id="m78">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi mathvariant="bold">MAP</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mi mathvariant="bold-italic">max</mml:mi>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mi mathvariant="bold-italic">max</mml:mi>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mi mathvariant="bold-italic">max</mml:mi>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
<disp-formula id="e16">
<mml:math id="m79">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">M</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">max</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="bold-italic">z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">max</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">z</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf64">
<mml:math id="m80">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the parameter to be estimated, and <inline-formula id="inf65">
<mml:math id="m81">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the probability of occurrence of <inline-formula id="inf66">
<mml:math id="m82">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> when the estimated parameter is <inline-formula id="inf67">
<mml:math id="m83">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.<list list-type="simple">
<list-item>
<p>(3) Non-Linear Least Squares</p>
</list-item>
</list>
</p>
<p>The Least Squares Method (also known as the Method of Least Squares) is a mathematical optimization technique. It finds the best function match for data by minimizing the sum of the squares of the errors. The Least Squares Method can be used to easily obtain unknown data, ensuring that the sum of the squares of the errors between these obtained data and the actual data is minimized. The Least Squares Method can also be used for curve fitting, and other optimization problems can be expressed using this method by minimizing energy or maximizing entropy. Using the Least Squares Method to estimate the mapping relationship, the mapping parameters are obtained, as shown in Eqs <xref ref-type="disp-formula" rid="e17">17</xref>, <xref ref-type="disp-formula" rid="e18">18</xref>.<disp-formula id="e17">
<mml:math id="m84">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mi mathvariant="bold-italic">min</mml:mi>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:mo>&#x2211;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mtext>&#x200a;</mml:mtext>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf68">
<mml:math id="m85">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a nonlinear function, and <inline-formula id="inf69">
<mml:math id="m86">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="normal">&#x3a3;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the inverse of the covariance matrix.<disp-formula id="e18">
<mml:math id="m87">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mtext>&#x200a;</mml:mtext>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>Then, the Gauss-Newton method is used to minimize &#x3c8;(x), which can be written as shown in Eq. <xref ref-type="disp-formula" rid="e19">19</xref>:<disp-formula id="e19">
<mml:math id="m88">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2211;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mtext>&#x200a;</mml:mtext>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>For the sum of errors, we investigate the <italic>i</italic>-th term, performing a second-order Taylor expansion followed by differentiation. We first calculate its first-order derivative (gradient) and second-order derivative.</p>
<p>The first-order derivatives are given in Eqs <xref ref-type="disp-formula" rid="e20">20</xref>, <xref ref-type="disp-formula" rid="e21">21</xref>.<disp-formula id="e20">
<mml:math id="m89">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>
<disp-formula id="e21">
<mml:math id="m90">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf70">
<mml:math id="m91">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> is the element in the <inline-formula id="inf71">
<mml:math id="m92">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th row and the <inline-formula id="inf72">
<mml:math id="m93">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th column of the Jacobian matrix; thus, the first-order derivative can also be expressed in the following form, as shown in Eq. <xref ref-type="disp-formula" rid="e22">22</xref>.<disp-formula id="e22">
<mml:math id="m94">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
<p>Second-order derivative, as shown in Eq. <xref ref-type="disp-formula" rid="e23">23</xref>.<disp-formula id="e23">
<mml:math id="m95">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mo>&#x2202;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mo>&#x2202;</mml:mo>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mo>&#x2202;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:msub>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(23)</label>
</disp-formula>
</p>
<p>Observing the result of the second-order derivative, the terms <inline-formula id="inf73">
<mml:math id="m96">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf74">
<mml:math id="m97">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> are elements of the Jacobian matrix. When the iterative point is close to the target point, both the error and its second-order derivative are small, so the term containing their product can be ignored. Therefore, the second-order derivative can be expressed in the following form, as shown in Eq. <xref ref-type="disp-formula" rid="e24">24</xref>.<disp-formula id="e24">
<mml:math id="m98">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mo>&#x2202;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x2202;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
</mml:math>
<label>(24)</label>
</disp-formula>
</p>
<p>Therefore, after the second-order expansion, <inline-formula id="inf75">
<mml:math id="m99">
<mml:mrow>
<mml:mi>&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can be written in the following form, as shown in <xref ref-type="disp-formula" rid="e25">Eq. 25</xref>:<disp-formula id="e25">
<mml:math id="m100">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
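<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>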
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(25)</label>
</disp-formula>
</p>
<p>Similarly, differentiating it and setting the derivative equal to zero gives <xref ref-type="disp-formula" rid="e26">Eq. 26</xref>:<disp-formula id="e26">
<mml:math id="m101">
<mml:mrow>
<mml:mo>&#x2207;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c8;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">0</mml:mn>
</mml:mrow>
</mml:math>
<label>(26)</label>
</disp-formula>
</p>
<p>Let <inline-formula id="inf76">
<mml:math id="m102">
<mml:mrow>
<mml:mo>&#x25b3;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>; then the iteration step is obtained as shown in <xref ref-type="disp-formula" rid="e27">Eq. 27</xref>:<disp-formula id="e27">
<mml:math id="m103">
<mml:mrow>
<mml:mo>&#x25b3;</mml:mo>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:msup>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
</mml:math>
<label>(27)</label>
</disp-formula>
</p>
</sec>
<sec id="s3-3">
<title>3.3 Vehicle speed measurement</title>
<p>Through prior estimation, <inline-formula id="inf77">
<mml:math id="m104">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf78">
<mml:math id="m105">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2206;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can be mapped to the world coordinate system, representing the actual distance moved by the target vehicle from the previous frame to the current frame, as shown in Eq. <xref ref-type="disp-formula" rid="e28">28</xref>. <inline-formula id="inf79">
<mml:math id="m106">
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> is measured in meters and is the Euclidean norm of <inline-formula id="inf80">
<mml:math id="m107">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, representing the physical distance moved by the target vehicle in the world coordinate system from time <inline-formula id="inf81">
<mml:math id="m108">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2206;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf82">
<mml:math id="m109">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The speed of the vehicle target can be measured using <inline-formula id="inf83">
<mml:math id="m110">
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> as Eq. <xref ref-type="disp-formula" rid="e29">29</xref>. Here, <inline-formula id="inf84">
<mml:math id="m111">
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the time between two frames, measured in seconds, and is considered constant, being the reciprocal of the frame rate. For highway surveillance videos, which typically have a frame rate of 25&#xa0;fps, <inline-formula id="inf85">
<mml:math id="m112">
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; 1/25&#xa0;s.<disp-formula id="e28">
<mml:math id="m113">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(28)</label>
</disp-formula>
<disp-formula id="e29">
<mml:math id="m114">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(29)</label>
</disp-formula>
</p>
<p>Assuming a vehicle&#x2019;s trajectory contains m frame trajectory points, meaning in the first m frames of the video, the vehicle&#x2019;s speed between each adjacent pair of frames is <inline-formula id="inf86">
<mml:math id="m115">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, then according to Eq. <xref ref-type="disp-formula" rid="e29">29</xref>, <italic>v</italic><sub>1</sub>, <italic>v</italic><sub>2</sub>, and <italic>v</italic><sub><italic>m</italic>&#x2212;1</sub> are given by Eqs <xref ref-type="disp-formula" rid="e30">30</xref>&#x2013;<xref ref-type="disp-formula" rid="e32">32</xref>:<disp-formula id="e30">
<mml:math id="m116">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(30)</label>
</disp-formula>
<disp-formula id="e31">
<mml:math id="m117">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mn mathvariant="bold">3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(31)</label>
</disp-formula>
<disp-formula id="e32">
<mml:math id="m118">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(32)</label>
</disp-formula>
</p>
<p>Therefore, the average driving speed of the target vehicle in the first m frames is as shown in Eq. <xref ref-type="disp-formula" rid="e33">33</xref>. The detection of the target vehicle&#x2019;s speed is achieved by calculating the average of the instantaneous speeds over multiple frames.<disp-formula id="e33">
<mml:math id="m119">
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mstyle>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(33)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s4">
<title>4 Model training and evaluation metrics selection</title>
<sec id="s4-1">
<title>4.1 Experimental environment and model training</title>
<p>Experimental setup and hardware environment for the dataset: System Type: Windows 10 64-bit Operating System, Memory: 64&#xa0;GB, GPU: NVIDIA GeForce RTX 3080 Ti, 24&#xa0;GB Graphics Card. Software environment: The auxiliary environment includes CUDA V11.2 and OpenCV 4.5.3. This article tested corresponding datasets for various traffic scenarios. The dataset established in this article comprises a total of 30,000 images, including a diverse collection from different scenes, angles, and times.</p>
<p>During training, 80% of the dataset was used for training, while 20% of the data was reserved for testing. Data augmentation was applied in this study, which involved random scaling, cropping, and arrangement of images using the Mosaic method. Random rotation (parameter set to 0.5), random exposure (parameter set to 1.5), and saturation (parameter set to 1.5) were employed to enrich the training data. The learning rate was initially set to 0.001, and the maximum number of training iterations was set to 50,000. To optimize model convergence, the learning rate was adjusted to 0.0005 after 40,000 iterations. The input images to the network were resized to a resolution of 416 &#xd7; 416, and a batch size of 8 was used during training to ensure efficient network processing. The convergence of the model&#x2019;s training loss and mAP (mean Average Precision) can be observed in <xref ref-type="fig" rid="F6">Figure 6</xref>. It shows that the model converged around 3,000 iterations, and as the loss decreased, mAP also reached a high level.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Model training loss convergence status.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g006.tif"/>
</fig>
<p>Convolutional Neural Networks (CNNs) are capable of extracting key features from image objects. The detected objects are classified into three categories: Car, Truck, and Bus. The unique features of each class can be observed in <xref ref-type="fig" rid="F7">Figure 7</xref>, where each class of object exhibits distinct characteristics within the convolutional network. These distinct features are used for classification and detection purposes.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Classified target feature map.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g007.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>4.2 Selection of evaluation metrics</title>
<p>To verify the effectiveness of the model&#x2019;s detection, several typical metrics in the field of object detection and classification were selected for evaluation. For vehicle detection and classification, the focus is on detection precision and recall rate, as well as classification accuracy. Therefore, the model is evaluated using precision, recall, and F1_Score.</p>
<p>AP (Average Precision) is the average accuracy and a mainstream evaluation metric for object detection models. To correctly understand AP, it is necessary to use three concepts: Precision, Recall, and <inline-formula id="inf87">
<mml:math id="m120">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (Intersection over Union). <inline-formula id="inf88">
<mml:math id="m121">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> measures the degree of overlap between two areas, specifically the overlap rate between the target window generated by the model and the originally marked window, which represents the detection accuracy <inline-formula id="inf89">
<mml:math id="m122">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The calculation formula is shown in Eq. <xref ref-type="disp-formula" rid="e34">34</xref>. In an ideal situation, <inline-formula id="inf90">
<mml:math id="m123">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> equals 1, indicating a perfect overlap.<disp-formula id="e34">
<mml:math id="m124">
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">U</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mo>&#x2229;</mml:mo>
<mml:mi mathvariant="bold-italic">G</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">h</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi mathvariant="bold-italic">G</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">h</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(34)</label>
</disp-formula>
</p>
<p>Precision and Recall in object detection: Assuming a set of images containing several targets for detection, Precision represents the proportion of targets detected by the model that are actual target objects, while Recall represents the proportion of all real targets detected by the model. TP (True Positive) denotes samples correctly identified as positive, TN (True Negative) denotes samples correctly identified as negative, FP (False Positive) denotes samples incorrectly identified as positive, and FN (False Negative) denotes samples incorrectly identified as negative. The calculation of Precision and Recall values relies on the formulas shown in Eqs <xref ref-type="disp-formula" rid="e35">35</xref>, <xref ref-type="disp-formula" rid="e36">36</xref>.<disp-formula id="e35">
<mml:math id="m125">
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(35)</label>
</disp-formula>
<disp-formula id="e36">
<mml:math id="m126">
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(36)</label>
</disp-formula>
</p>
<p>After calculating values using the formula, a PR (Precision-Recall) curve can be plotted. The AP (Average Precision) is the mean of Precision values on the PR curve. To achieve more accurate results, the PR curve is smoothed, and the area under the smoothed curve is calculated using integral methods to determine the final AP value. The calculation formula is shown as Eq. <xref ref-type="disp-formula" rid="e37">37</xref>.<disp-formula id="e37">
<mml:math id="m127">
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msubsup>
<mml:msub>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">r</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
</mml:mrow>
</mml:math>
<label>(37)</label>
</disp-formula>
</p>
<p>The F1-Score, also known as the F1 measure, is a metric for classification problems, often used as the final metric in multi-class problems. It is the harmonic mean of precision and recall. For the F1-Score of a single category, the calculation formula is as shown in Eq. <xref ref-type="disp-formula" rid="e38">38</xref>.<disp-formula id="e38">
<mml:math id="m128">
<mml:mrow>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:msub>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(38)</label>
</disp-formula>
</p>
<p>Subsequently, calculate the average value for all categories, denoted as F1. The calculation formula is shown in Eq. <xref ref-type="disp-formula" rid="e39">39</xref>.<disp-formula id="e39">
<mml:math id="m129">
<mml:mrow>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2211;</mml:mo>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:msub>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(39)</label>
</disp-formula>mAP (mean Average Precision) involves calculating the AP (Average Precision) for all categories and then computing the mean. The calculation formula is shown in Eq. <xref ref-type="disp-formula" rid="e40">40</xref>.<disp-formula id="e40">
<mml:math id="m130">
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:math>
<label>(40)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s5">
<title>5 Results and discussion</title>
<sec id="s5-1">
<title>5.1 Evaluation of object detection model results</title>
<p>Based on the aforementioned evaluation metrics, the trained object detection models are tested and assessed using the test sets from the datasets. The algorithm shows good statistical accuracy for different vehicle types, with APs of Car, Bus, Truck being 93.58, 91.26, 90.05 respectively, mAP at 92.42, and F1_Score at 97. This is primarily due to the high visibility in tunnel and roadbed sections, where target features are more distinct, resulting in a more accurate model. Overall, the model&#x2019;s detection accuracy for buses is lower than for other categories, mainly because the sample size for buses is significantly smaller than for other categories. However, with a mean Average Precision (mAP) exceeding 90%, it demonstrates that the proposed model is reliable and fully applicable to highway scenarios.</p>
</sec>
<sec id="s5-2">
<title>5.2 Evaluation of speed estimation results</title>
<sec id="s5-2-1">
<title>5.2.1 Selection of optimal fitting model</title>
<p>Based on the data distribution, this article selects 7 video points for fitting analysis with 7 sets of linear and nonlinear data. This curve relationship is not intuitively obvious but requires statistical testing. The optimal fitting model is chosen by comparing the degree of fit and its significance. The Akaike Information Criterion (AIC) and Bayesian Information Criterion (BIC) are two commonly used indicators for assessing model fitness, with smaller values indicating a better-fitting model. Therefore, before selecting a model, it is necessary to assess the AIC and BIC values for each model, including dependent and independent variables. Additionally, the goodness of fit <inline-formula id="inf91">
<mml:math id="m131">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <italic>p</italic>-value are also key parameters for evaluating the quality of the fit. As the data distribution within the range of road video surveillance is essentially similar in terms of distance calibration, a random surveillance point is selected for the fitting analysis of the 7 formulas, with results as shown in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Fitting model results.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Number</th>
<th align="center">Formulas</th>
<th align="center">Abbreviation</th>
<th align="center">AIC</th>
<th align="center">BIC</th>
<th align="center">
<inline-formula id="inf92">
<mml:math id="m132">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<italic>p</italic>-value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">
<inline-formula id="inf93">
<mml:math id="m133">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Line2p</td>
<td align="center">139.69</td>
<td align="center">141.6</td>
<td align="center">0.774</td>
<td align="center">3.3085e-05</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">
<inline-formula id="inf94">
<mml:math id="m134">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Com2p</td>
<td align="center">95.8</td>
<td align="center">98.3</td>
<td align="center">0.912</td>
<td align="center">1.08e-08</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">
<inline-formula id="inf95">
<mml:math id="m135">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Com3p</td>
<td align="center">94.5</td>
<td align="center">96.7</td>
<td align="center">0.913</td>
<td align="center">5.35e-07</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">
<inline-formula id="inf96">
<mml:math id="m136">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Line3p</td>
<td align="center">118.0</td>
<td align="center">121.0</td>
<td align="center">0.958</td>
<td align="center">2.59e-08</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">
<inline-formula id="inf97">
<mml:math id="m137">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="italic">ln</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Log2p</td>
<td align="center">130.9</td>
<td align="center">132.8</td>
<td align="center">0.880</td>
<td align="center">7.2456e-07</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">
<inline-formula id="inf98">
<mml:math id="m138">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="italic">exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Exp2p</td>
<td align="center">84.7</td>
<td align="center">86.6</td>
<td align="center">0.996</td>
<td align="center">1.71e-15</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">
<inline-formula id="inf99">
<mml:math id="m139">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="italic">exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Exp3p</td>
<td align="center">
<bold>82.8</bold>
</td>
<td align="center">
<bold>85.4</bold>
</td>
<td align="center">
<bold>0.997</bold>
</td>
<td align="center">
<bold>2.52e-14</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the method chosen in this article.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>From <xref ref-type="table" rid="T4">Table 4</xref>, it is evident that apart from linear fitting, the goodness of fit <inline-formula id="inf100">
<mml:math id="m140">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for all other methods is greater than 0.8. Among them, the <inline-formula id="inf101">
<mml:math id="m141">
<mml:mrow>
<mml:mi mathvariant="italic">Exp</mml:mi>
<mml:mn>3</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> fitting shows the best performance, hence <inline-formula id="inf102">
<mml:math id="m142">
<mml:mrow>
<mml:mi mathvariant="italic">Exp</mml:mi>
<mml:mn>3</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is chosen as the formula for distance-speed fitting.</p>
<p>To obtain the best fitting parameters for <inline-formula id="inf103">
<mml:math id="m143">
<mml:mrow>
<mml:mi mathvariant="italic">Exp</mml:mi>
<mml:mn>3</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, Maximum Likelihood Estimation (MLE), Maximum A Posteriori Estimation (MAP), and the Non-linear Least Squares (NLS) method are employed for parameter estimation on the distance calibration data from 7 video points. The parameters are evaluated using AIC, BIC, <inline-formula id="inf104">
<mml:math id="m144">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and <italic>p</italic>-value, with the evaluation results presented in <xref ref-type="table" rid="T5">Table 5</xref> and <xref ref-type="fig" rid="F8">Figure 8</xref>.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Parameter estimation results.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Number</th>
<th colspan="4" align="center">MLE</th>
<th colspan="4" align="center">MAP</th>
<th colspan="4" align="center">NLS</th>
</tr>
<tr>
<th align="center">AIC</th>
<th align="center">BIC</th>
<th align="center">
<inline-formula id="inf105">
<mml:math id="m145">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<italic>p</italic>-value</th>
<th align="center">AIC</th>
<th align="center">BIC</th>
<th align="center">
<inline-formula id="inf106">
<mml:math id="m146">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<italic>p</italic>-value</th>
<th align="center">AIC</th>
<th align="center">BIC</th>
<th align="center">
<inline-formula id="inf107">
<mml:math id="m147">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<italic>p</italic>-value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">
<bold>80</bold>
</td>
<td align="center">
<bold>82</bold>
</td>
<td align="center">
<bold>0.998</bold>
</td>
<td align="center">
<bold>2.52e-14</bold>
</td>
<td align="center">81</td>
<td align="center">83</td>
<td align="center">0.998</td>
<td align="center">1.15e-14</td>
<td align="center">82</td>
<td align="center">85</td>
<td align="center">0.997</td>
<td align="center">2.52e-14</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">
<bold>83</bold>
</td>
<td align="center">
<bold>86</bold>
</td>
<td align="center">
<bold>0.994</bold>
</td>
<td align="center">
<bold>1.76e-10</bold>
</td>
<td align="center">84</td>
<td align="center">87</td>
<td align="center">0.994</td>
<td align="center">4.32e-10</td>
<td align="center">86</td>
<td align="center">89</td>
<td align="center">0.993</td>
<td align="center">5.38e-10</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">
<bold>81</bold>
</td>
<td align="center">
<bold>84</bold>
</td>
<td align="center">
<bold>0.996</bold>
</td>
<td align="center">
<bold>5.81e-13</bold>
</td>
<td align="center">84</td>
<td align="center">86</td>
<td align="center">0.995</td>
<td align="center">7.65e-13</td>
<td align="center">84</td>
<td align="center">87</td>
<td align="center">0.995</td>
<td align="center">8.26e-13</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">
<bold>94</bold>
</td>
<td align="center">
<bold>100</bold>
</td>
<td align="center">
<bold>0.923</bold>
</td>
<td align="center">
<bold>5.63e-09</bold>
</td>
<td align="center">96</td>
<td align="center">103</td>
<td align="center">0.913</td>
<td align="center">4.25e-08</td>
<td align="center">98</td>
<td align="center">105</td>
<td align="center">0.902</td>
<td align="center">5.63e-08</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">
<bold>83</bold>
</td>
<td align="center">
<bold>91</bold>
</td>
<td align="center">
<bold>0.983</bold>
</td>
<td align="center">
<bold>8.54e-13</bold>
</td>
<td align="center">85</td>
<td align="center">92</td>
<td align="center">0.980</td>
<td align="center">2.85e-12</td>
<td align="center">85</td>
<td align="center">93</td>
<td align="center">0.975</td>
<td align="center">3.16e-12</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">
<bold>84</bold>
</td>
<td align="center">
<bold>87</bold>
</td>
<td align="center">
<bold>0.992</bold>
</td>
<td align="center">
<bold>2.52e-10</bold>
</td>
<td align="center">85</td>
<td align="center">88</td>
<td align="center">0.995</td>
<td align="center">5.15e-10</td>
<td align="center">87</td>
<td align="center">90</td>
<td align="center">0.990</td>
<td align="center">6.87e-10</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">
<bold>82</bold>
</td>
<td align="center">
<bold>85</bold>
</td>
<td align="center">
<bold>0.995</bold>
</td>
<td align="center">
<bold>4.84e-12</bold>
</td>
<td align="center">83</td>
<td align="center">87</td>
<td align="center">0.994</td>
<td align="center">6.62e-12</td>
<td align="center">86</td>
<td align="center">89</td>
<td align="center">0.993</td>
<td align="center">5.36e-12</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the method chosen in this article.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Parameter estimation results.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g008.tif"/>
</fig>
<p>From the above table, it is clear that for the <inline-formula id="inf108">
<mml:math id="m148">
<mml:mrow>
<mml:mi mathvariant="italic">Exp</mml:mi>
<mml:mn>3</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> parameter estimation of the 7 video points, Maximum Likelihood Estimation shows the best performance, followed by Maximum A Posteriori Estimation, and lastly Non-linear Least Squares method, as indicated by AIC, BIC, <inline-formula id="inf109">
<mml:math id="m149">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and <italic>p</italic>-value.</p>
</sec>
<sec id="s5-2-2">
<title>5.2.2 Speed estimation results</title>
<p>To evaluate the measurement results of the speed estimation method, based on radar and video multi-sensor fusion technology, the results measured by millimeter-wave radar are taken as the true speed values. The verification experiment was conducted in the Shimen Tunnel on the Hanping Expressway in Shaanxi, China, where radar and video integration devices were installed at 150-m intervals, totaling seven units, to achieve holographic perception of traffic flow states within a 1050-m range, obtaining detailed information on coordinates, lane positions, and speeds for different lanes and vehicle types. Vehicle speeds detected by millimeter-wave radar and video were extracted using timestamps and target IDs. The comparison between the measured results and the true speed values, along with the overall experimental results and performance analysis, is shown in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Overall speed measurement results and performance analysis.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Station number</th>
<th align="center">MSE</th>
<th align="center">RMSE</th>
<th align="center">MAE</th>
<th align="center">
<inline-formula id="inf110">
<mml:math id="m150">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">K733 &#x2b; 953</td>
<td align="center">26.9133</td>
<td align="center">5.1878</td>
<td align="center">3.8536</td>
<td align="center">0.87993</td>
</tr>
<tr>
<td align="center">K734 &#x2b; 088</td>
<td align="center">14.2012</td>
<td align="center">3.7685</td>
<td align="center">2.9179</td>
<td align="center">0.90497</td>
</tr>
<tr>
<td align="center">K734 &#x2b; 843</td>
<td align="center">52.2661</td>
<td align="center">7.2295</td>
<td align="center">5.3957</td>
<td align="center">0.8889</td>
</tr>
<tr>
<td align="center">K734 &#x2b; 983</td>
<td align="center">86.1127</td>
<td align="center">9.2797</td>
<td align="center">7.3639</td>
<td align="center">0.68259</td>
</tr>
<tr>
<td align="center">K735 &#x2b; 123</td>
<td align="center">6.6045</td>
<td align="center">2.5699</td>
<td align="center">2.0935</td>
<td align="center">0.96168</td>
</tr>
<tr>
<td align="center">K735 &#x2b; 263</td>
<td align="center">4.2581</td>
<td align="center">2.0635</td>
<td align="center">1.6984</td>
<td align="center">0.97730</td>
</tr>
<tr>
<td align="center">K735 &#x2b; 403</td>
<td align="center">81.6310</td>
<td align="center">9.0205</td>
<td align="center">7.6991</td>
<td align="center">0.75010</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From <xref ref-type="table" rid="T6">Table 6</xref>, it is observed that the vehicle speed measurement method based on video, as discussed in this article, shows relatively good performance in scenarios with high overall speeds on highways. The minimum root mean square error is 2.0635, and the maximum is 9.2797. The main reasons for the larger deviation between the measured speeds and the actual values are environmental conditions, such as lighting and line shape. The coefficient of determination ranges from a minimum of 0.68259 to a maximum of 0.97730. The variation in the goodness of fit is attributable to the same reasons as the variation in the root mean square error. Additionally, to further evaluate the speed tracking performance of this method, the vehicle speed measurement data from 7 video locations are manually divided into Front section, Middle section, Back section, and End section, for a comprehensive analysis of the overall tracking effect in these four segments, as seen in <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Analysis of speed tracking effect.</p>
</caption>
<graphic xlink:href="fphy-12-1371320-g009.tif"/>
</fig>
<p>As depicted in <xref ref-type="fig" rid="F9">Figure 9</xref>, the effective measurement distance of this method is around 140&#xa0;m, with the absolute speed error generally within 1&#x2013;8&#xa0;km/h, meeting the accuracy requirements for speed measurement. This method has certain advantages in distance detection, especially in tunnel scenarios, where a camera spacing of 150&#xa0;m allows for continuous tracking of vehicle trajectories and speeds based on video. For further analysis of speed tracking differences within the 150&#xa0;m detection range, the range is divided into a first half and a second half. The first-half data show a minimum significance level of 0.4261, indicating small differences in speed tracking and reflecting stable tracking performance. The second-half data have a minimum significance level of 0.0179, indicating some fluctuations in speed in the End section of the second half, but the absolute speed error still shows good precision.</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s6">
<title>6 Conclusion</title>
<p>This article proposes an improved YOLOv5s &#x2b; DeepSORT vehicle speed measurement algorithm for surveillance videos in highway scenarios, capable of vehicle target detection and continuous speed tracking without camera prior parameters and calibration. The main conclusions are as follows:<list list-type="simple">
<list-item>
<p>(1) The introduction of the Swin Transformer Block module improves the model&#x2019;s ability to capture local areas of interest, effectively increasing the detector&#x2019;s accuracy; using <inline-formula id="inf111">
<mml:math id="m151">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> Loss to replace the original <inline-formula id="inf112">
<mml:math id="m152">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> loss further enhances the detector&#x2019;s localization precision and effectively reduces omissions in congested vehicle scenarios; the algorithm shows good statistical accuracy for different vehicle types, with APs of Car, Bus, and Truck being 93.58, 91.26, and 90.05, respectively, an mAP of 92.42, and an F1_Score of 97.</p>
</list-item>
<list-item>
<p>(2) A calibration algorithm for traffic monitoring scenarios was proposed, which uses known reference points such as the image&#x2019;s centerline and contour marks. It applies Maximum Likelihood Estimation, Maximum A Posteriori Estimation, and Non-linear Least Squares method for the conversion between image pixel coordinates and actual coordinates. The parameter estimation showed good results, with Maximum Likelihood Estimation being the best, and AIC, BIC, <inline-formula id="inf113">
<mml:math id="m153">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and <italic>p</italic>-value being 83.56, 87.86, and 8.66E-10 respectively.</p>
</list-item>
<list-item>
<p>(3) The vehicle speed measurement is achieved by calculating the average of instantaneous speeds over multiple frames. This method&#x2019;s effective measurement distance is about 140&#xa0;m, with an absolute speed error generally within 1&#x2013;8&#xa0;km/h, meeting the accuracy requirements for speed measurement. It has certain advantages in distance detection, especially in tunnel scenarios where a camera spacing of 150&#xa0;m allows for continuous tracking of vehicle trajectories and speeds based on video.</p>
</list-item>
<list-item>
<p>(4) However, during experiments, it was found that vehicle speed accuracy is influenced by road geometry, environmental conditions, lighting, resolution, etc. These effects can be mitigated through image enhancement optimization algorithms or by increasing video resolution, thus achieving more accurate vehicle speed measurements. More accurate measurements help regulatory bodies control speeds on the roads more effectively, reducing instances of speeding and thereby decreasing traffic accidents and enhancing road safety. Additionally, with the rapid development of multi-sensor fusion technology, the integration of video and millimeter-wave radar detection results can complement each other, providing technical support for active traffic safety management on highways.</p>
</list-item>
</list>
</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/Supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s8">
<title>Author contributions</title>
<p>ZL: Conceptualization, Methodology, Writing&#x2013;original draft. YB: Funding acquisition, Writing&#x2013;original draft. XY: Project administration, Resources, Writing&#x2013;review and editing. YL: Investigation, Writing&#x2013;review and editing. SY: Software, Writing&#x2013;review and editing. MW: Funding acquisition, Project administration, Writing&#x2013;review and editing. QY: Data curation, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was supported by the National Natural Science Foundation of China (52208424); the Natural Science Foundation of Chongqing (2022NSCQ-MSX1939); the Chongqing Municipal Education Commission Foundation (KJQN202300728); the Chongqing Talent Innovation Leading Talent Project (CQYC20210301505); and the Key Research and Development Program of Guangxi, China (Grant No. AB21196034).</p>
</sec>
<ack>
<p>The authors would like to thank the support of their colleagues in the Research and Development Center of Transport Industry of Self-driving Technology.</p>
</ack>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>Authors ZL, XY, SY, MW, and QY were employed by China Merchants Chongqing Communications Research and Design Institute Co., Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wan</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Buckles</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Camera calibration and vehicle tracking: highway traffic video analytics</article-title>. <source>Transp Res C Emerg Technol</source> (<year>2014</year>) <volume>44</volume>:<fpage>202</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.trc.2014.02.018</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Karo&#x144;</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Mikulski</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Selected problems of transport modelling with ITS services impact on travel behavior of users</article-title>. In: <conf-name>2017 15th International Conference on ITS Telecommunications (ITST)</conf-name>; <conf-date>29-31 May 2017</conf-date>; <conf-loc>Warsaw, Poland</conf-loc>. <publisher-name>IEEE</publisher-name> (<year>2017</year>). p. <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1109/ITST.2017.7972231</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Chu</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>ARIMA model and few-shot learning for vehicle speed time series analysis and prediction</article-title>. <source>Comput Intell Neurosci</source> (<year>2022</year>) <volume>2022</volume>:<fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1155/2022/2526821</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Urban traffic state estimation considering resident travel characteristics and road network capacity</article-title>. <source>J Transportation Syst Eng Inf Tech</source> (<year>2011</year>) <volume>11</volume>:<fpage>81</fpage>&#x2013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1016/S1570-6672(10)60142-0</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Javadi</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Dahl</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Pettersson</surname>
<given-names>MI</given-names>
</name>
</person-group>. <article-title>Vehicle speed measurement model for video-based systems</article-title>. <source>Comput Electr Eng</source> (<year>2019</year>) <volume>76</volume>:<fpage>238</fpage>&#x2013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.1016/j.compeleceng.2019.04.001</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dahl</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Javadi</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Analytical modeling for a video-based vehicle speed measurement framework</article-title>. <source>Sensors (Switzerland)</source> (<year>2020</year>) <volume>20</volume>(<issue>1</issue>):<fpage>160</fpage>. <pub-id pub-id-type="doi">10.3390/s20010160</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Sarker</surname>
<given-names>DMSZ</given-names>
</name>
<name>
<surname>Rayamajhi</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Speed estimation of vehicle in intelligent traffic surveillance system using video image processing</article-title>. <source>Int J Sci Eng Res</source> (<year>2014</year>) <volume>5</volume>(<issue>12</issue>):<fpage>1384</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.14299/ijser.2014.12.003</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wicaksono</surname>
<given-names>DW</given-names>
</name>
<name>
<surname>Setiyono</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Speed estimation on moving vehicle based on digital image processing</article-title>. <source>Int J Comput Sci Appl Math</source> (<year>2017</year>) <volume>3</volume>(<issue>1</issue>):<fpage>21</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.12962/j24775401.v3i1.2117</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>A high accurate vehicle speed estimation method</article-title>. <source>Soft Comput</source> (<year>2020</year>) <volume>24</volume>:<fpage>1283</fpage>&#x2013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-019-03965-w</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Huynh</surname>
<given-names>DQ</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Reynolds</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Atkinson</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>A vision-based pipeline for vehicle counting, speed estimation, and classification</article-title>. <source>IEEE Trans Intell Transportation Syst</source> (<year>2021</year>) <volume>22</volume>:<fpage>7547</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1109/TITS.2020.3004066</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhardwaj</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Tummala</surname>
<given-names>GK</given-names>
</name>
<name>
<surname>Ramalingam</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Ramjee</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Sinha</surname>
<given-names>P</given-names>
</name>
</person-group>. <article-title>AutoCalib: automatic traffic camera calibration at scale</article-title>. <source>ACM Trans Sen Netw</source> (<year>2018</year>) <volume>14</volume>(<issue>3-4</issue>):<fpage>1</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1145/3199667</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Qimin</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Mingming</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Bin</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Xianghui</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>A methodology of vehicle speed estimation based on optical flow</article-title>. In: <conf-name>Proceedings of 2014 IEEE International Conference on Service Operations and Logistics, and Informatics</conf-name>; <conf-date>08-10 October 2014</conf-date>; <conf-loc>Qingdao, China</conf-loc>. <publisher-name>IEEE</publisher-name> (<year>2014</year>). p. <fpage>33</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1109/SOLI.2014.6960689</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schoepflin</surname>
<given-names>TN</given-names>
</name>
<name>
<surname>Dailey</surname>
<given-names>DJ</given-names>
</name>
</person-group>. <article-title>Dynamic camera calibration of roadside traffic management cameras for vehicle speed estimation</article-title>. <source>IEEE Trans Intell Transportation Syst</source> (<year>2003</year>) <volume>4</volume>:<fpage>90</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/TITS.2003.821213</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Heo</surname>
<given-names>O</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Kee</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sunwoo</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Vehicle distance estimation using a mono-camera for FCW/AEB systems</article-title>. <source>Int J Automotive Tech</source> (<year>2016</year>) <volume>17</volume>:<fpage>483</fpage>&#x2013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1007/s12239-016-0050-9</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sochor</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Juranek</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Spanhel</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Marsik</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Siroky</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Herout</surname>
<given-names>A</given-names>
</name>
<etal/>
</person-group> <article-title>Comprehensive data set for automatic single camera visual speed measurement</article-title>. <source>IEEE Trans Intell Transportation Syst</source> (<year>2019</year>) <volume>20</volume>:<fpage>1633</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1109/TITS.2018.2825609</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>H-Y</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K-J</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>C-H</given-names>
</name>
</person-group>. <article-title>Vehicle speed detection from a single motion blurred image</article-title>. <source>Image Vis Comput</source> (<year>2008</year>) <volume>26</volume>:<fpage>1327</fpage>&#x2013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1016/j.imavis.2007.04.004</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Celik</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Kusetogullari</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>Solar-powered automated road surveillance system for speed violation detection</article-title>. <source>IEEE Trans Ind Elect</source> (<year>2010</year>) <volume>57</volume>:<fpage>3216</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1109/TIE.2009.2038395</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nguyen</surname>
<given-names>TT</given-names>
</name>
<name>
<surname>Pham</surname>
<given-names>XD</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>JH</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Jeon</surname>
<given-names>JW</given-names>
</name>
</person-group>. <article-title>Compensating background for noise due to camera vibration in uncalibrated-camera-based vehicle speed measurement system</article-title>. <source>IEEE Trans Veh Technol</source> (<year>2011</year>) <volume>60</volume>:<fpage>30</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1109/TVT.2010.2096832</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eslami</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Raie</surname>
<given-names>AA</given-names>
</name>
<name>
<surname>Faez</surname>
<given-names>K</given-names>
</name>
</person-group>. <article-title>Precise vehicle speed measurement based on a hierarchical homographic transform estimation for law enforcement applications</article-title>. <source>IEICE Trans Inf Syst</source> (<year>2016</year>) <volume>E99.D</volume>:<fpage>1635</fpage>&#x2013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1587/transinf.2015EDP7371</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Famouri</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Azimifar</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>A novel motion plane-based approach to vehicle speed estimation</article-title>. <source>IEEE Trans Intell Transportation Syst</source> (<year>2019</year>) <volume>20</volume>:<fpage>1237</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1109/TITS.2018.2847224</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Z</given-names>
</name>
</person-group>. <article-title>An adaptive framework for multi-vehicle ground speed estimation in airborne videos</article-title>. <source>Remote Sens (Basel)</source> (<year>2019</year>) <volume>11</volume>:<fpage>1241</fpage>. <pub-id pub-id-type="doi">10.3390/rs11101241</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koyuncu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Koyuncu</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Vehicle Speed detection by using Camera and image processing software</article-title>. <source>Int J Eng Sci (Ghaziabad)</source> (<year>2018</year>) <volume>7</volume>:<fpage>64</fpage>&#x2013;<lpage>72</lpage>. <pub-id pub-id-type="doi">10.9790/1813-0709036472</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>J-H</given-names>
</name>
<name>
<surname>Oh</surname>
<given-names>W-T</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>J-H</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>J-C</given-names>
</name>
</person-group>. <article-title>Reliability verification of vehicle speed estimate method in forensic videos</article-title>. <source>Forensic Sci Int</source> (<year>2018</year>) <volume>287</volume>:<fpage>195</fpage>&#x2013;<lpage>206</lpage>. <pub-id pub-id-type="doi">10.1016/j.forsciint.2018.04.002</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sochor</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Jur&#xe1;nek</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Herout</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Traffic surveillance camera calibration by 3D model bounding box alignment for accurate vehicle speed measurement</article-title>. <source>Computer Vis Image Understanding</source> (<year>2017</year>) <volume>161</volume>:<fpage>87</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1016/j.cviu.2017.05.015</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Palubinskas</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Kurz</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Reinartz</surname>
<given-names>P</given-names>
</name>
</person-group>. <article-title>Model based traffic congestion detection in optical remote sensing imagery</article-title>. <source>Eur Transport Res Rev</source> (<year>2010</year>) <volume>2</volume>:<fpage>85</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1007/s12544-010-0028-z</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Do&#x1e7;an</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Temiz</surname>
<given-names>MS</given-names>
</name>
<name>
<surname>K&#xfc;l&#xfc;r</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Real time speed estimation of moving vehicles from side view images from an uncalibrated video camera</article-title>. <source>Sensors</source> (<year>2010</year>) <volume>10</volume>(<issue>5</issue>):<fpage>4805</fpage>&#x2013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.3390/s100504805</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Bin</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>Video-based traffic data collection system for multiple vehicle types</article-title>. <source>IET Intell Transport Syst</source> (<year>2014</year>) <volume>8</volume>:<fpage>164</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1049/iet-its.2012.0099</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jeyabharathi</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Dejey</surname>
<given-names>DD</given-names>
</name>
</person-group>. <article-title>Vehicle tracking and speed measurement system (VTSM) based on novel feature descriptor: diagonal hexadecimal pattern (DHP)</article-title>. <source>J Vis Commun Image Represent</source> (<year>2016</year>) <volume>40</volume>:<fpage>816</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1016/j.jvcir.2016.08.011</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Agrawal</surname>
<given-names>SC</given-names>
</name>
<name>
<surname>Tripathi</surname>
<given-names>RK</given-names>
</name>
</person-group>. <article-title>An image processing based method for vehicle speed estimation</article-title>. <source>Int J Scientific Tech Res</source> (<year>2020</year>) <volume>9</volume>:<fpage>1241</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B30">
<label>30.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Biswas</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Stevanovic</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Speed estimation of multiple moving objects from a moving UAV platform</article-title>. <source>ISPRS Int J Geoinf</source> (<year>2019</year>) <volume>8</volume>(<issue>6</issue>):<fpage>259</fpage>. <pub-id pub-id-type="doi">10.3390/ijgi8060259</pub-id>
</citation>
</ref>
<ref id="B31">
<label>31.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Roh</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Sohn</surname>
<given-names>K</given-names>
</name>
</person-group>. <article-title>Image-based learning to measure the space mean speed on a stretch of road without the need to tag images with labels</article-title>. <source>Sensors (Switzerland)</source> (<year>2019</year>) <volume>19</volume>:<fpage>1227</fpage>. <pub-id pub-id-type="doi">10.3390/s19051227</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z</given-names>
</name>
</person-group>. <article-title>Vehicle speed estimation based on 3D ConvNets and non-local blocks</article-title>. <source>Future Internet</source> (<year>2019</year>) <volume>11</volume>(<issue>6</issue>):<fpage>123</fpage>. <pub-id pub-id-type="doi">10.3390/fi11060123</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luvizon</surname>
<given-names>DC</given-names>
</name>
<name>
<surname>Nassu</surname>
<given-names>BT</given-names>
</name>
<name>
<surname>Minetto</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>A video-based system for vehicle speed measurement in urban roadways</article-title>. <source>IEEE Trans Intell Transportation Syst</source> (<year>2017</year>) <volume>18</volume>:<fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1109/TITS.2016.2606369</pub-id>
</citation>
</ref>
<ref id="B34">
<label>34.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Qu</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Vehicle speed measurement based on binocular stereovision system</article-title>. <source>IEEE Access</source> (<year>2019</year>) <volume>7</volume>:<fpage>106628</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2932120</pub-id>
</citation>
</ref>
<ref id="B35">
<label>35.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blankenship</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Diamantas</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Detection, tracking, and speed estimation of vehicles: a homography-based approach</article-title>. <source>IMPROVE</source> (<year>2022</year>) <volume>1</volume>:<fpage>211</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.5220/0011093600003209</pub-id>
</citation>
</ref>
<ref id="B36">
<label>36.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fern&#xe1;ndez Llorca</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Hern&#xe1;ndez Mart&#xed;nez</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Garc&#xed;a Daza</surname>
<given-names>I</given-names>
</name>
</person-group>. <article-title>Vision-based vehicle speed estimation: a survey</article-title>. <source>IET Intell Transport Syst</source> (<year>2021</year>) <volume>15</volume>:<fpage>987</fpage>&#x2013;<lpage>1005</lpage>. <pub-id pub-id-type="doi">10.1049/itr2.12079</pub-id>
</citation>
</ref>
<ref id="B37">
<label>37.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>HJ</given-names>
</name>
</person-group>. <article-title>Vehicle detection and speed estimation for automated traffic surveillance systems at nighttime</article-title>. <source>Tehnicki Vjesnik</source> (<year>2019</year>) <volume>26</volume>:<fpage>87</fpage>&#x2013;<lpage>94</lpage>. <pub-id pub-id-type="doi">10.17559/TV-20170827091448</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashraf</surname>
<given-names>MH</given-names>
</name>
<name>
<surname>Jabeen</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Alghamdi</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Zia</surname>
<given-names>MS</given-names>
</name>
<name>
<surname>Almutairi</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>HVD-net: a hybrid vehicle detection network for vision-based vehicle tracking and speed estimation</article-title>. <source>J King Saud Univ - Comp Inf Sci</source> (<year>2023</year>) <volume>35</volume>:<fpage>101657</fpage>. <pub-id pub-id-type="doi">10.1016/j.jksuci.2023.101657</pub-id>
</citation>
</ref>
<ref id="B39">
<label>39.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pal</surname>
<given-names>SK</given-names>
</name>
<name>
<surname>Pramanik</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Maiti</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Mitra</surname>
<given-names>P</given-names>
</name>
</person-group>. <article-title>Deep learning in multi-object detection and tracking: state of the art</article-title>. <source>Appl Intelligence</source> (<year>2021</year>) <volume>51</volume>:<fpage>6400</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1007/s10489-021-02293-7</pub-id>
</citation>
</ref>
<ref id="B40">
<label>40.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiao</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Z</given-names>
</name>
<etal/>
</person-group> <article-title>A survey of deep learning-based object detection</article-title>. <source>IEEE Access</source> (<year>2019</year>) <volume>7</volume>:<fpage>128837</fpage>&#x2013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2939201</pub-id>
</citation>
</ref>
<ref id="B41">
<label>41.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khosravi</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Dehkordi</surname>
<given-names>RA</given-names>
</name>
<name>
<surname>Ahmadyfard</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Vehicle speed and dimensions estimation using on-road cameras by identifying popular vehicles</article-title>. <source>Scientia Iranica</source> (<year>2022</year>) <volume>29</volume>. <pub-id pub-id-type="doi">10.24200/sci.2020.55331.4174</pub-id>
</citation>
</ref>
<ref id="B42">
<label>42.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Zhe</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Q</given-names>
</name>
<name>
<surname>Pei</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>D</given-names>
</name>
</person-group>. <article-title>Robust inter-vehicle distance estimation method based on monocular vision</article-title>. <source>IEEE Access</source> (<year>2019</year>) <volume>7</volume>:<fpage>46059</fpage>&#x2013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2907984</pub-id>
</citation>
</ref>
<ref id="B43">
<label>43.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jamshidnejad</surname>
<given-names>A</given-names>
</name>
<name>
<surname>De Schutter</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Estimation of the generalised average traffic speed based on microscopic measurements</article-title>. <source>Transportmetrica A: Transport Sci</source> (<year>2015</year>) <volume>11</volume>:<fpage>525</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1080/23249935.2015.1026957</pub-id>
</citation>
</ref>
<ref id="B44">
<label>44.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarkar</surname>
<given-names>NC</given-names>
</name>
<name>
<surname>Bhaskar</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Miska</surname>
<given-names>MP</given-names>
</name>
</person-group>. <article-title>Microscopic modelling of area-based heterogeneous traffic flow: area selection and vehicle movement</article-title>. <source>Transp Res Part C Emerg Technol</source> (<year>2020</year>) <volume>111</volume>:<fpage>373</fpage>&#x2013;<lpage>96</lpage>. <pub-id pub-id-type="doi">10.1016/j.trc.2019.12.013</pub-id>
</citation>
</ref>
<ref id="B45">
<label>45.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Appathurai</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Sundarasekar</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Raja</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Alex</surname>
<given-names>EJ</given-names>
</name>
<name>
<surname>Palagan</surname>
<given-names>CA</given-names>
</name>
<name>
<surname>Nithya</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>An efficient optimal neural network-based moving vehicle detection in traffic video surveillance system</article-title>. <source>Circuits Syst Signal Process</source> (<year>2020</year>) <volume>39</volume>:<fpage>734</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1007/s00034-019-01224-9</pub-id>
</citation>
</ref>
<ref id="B46">
<label>46.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z</given-names>
</name>
<etal/>
</person-group> <article-title>Swin transformer: hierarchical vision transformer using shifted windows</article-title>. <source>Proc IEEE Int Conf Comp Vis</source> (<year>2021</year>) <fpage>10012</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV48922.2021.00986</pub-id>
</citation>
</ref>
<ref id="B47">
<label>47.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Q</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Recent advances in convolutional neural network acceleration</article-title>. <source>Neurocomputing</source> (<year>2019</year>) <volume>323</volume>:<fpage>37</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2018.09.038</pub-id>
</citation>
</ref>
<ref id="B48">
<label>48.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bewley</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Ge</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Ott</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Ramos</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Upcroft</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Simple online and realtime tracking</article-title>. In: <conf-name>Proceedings - International Conference on Image Processing, ICIP</conf-name>; <conf-date>25-28 September 2016</conf-date>; <conf-loc>Phoenix, AZ, USA</conf-loc>. <publisher-name>IEEE</publisher-name> (<year>2016</year>). p. <fpage>3464</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/ICIP.2016.7533003</pub-id>
</citation>
</ref>
<ref id="B49">
<label>49.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wojke</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Bewley</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Paulus</surname>
<given-names>D</given-names>
</name>
</person-group>. <article-title>Simple online and realtime tracking with a deep association metric</article-title>. In: <conf-name>Proceedings - International Conference on Image Processing, ICIP</conf-name>; <conf-date>17-20 September 2017</conf-date>; <conf-loc>Beijing, China</conf-loc>. <publisher-name>IEEE</publisher-name> (<year>2017</year>). p. <fpage>3645</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1109/ICIP.2017.8296962</pub-id>
</citation>
</ref>
<ref id="B50">
<label>50.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Mei</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>A deep learning-based approach to progressive vehicle re-identification for urban surveillance</article-title>. In: <source>Lecture notes in computer science including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics</source>. <publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2016</year>). p. <fpage>869</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-46475-6_53</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>