<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Future Transp.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Future Transportation</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Future Transp.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-5210</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1662480</article-id>
<article-id pub-id-type="doi">10.3389/ffutr.2026.1662480</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Freeway traffic state classification using vehicle trajectory data</article-title>
<alt-title alt-title-type="left-running-head">Cheng et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/ffutr.2026.1662480">10.3389/ffutr.2026.1662480</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Cheng</surname>
<given-names>Rende</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>An</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sun</surname>
<given-names>Xiaofei</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Fangliang</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Na</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Yu</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Lu</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yu</surname>
<given-names>Quan</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Henan Zhongyuan High-speed Zhengluo Construction Co., Ltd</institution>, <city>Zhengzhou</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Jiangxi Communications Investment Group Co., Ltd</institution>, <city>Nanchang</city>, <country country="CN">China</country>
</aff>
<aff id="aff3">
<label>3</label>
<institution>CCCC Highway Consultants Co., Ltd</institution>, <city>Beijing</city>, <country country="CN">China</country>
</aff>
<aff id="aff4">
<label>4</label>
<institution>School of Electrical and Control Engineering, North China University of Technology</institution>, <city>Beijing</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Quan Yu, <email xlink:href="mailto:yuquan@ncut.edu.cn">yuquan@ncut.edu.cn</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-09">
<day>09</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>7</volume>
<elocation-id>1662480</elocation-id>
<history>
<date date-type="received">
<day>09</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>07</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Cheng, Liu, Sun, Liu, Li, Wang, Yang and Yu.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Cheng, Liu, Sun, Liu, Li, Wang, Yang and Yu</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-09">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>This study proposes the FCM-RF-SMOTE framework to resolve the issue of data imbalance in real-time freeway traffic state classification. The framework integrates Fuzzy C-Means (FCM), Random Forest (RF), and the Synthetic Minority Over-sampling Technique (SMOTE). Traffic states are classified into four categories (smooth, stable, congested, and severely congested) based on quantitative thresholds derived from FCM clustering centers. The validation utilizes SUMO simulation with Gaussian noise and a 10 Hz sampling rate to approximate millimeter-wave radar characteristics. Results show that the proposed framework significantly increases the representation of the severe congestion class from 3.67% to 19.83%. Consequently, the overall classification accuracy is enhanced from 77.67% to 97.80%, demonstrating superior performance in handling imbalanced datasets compared to baseline methods. The findings demonstrate the robustness of the algorithm for traffic monitoring systems, particularly in identifying minority traffic states, with future work planned for physical sensor validation.</p>
</abstract>
<kwd-group>
<kwd>freeway</kwd>
<kwd>fuzzy c-means</kwd>
<kwd>random forest</kwd>
<kwd>SMOTE</kwd>
<kwd>traffic state classification</kwd>
<kwd>vehicle trajectory data</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="11"/>
<equation-count count="13"/>
<ref-count count="36"/>
<page-count count="13"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Transportation Systems Modeling</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Traffic condition classification refers to the process of classifying traffic conditions on roads into different categories based on various traffic parameters and data sources. The goal is to provide accurate information for effective traffic management and control. In this study, traffic state classification involves categorizing real-time traffic into four types: smooth, stable, congested, and severely congested. The parameters used for this classification are speed, speed deviation, headway, and density. The thresholds for each category are derived from the FCM clustering center (see <xref ref-type="table" rid="T4">Table 4</xref>). Specifically, we analyze vehicle trajectory data and extract key parameters, including speed, headway, and density, to accurately identify traffic conditions.</p>
<p>Real-time monitoring of freeway traffic conditions is of utmost importance for traffic managers to effectively manage traffic operations and provide accurate travel guidance, which is an essential component of an active traffic management system. With the development of technology, advanced traffic detectors have emerged. Unlike traditional induction coil detectors, which primarily count traffic volume and occupancy to infer traffic metrics, the latest generation of detectors integrates video and radar technologies, enabling the real-time detection of traffic flow and surrounding environmental conditions. More importantly, these detectors can monitor vehicle trajectories. This allows for the calculation of various detailed traffic parameters, providing a more comprehensive understanding of traffic states (<xref ref-type="bibr" rid="B2">Barth and Boriboonsomsin, 2008</xref>; <xref ref-type="bibr" rid="B34">Yuan, 2020</xref>; <xref ref-type="bibr" rid="B35">Zahid et al., 2020</xref>; <xref ref-type="bibr" rid="B20">Park et al., 2018</xref>). In this study, traffic states are defined based on quantitative thresholds of key parameters, including average speed, headway, and vehicle density. Specifically, four distinct categories are established: (1) smooth traffic (average speed &#x3e;110&#xa0;km/h, density &#x3c;30 veh/km); (2) stable traffic (average speed between 80 and 110&#xa0;km/h, density 30&#x2013;60 veh/km); (3) congested traffic (average speed between 40 and 80&#xa0;km/h, density 60&#x2013;100 veh/km); and (4) severely congested traffic (average speed &#x3c;40&#xa0;km/h, density &#x3e;100 veh/km). This standardized classification ensures consistency and interpretability in subsequent traffic state analysis.</p>
<p>In contrast to traditional induction coil detectors (<xref ref-type="bibr" rid="B18">Nanthawichit et al., 2003</xref>; <xref ref-type="bibr" rid="B32">Wang et al., 2018</xref>), which primarily count traffic volume and occupancy to infer and compute traffic metrics such as vehicle speed, vehicle length, fleet length, and vehicle type, the new detectors possess the capability to monitor the trajectories of all vehicles on the roadway. This advanced functionality allows for the calculation of headway spacing, speed differences between vehicles, and vehicle density within the detector&#x2019;s coverage area, based on the location and speed of the vehicles. The spatiotemporal velocity distribution of different lanes on a certain road during the morning rush hour is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>, while the basic graph of traffic density <italic>versus</italic> velocity is presented in <xref ref-type="fig" rid="F2">Figure 2</xref>. These figures provide a comprehensive understanding of vehicle trajectories and their relationship to traffic flow dynamics.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Vehicle trajectory.</p>
</caption>
<graphic xlink:href="ffutr-07-1662480-g001.tif">
<alt-text content-type="machine-generated">Three color maps display traffic speed over time and space for Lanes 4, 5, and 6. The color gradient ranges from red (0 km/h) to blue (80 km/h). Each graph shows speed patterns between 7:50 and 8:05, with distinct variations per lane.</alt-text>
</graphic>
</fig>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Basic diagram based on flow-density-speed.</p>
</caption>
<graphic xlink:href="ffutr-07-1662480-g002.tif">
<alt-text content-type="machine-generated">A parabolic graph showing the relationship between \(K_{ij}\) and \(Q_{ij}\). The peak is marked as \((K_{m,ij}, Q_{m,ij})\). Two points, \((K_{ijr}, Q_{ijr})\) and \((K_{ijl}, Q_{ijl})\), on either side of the peak are labeled &#x22;Un-congested&#x22; in blue and &#x22;Congested&#x22; in red, respectively, with dashed lines extending vertically from each point to the horizontal axis.</alt-text>
</graphic>
</fig>
<p>The relationship among flow, speed, and density indicates that the conventional model of traffic states, which relies on the classification of macroscopic fundamental diagrams, fundamentally represents a two-dimensional traffic flow model. In this framework, two parameters are utilized to derive the third parameter. However, advancements in roadside detection technology, which facilitate the acquisition of real-time vehicle trajectory data, allow for an extension of the characterization of traffic operational states from the previous two-dimensional dataset to a higher-dimensional representation.</p>
<sec id="s1-1">
<label>1.1</label>
<title>Related work</title>
<p>The classification criteria for traffic operation status can be broadly categorized into two types: absolute metrics and relative metrics. Absolute metrics encompass a wide range of fixed values, including traffic volume as outlined in the Freeway Capacity Manual, average travel speed, saturation levels, and the corresponding load factor, which collectively serve as a comprehensive metric. These metrics categorize traffic operating conditions into six classifications, labeled A through F. For instance, China&#x2019;s Interim Technical Requirements for Road Network Operation Monitoring and Services utilize average travel speed as a metric, dividing traffic operating conditions into five categories: smooth, basic smooth, light congestion, moderate congestion, and severe congestion. Such classifications provide quantitative guidelines for assessing traffic states and facilitate the classification of traffic operation conditions through specific metric indicators. However, it is important to note that traffic flow is influenced by a variety of factors, including road characteristics, traffic patterns, weather conditions, and time of day. Additionally, the inherent uncertainty of traffic flow means that a standardized absolute metric may not accurately represent the actual traffic status on basic road segments under varying spatial and temporal conditions (<xref ref-type="bibr" rid="B15">Manual, 2010</xref>; <xref ref-type="bibr" rid="B10">Author Anonymous, 2012</xref>; <xref ref-type="bibr" rid="B31">Wang, 2019</xref>).</p>
<p>These traditional classification criteria have laid a foundation for traffic state analysis. However, with the development of traffic research, more attention has been shifted to traffic state classification algorithms. Early research focused on California and Bayesian algorithms. Notably, in recent years, researchers have made new improvements to these algorithms. Puangnak used an improved California Algorithm to detect different types of traffic events, effectively improving search capabilities (<xref ref-type="bibr" rid="B22">Puangnak and Chivapreecha, 2019</xref>). This improvement broadens the application scope of the California algorithm. However, it may face difficulties in accurately detecting rare or complex traffic events. Shang utilized the Bayesian algorithm to optimize the handling of traffic data imbalance issues (<xref ref-type="bibr" rid="B26">Shang et al., 2021</xref>). Jin employed an improved Bayesian algorithm to enhance the classification of relevant traffic-flow characteristics (<xref ref-type="bibr" rid="B12">Jin et al., 2023</xref>). Zhao applied an improved Bayesian algorithm to the analysis and prediction of road safety conditions (<xref ref-type="bibr" rid="B36">Zhao et al., 2024</xref>). Ranpura innovated the traditional Bayesian algorithm by combining real-time traffic data to predict the delay time of traffic vehicles (<xref ref-type="bibr" rid="B23">Ranpura et al., 2024</xref>).</p>
<p>This research utilizes various traffic parameters as the foundation for analysis, including traffic flow, speed, and occupancy rate. Traffic state classification was achieved by comparing these parameters against established fixed thresholds (<xref ref-type="bibr" rid="B21">Payne and Tignor, 1978</xref>; <xref ref-type="bibr" rid="B6">Cook and Cleveland, 1974</xref>; <xref ref-type="bibr" rid="B7">Dudek et al., 1974</xref>; <xref ref-type="bibr" rid="B5">Collins et al., 1979</xref>; <xref ref-type="bibr" rid="B1">Ahmed and Cook, 1982</xref>; <xref ref-type="bibr" rid="B16">Martin et al., 2001</xref>; <xref ref-type="bibr" rid="B28">Sheu and Ritchie, 1998</xref>). <xref ref-type="bibr" rid="B9">Hsiao et al. (1994)</xref> were the first to apply the Fuzzy Logic (FL) algorithm for traffic event classification, employing fuzzy rule formulation and membership functions. <xref ref-type="bibr" rid="B8">Hawas (2007)</xref> proposed an urban road traffic event detection algorithm that integrated a fuzzy system to establish the membership function for clustering. <xref ref-type="bibr" rid="B3">Bauza et al. (2010)</xref> developed a fuzzy classifier-based method to analyze the traffic state of a road segment by examining vehicle networking, thereby facilitating small-scale traffic state exchanges. <xref ref-type="bibr" rid="B14">Liu et al. (2014)</xref> utilized Random Forest (RF) techniques to detect traffic events, effectively addressing noise and overfitting issues. <xref ref-type="bibr" rid="B11">Jiang et al. (2020)</xref> employed K-means clustering in combination with a Multi-Layer Perceptron (MLP) to detect urban road traffic status, resulting in a more effective real-time monitoring model.</p>
<p>Recently, Machine Learning methods like RF and Neural Networks have been applied. Sharma employed a neural network based on the Convolutional Neural Network (CNN) model, significantly improving the accuracy of vehicle trajectory prediction on highways (<xref ref-type="bibr" rid="B27">Sharma et al., 2023</xref>). Dr. P. Hasitha Reddy utilized a Deep CNN model to analyze traffic monitoring data, which enhanced control capabilities in traffic management (<xref ref-type="bibr" rid="B24">Reddy et al., 2024</xref>). Park applied Long Short-Term Memory (LSTM) networks to predict vehicle trajectories and traffic volume on urban roads, offering a potentially more accurate and efficient solution compared to traditional methods (<xref ref-type="bibr" rid="B19">Park and Yoon, 2024</xref>). Wan Ming implemented the random forest algorithm to detect traffic violations among taxi drivers and accurately predict the severity of these violations (<xref ref-type="bibr" rid="B17">Ming et al., 2023</xref>). This approach demonstrated high efficiency and precision, outperforming conventional detection methods. Shaaban employed the SMOTE to analyze traffic accident data, effectively addressing the issue of data imbalance and enhancing the reliability of subsequent data-based analyses (<xref ref-type="bibr" rid="B25">Shaaban et al., 2024</xref>).</p>
</sec>
<sec id="s1-2">
<label>1.2</label>
<title>Problem statement and contributions</title>
<p>Although traffic state classification has evolved from early threshold-based algorithms (e.g., California and Bayesian algorithms) to advanced machine learning methods (e.g., Fuzzy Logic and Random Forest), significant challenges remain. Existing studies predominantly focus on balanced datasets, neglecting the real-world prevalence of data imbalance. For instance, fuzzy logic often relies on subjective membership functions, while standard Random Forest models struggle to accurately classify minority states (e.g., severe congestion) in imbalanced or noisy data environments. Furthermore, while recent deep learning approaches improve prediction, they often overlook sensor noise characteristics.</p>
<p>To address these gaps, this study proposes a novel framework denoted as FCM-RF-SMOTE. This approach utilizes SUMO simulation to generate realistic trajectory data with radar-like noise characteristics. By integrating Fuzzy C-Means (FCM) clustering, Random Forest (RF), and the Synthetic Minority Over-sampling Technique (SMOTE), this study aims to develop a robust model for freeway traffic state classification.</p>
<p>The primary contributions of this study are summarized as follows. A standardized classification system is established. By introducing the SMOTE algorithm, the representation of the minority class (severe congestion) is significantly improved, increasing its proportion from 3.67% to 19.83%. Consequently, the overall classification accuracy is enhanced from 77.67% to 97.80%. The fuzzy clustering feature of FCM is utilized to define traffic states objectively, improving upon traditional hard-threshold methods and better reflecting the continuity of traffic flow. The framework is validated using SUMO simulation with Gaussian noise (&#x3c3; &#x3d; 0.1&#xa0;m) and a 10&#xa0;Hz sampling rate, demonstrating the algorithm&#x2019;s robustness in scenarios approximating real-world millimeter-wave radar detection.</p>
</sec>
</sec>
<sec id="s2">
<label>2</label>
<title>Methodology and experimental design</title>
<sec id="s2-1">
<label>2.1</label>
<title>Data preparation</title>
<p>In this study, we employ the Simulation of Urban Mobility (SUMO) platform to model vehicle dynamics on a mainline freeway. The SUMO simulation parameters were calibrated to emulate basic characteristics of millimeter-wave radar. Gaussian positional noise (&#x3c3; &#x3d; 0.1&#xa0;m) was injected via the SUMO &#x2018;noise&#x2019; module, and detectors were configured with a 10&#xa0;Hz sampling rate to approximate radar measurement intervals. However, this simplified model does not account for multi-target tracking or signal attenuation effects (e.g., rain fade).</p>
<p>The simulation is conducted over a duration of 10&#xa0;h, utilizing a time step of 1&#xa0;s and a random seed value of 42. The input traffic flow on the mainline roadway varies between 2,000 and 7,000 vehicles per hour. The composition of the vehicle fleet is characterized by 90% small cars and 10% trucks, as detailed in <xref ref-type="table" rid="T1">Table 1</xref>. Furthermore, to enhance the realism of the simulation, a single accident is introduced at a random point during the simulation period to reflect actual traffic conditions.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Timetable of traffic flow.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Time (s)</th>
<th align="center">Cars (veh/h)</th>
<th align="center">Trucks (veh/h)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0&#x2013;3,600</td>
<td align="center">6,300</td>
<td align="center">700</td>
</tr>
<tr>
<td align="center">3,600&#x2013;7,200</td>
<td align="center">1,800</td>
<td align="center">200</td>
</tr>
<tr>
<td align="center">7,200&#x2013;10,800</td>
<td align="center">4,500</td>
<td align="center">500</td>
</tr>
<tr>
<td align="center">10,800&#x2013;18,000</td>
<td align="center">3,600</td>
<td align="center">400</td>
</tr>
<tr>
<td align="center">18,000&#x2013;25,200</td>
<td align="center">5,400</td>
<td align="center">600</td>
</tr>
<tr>
<td align="center">25,200&#x2013;32,400</td>
<td align="center">4,500</td>
<td align="center">500</td>
</tr>
<tr>
<td align="center">32,400&#x2013;36,000</td>
<td align="center">2,700</td>
<td align="center">300</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The approximate simulation of the characteristics of radar sensors, including measurement noise and sampling rate, provides a realistic dataset for model training. The classification performance achieved in this study (overall accuracy &#x3e;97%) demonstrates the framework&#x2019;s robustness in handling sensor-like noise, which is a common challenge in real-world deployments. This validates the simulation&#x2019;s capability to emulate physical sensor conditions effectively, supporting the feasibility of applying the framework in practical traffic monitoring systems.</p>
<p>It is important to note that while real-world validation is ideal, obtaining high-fidelity trajectory data that includes specific &#x2018;severe congestion&#x2019; events (such as accidents) is extremely difficult and dangerous to instrument in physical environments. Simulation allows us to generate these &#x2018;minority class&#x2019; events safely and consistently, which is essential for validating the effectiveness of the SMOTE algorithm in handling imbalanced data.</p>
<p>The data obtained from the roadside radar video detector encompasses various parameters, including time and coordinate points, vehicle type, instantaneous vehicle speed, and license plate information, among others (see <xref ref-type="table" rid="T2">Table 2</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Raw data table.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Time</th>
<th align="center">vehicle_id</th>
<th align="center">vehicle_lane</th>
<th align="center">vehicle_speed</th>
<th align="center">vehicle_type</th>
<th align="center">vehicle_x</th>
<th align="center">vehicle_y</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="center">E2.1.0</td>
<td align="center">L0_0</td>
<td align="center">20</td>
<td align="center">Car</td>
<td align="center">0</td>
<td align="center">&#x2212;9.38</td>
</tr>
<tr>
<td align="center">0</td>
<td align="center">E2.2.0</td>
<td align="center">L0_1</td>
<td align="center">20</td>
<td align="center">Trucks</td>
<td align="center">0</td>
<td align="center">&#x2212;5.62</td>
</tr>
<tr>
<td align="center">1</td>
<td align="center">E2.1.0</td>
<td align="center">L0_0</td>
<td align="center">22.34</td>
<td align="center">Car</td>
<td align="center">22.34</td>
<td align="center">&#x2212;9.38</td>
</tr>
<tr>
<td align="center">1</td>
<td align="center">E2.1.1</td>
<td align="center">L0_2</td>
<td align="center">20</td>
<td align="center">Car</td>
<td align="center">0</td>
<td align="center">&#x2212;1.88</td>
</tr>
<tr>
<td align="center">1</td>
<td align="center">E2.2.0</td>
<td align="center">L0_1</td>
<td align="center">21.01</td>
<td align="center">Trucks</td>
<td align="center">21.01</td>
<td align="center">&#x2212;5.62</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">E2.1.0</td>
<td align="center">L0_0</td>
<td align="center">23.78</td>
<td align="center">Car</td>
<td align="center">46.12</td>
<td align="center">&#x2212;9.38</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">E2.1.1</td>
<td align="center">L0_2</td>
<td align="center">21.43</td>
<td align="center">Car</td>
<td align="center">21.43</td>
<td align="center">&#x2212;1.88</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">E2.1.2</td>
<td align="center">L0_0</td>
<td align="center">20</td>
<td align="center">Car</td>
<td align="center">0</td>
<td align="center">&#x2212;9.38</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">E2.1.3</td>
<td align="center">L0_2</td>
<td align="center">20</td>
<td align="center">Car</td>
<td align="center">91.2</td>
<td align="center">&#x2212;1.88</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">E2.2.0</td>
<td align="center">L0_1</td>
<td align="center">21.99</td>
<td align="center">Trucks</td>
<td align="center">43.01</td>
<td align="center">&#x2212;5.62</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">E2.1.0</td>
<td align="center">L0_0</td>
<td align="center">26.23</td>
<td align="center">Car</td>
<td align="center">72.35</td>
<td align="center">&#x2212;9.38</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">E2.1.1</td>
<td align="center">L0_2</td>
<td align="center">23.61</td>
<td align="center">Car</td>
<td align="center">45.04</td>
<td align="center">&#x2212;1.88</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">E2.1.2</td>
<td align="center">L0_0</td>
<td align="center">21.86</td>
<td align="center">Car</td>
<td align="center">21.86</td>
<td align="center">&#x2212;9.38</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">E2.1.3</td>
<td align="center">L0_2</td>
<td align="center">22.6</td>
<td align="center">Car</td>
<td align="center">113.8</td>
<td align="center">&#x2212;1.88</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">E2.1.4</td>
<td align="center">L0_1</td>
<td align="center">20</td>
<td align="center">Car</td>
<td align="center">0</td>
<td align="center">&#x2212;5.62</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">E2.1.5</td>
<td align="center">L0_1</td>
<td align="center">20</td>
<td align="center">Car</td>
<td align="center">153.51</td>
<td align="center">&#x2212;5.62</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">E2.2.0</td>
<td align="center">L0_1</td>
<td align="center">23.27</td>
<td align="center">Trucks</td>
<td align="center">66.28</td>
<td align="center">&#x2212;5.62</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">E2.1.0</td>
<td align="center">L0_0</td>
<td align="center">28.54</td>
<td align="center">Car</td>
<td align="center">100.89</td>
<td align="center">&#x2212;9.38</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">E2.1.1</td>
<td align="center">L0_2</td>
<td align="center">25.64</td>
<td align="center">Car</td>
<td align="center">70.68</td>
<td align="center">&#x2212;1.88</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>(1) Speed is calculated using <xref ref-type="disp-formula" rid="e2_1">Equation 1</xref>.
</p>
<p>
<disp-formula id="e2_1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>(2) The speed deviation for an individual vehicle is defined in <xref ref-type="disp-formula" rid="e2_2">Equation 2</xref>. The average speed deviation is calculated as shown in <xref ref-type="disp-formula" rid="e2_3">Equation 3</xref>.</p>
<p>
<disp-formula id="e2_2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
<disp-formula id="e2_3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>(3) Headway distance is determined by <xref ref-type="disp-formula" rid="e2_4">Equation 4</xref>. The average headway is calculated using <xref ref-type="disp-formula" rid="e2_5">Equation 5</xref>.</p>
<p>
<disp-formula id="e2_4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e2_5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>(4) Headway time is defined in <xref ref-type="disp-formula" rid="e2_6">Equation 6</xref>. The average headway time is derived using <xref ref-type="disp-formula" rid="e2_7">Equation 7</xref>.</p>
<p>
<disp-formula id="e2_6">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e2_7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>T</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>(5) Vehicle density is calculated according to <xref ref-type="disp-formula" rid="e2_8">Equation 8</xref>.</p>
<p>
<disp-formula id="e2_8">
<mml:math id="m8">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1.5</mml:mn>
</mml:mrow>
<mml:mn>200</mml:mn>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>Within each 1-s interval, vehicles are organized by lane and direction. Due to high-frequency fluctuations, traffic parameters are aggregated into 1-min intervals to enhance stability. <xref ref-type="table" rid="T3">Table 3</xref> presents the processed dataset parameters: front and rear vehicle positions (<inline-formula id="inf1">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) and speeds (<inline-formula id="inf2">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>); average interval speed (<inline-formula id="inf3">
<mml:math id="m11">
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>); average speed deviation (<inline-formula id="inf4">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>); average headway distance (<inline-formula id="inf5">
<mml:math id="m13">
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>); average headway time (<inline-formula id="inf6">
<mml:math id="m14">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>); and vehicle density (<inline-formula id="inf7">
<mml:math id="m15">
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, calculated as equivalent passenger cars per kilometer). These aggregated metrics serve as the foundational feature vectors for the subsequent classification model.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Traffic operation parameters aggregated at 1&#xa0;min intervals.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Time</th>
<th align="center">Speed</th>
<th align="center">Speed_deviation</th>
<th align="center">Headway</th>
<th align="center">Headway_time</th>
<th align="center">Density</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">660</td>
<td align="center">100.578</td>
<td align="center">4.974465</td>
<td align="center">44.17175</td>
<td align="center">1.583793</td>
<td align="center">68.07377</td>
</tr>
<tr>
<td align="center">720</td>
<td align="center">108.2678</td>
<td align="center">3.951056</td>
<td align="center">50.09159</td>
<td align="center">1.677349</td>
<td align="center">57.09016</td>
</tr>
<tr>
<td align="center">780</td>
<td align="center">104.741</td>
<td align="center">3.813028</td>
<td align="center">47.10683</td>
<td align="center">1.627219</td>
<td align="center">61.02459</td>
</tr>
<tr>
<td align="center">840</td>
<td align="center">107.175</td>
<td align="center">4.493562</td>
<td align="center">50.78096</td>
<td align="center">1.706486</td>
<td align="center">53.27869</td>
</tr>
<tr>
<td align="center">900</td>
<td align="center">106.4732</td>
<td align="center">4.300598</td>
<td align="center">46.88246</td>
<td align="center">1.590783</td>
<td align="center">57.7459</td>
</tr>
<tr>
<td align="center">960</td>
<td align="center">103.2073</td>
<td align="center">3.826104</td>
<td align="center">44.22546</td>
<td align="center">1.549328</td>
<td align="center">66.96721</td>
</tr>
<tr>
<td align="center">1,020</td>
<td align="center">104.3925</td>
<td align="center">5.74591</td>
<td align="center">50.04495</td>
<td align="center">1.726976</td>
<td align="center">56.18852</td>
</tr>
<tr>
<td align="center">1,080</td>
<td align="center">111.6654</td>
<td align="center">2.726264</td>
<td align="center">50.88868</td>
<td align="center">1.64871</td>
<td align="center">53.52459</td>
</tr>
<tr>
<td align="center">&#x2026;</td>
<td align="center">&#x2026;</td>
<td align="center">&#x2026;</td>
<td align="center">&#x2026;</td>
<td align="center">&#x2026;</td>
<td align="center">&#x2026;</td>
</tr>
<tr>
<td align="center">35,940</td>
<td align="center">114.7259</td>
<td align="center">5.277375</td>
<td align="center">73.25056</td>
<td align="center">2.265644</td>
<td align="center">27.37705</td>
</tr>
<tr>
<td align="center">36,000</td>
<td align="center">115.768</td>
<td align="center">5.394076</td>
<td align="center">71.08095</td>
<td align="center">2.189484</td>
<td align="center">27.08333</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Proposed framework</title>
<p>In contemporary traffic state analysis, addressing data complexities is of paramount importance. The Synthetic Minority Over-Sampling Technique (SMOTE) plays a pivotal role in the preprocessing stage. In traffic datasets, class imbalance is a prevalent issue, characterized by the underrepresentation of certain traffic states. SMOTE addresses this concern by generating synthetic samples for minority classes. By incorporating these synthetic samples into the dataset, SMOTE effectively rectifies the class distribution, ensuring that subsequent classification algorithms are not biased towards majority classes.</p>
<p>Following this, the Fuzzy C-means algorithm is applied to categorize traffic states. In the context of traffic analysis, it employs a set of traffic-status metrics to partition data into distinct traffic-state classes. Metrics such as vehicle speed, traffic flow rate, and occupancy are typically considered. By iteratively updating the membership values of each data point across different clusters and adjusting the cluster centers, the algorithm generates a traffic-status dataset for each class.</p>
<p>Finally, the Random Forest (RF) algorithm is implemented within the traffic-state classification decision module. RF, a combinatorial classifier, operates as a non-parametric classification algorithm. By analyzing the four classes of traffic-state data generated by the Fuzzy C-means algorithm, a traffic-state classifier is developed. This classifier can accurately assess the real-time operational status of road traffic, even in the presence of noisy data and missing values. In order to assess the temporal variability of traffic state changes, detector deployment intervals of 100&#xa0;m, 200&#xa0;m, 500&#xa0;m, and 1,000&#xa0;m were utilized. In summary, this paper constructs a traffic state discriminative model based on FCM-RF. The process of the FCM-RF traffic state classification model is shown in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Flow chart of traffic state classification model of FCM-RF.</p>
</caption>
<graphic xlink:href="ffutr-07-1662480-g003.tif">
<alt-text content-type="machine-generated">Flowchart depicting a traffic operation analysis process. The steps include setting up scenarios, obtaining vehicle trajectory data, extracting traffic features, conducting FCM cluster analysis, evaluating clustering, obtaining cluster centers, and getting traffic state datasets. It then branches to building an RF classification model, evaluating its accuracy, and checking if accuracy exceeds ninety-five percent. If yes, it yields the classification result; if no, it loops back.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Algorithm principles</title>
<sec id="s2-3-1">
<label>2.3.1</label>
<title>SMOTE</title>
<p>The Synthetic Minority Over-Sampling Technique (SMOTE) proposed by Chawla et al. generates synthetic samples for minority classes to solve the problem of data imbalance (<xref ref-type="bibr" rid="B4">Chawla et al., 2002</xref>). The generation mechanism of this algorithm is simple and mainly consists of two parts: selecting k nearest neighbors of minority class samples based on the measurement method and generating new samples by interpolating between these minority class samples and neighboring samples using a linear interpolation strategy. The specific calculation process is as follows. For each minority class sample <inline-formula id="inf8">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, SMOTE:</p>
<p>Finds its k-nearest neighbors;</p>
<p>Randomly selects one neighbor <inline-formula id="inf9">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>;</p>
<p>Generates a synthetic sample <inline-formula id="inf10">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">w</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> along the line segment between <inline-formula id="inf11">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf12">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>A synthetic sample <italic>x<sub>new</sub></italic> is generated along the line segment as shown in <xref ref-type="disp-formula" rid="e2_9">Equation 9</xref>.<disp-formula id="e2_9">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3bb;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>where <inline-formula id="inf13">
<mml:math id="m22">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a random number between 0 and 1.</p>
</sec>
<sec id="s2-3-2">
<label>2.3.2</label>
<title>Fuzzy C-means</title>
<p>Fuzzy clustering is a type of soft clustering that differs from hard clustering in that the membership function value of a sample for each category can vary between 0 and 1. This feature not only highlights the interrelationships among data points but also reflects the potential transitional states between categories. The objective function to minimize is given by <xref ref-type="disp-formula" rid="e2_10">Equation 10</xref>:<disp-formula id="e2_10">
<mml:math id="m23">
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mtext>&#x200a;</mml:mtext>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mtext>&#x200a;</mml:mtext>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:msup>
<mml:mo>&#x2225;</mml:mo>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where:</p>
<p>
<inline-formula id="inf14">
<mml:math id="m24">
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of data points; <inline-formula id="inf15">
<mml:math id="m25">
<mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of clusters; <inline-formula id="inf16">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the membership value of data point <inline-formula id="inf17">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to cluster <inline-formula id="inf18">
<mml:math id="m28">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf19">
<mml:math id="m29">
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the fuzziness parameter (controls the degree of overlap between clusters); <inline-formula id="inf20">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the centroid of cluster <inline-formula id="inf21">
<mml:math id="m31">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The membership values <inline-formula id="inf22">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are updated iteratively using <xref ref-type="disp-formula" rid="e2_11">Equation 11</xref>:<disp-formula id="e2_11">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mfrac>
<mml:mn mathvariant="bold">2</mml:mn>
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf23">
<mml:math id="m34">
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the distance between the <inline-formula id="inf24">
<mml:math id="m35">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th sample and the <inline-formula id="inf25">
<mml:math id="m36">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th cluster center.</p>
<p>The cluster centroids <inline-formula id="inf26">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are updated as shown in <xref ref-type="disp-formula" rid="e2_12">Equation 12</xref>:<disp-formula id="e2_12">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>Fuzzy clustering provides a more objective and realistic representation of data categorization. Clustering analysis indicates that traffic states exhibit an inherent fuzzy nature, characterized by the lack of distinct boundaries between different traffic states. Therefore, this paper proposes the utilization of fuzzy clustering analysis as a methodological approach for evaluating traffic state metrics in basic freeway sections.</p>
</sec>
<sec id="s2-3-3">
<label>2.3.3</label>
<title>Random forest</title>
<p>The Random Forest algorithm is an ensemble learning method that combines multiple decision trees to improve the accuracy and stability of the model. Here is a detailed description of its principle:</p>
<p>The algorithm starts with performing bootstrap sampling on the original training dataset <inline-formula id="inf27">
<mml:math id="m39">
<mml:mrow>
<mml:mi mathvariant="bold-italic">D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. For each iteration, a new training subset <inline-formula id="inf28">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is randomly sampled with replacement from <inline-formula id="inf29">
<mml:math id="m41">
<mml:mrow>
<mml:mi mathvariant="bold-italic">D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The probability of each sample being selected in each drawing remains the same.</p>
<p>For each bootstrap sample <inline-formula id="inf30">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, a decision tree <inline-formula id="inf31">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is constructed. During the construction of the decision tree, at each node, a random subset of features is selected. The best split among these features is chosen based on a certain impurity measure. Commonly used impurity measures include Gini impurity and entropy.</p>
<p>For a node <inline-formula id="inf32">
<mml:math id="m44">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> with <inline-formula id="inf33">
<mml:math id="m45">
<mml:mrow>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> classes and <inline-formula id="inf34">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> being the proportion of samples in class <inline-formula id="inf35">
<mml:math id="m47">
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in node <inline-formula id="inf36">
<mml:math id="m48">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the Gini impurity is defined as <inline-formula id="inf37">
<mml:math id="m49">
<mml:mrow>
<mml:mi mathvariant="bold-italic">G</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. The feature that leads to the greatest reduction in Gini impurity is chosen as the splitting feature.</p>
<p>The entropy of a node <inline-formula id="inf38">
<mml:math id="m50">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is defined as <inline-formula id="inf39">
<mml:math id="m51">
<mml:mrow>
<mml:mi mathvariant="bold-italic">H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="bold">log</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Similar to the Gini impurity, the feature that causes the largest decrease in entropy is selected for splitting. The tree is grown until a certain stopping criterion is met, such as when a maximum depth <inline-formula id="inf40">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mi mathvariant="bold-italic">max</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is reached or the number of samples in a node is less than a certain threshold <inline-formula id="inf41">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Multiple decision trees <inline-formula id="inf42">
<mml:math id="m54">
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> are trained in this way, and the Random Forest <inline-formula id="inf43">
<mml:math id="m55">
<mml:mrow>
<mml:mi mathvariant="bold-italic">F</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is formed by combining these trees. When making predictions, for a classification task, each decision tree in the forest votes for a class. Let <inline-formula id="inf44">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the predicted class of the <inline-formula id="inf45">
<mml:math id="m57">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th tree for input <inline-formula id="inf46">
<mml:math id="m58">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The final prediction <inline-formula id="inf47">
<mml:math id="m59">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the class with the most votes, that is, <inline-formula id="inf48">
<mml:math id="m60">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">arg</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi mathvariant="bold">max</mml:mi>
<mml:mo>&#x2061;</mml:mo>
</mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:munder>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf49">
<mml:math id="m61">
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the indicator function. For a regression task, the average of the predictions of all the trees is usually taken as the final prediction. If <inline-formula id="inf50">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the prediction of the <inline-formula id="inf51">
<mml:math id="m63">
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th tree for input <inline-formula id="inf52">
<mml:math id="m64">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, then the final prediction <inline-formula id="inf53">
<mml:math id="m65">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The Random Forest algorithm can also calculate the importance of each feature. One common method is the Mean Decrease in Impurity (MDI). For a feature <inline-formula id="inf54">
<mml:math id="m66">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the MDI is calculated as the average decrease in impurity across all trees in the forest when splitting on feature <inline-formula id="inf55">
<mml:math id="m67">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Let <inline-formula id="inf56">
<mml:math id="m68">
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the impurity of node <inline-formula id="inf57">
<mml:math id="m69">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The importance of feature <inline-formula id="inf58">
<mml:math id="m70">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is given by <xref ref-type="disp-formula" rid="e2_13">Equation 13</xref>:<disp-formula id="e2_13">
<mml:math id="m71">
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">T</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">g</mml:mi>
<mml:mi mathvariant="bold-italic">h</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold-italic">w</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>where <inline-formula id="inf59">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf60">
<mml:math id="m73">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">N</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">g</mml:mi>
<mml:mi mathvariant="bold-italic">h</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the left and right child nodes of node <inline-formula id="inf61">
<mml:math id="m74">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> after splitting on feature <inline-formula id="inf62">
<mml:math id="m75">
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf63">
<mml:math id="m76">
<mml:mrow>
<mml:mi mathvariant="bold-italic">w</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the weight of node <inline-formula id="inf64">
<mml:math id="m77">
<mml:mrow>
<mml:mi mathvariant="bold-italic">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, usually proportional to the number of samples in the node.</p>
<p>Note that all validations in this study are based on simulated data. While the simulation incorporates basic sensor characteristics (noise and sampling rate), it does not account for complex real-world factors like multi-path interference or weather effects, which may impact actual deployment performance.</p>
<p>In this framework, the Fuzzy C-Means (FCM) algorithm is employed solely to generate class labels (Smooth, Stable, Congested, Severely Congested) for the training dataset. The input features fed into the Random Forest classifier are the five original traffic parameters extracted from the vehicle trajectory data: interval speed (<inline-formula id="inf65">
<mml:math id="m78">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), speed deviation (<inline-formula id="inf66">
<mml:math id="m79">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), headway (<inline-formula id="inf67">
<mml:math id="m80">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), headway time (<inline-formula id="inf68">
<mml:math id="m81">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), and vehicle density (<inline-formula id="inf69">
<mml:math id="m82">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>5</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>). The FCM membership values are utilized to determine the hard label for each sample but are not included as feature vectors in the Random Forest training process.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Traffic state characterization based on FCM</title>
<p>Initially, the interval traffic data, organized in 1-min increments, undergoes clustering to classify the various traffic states. Each identified category is assigned a label, thereby generating a training dataset for the state classification algorithm.</p>
<p>The processed feature variables function as training samples, with each sample point represented as (<inline-formula id="inf70">
<mml:math id="m83">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf71">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf72">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf73">
<mml:math id="m86">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf74">
<mml:math id="m87">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>5</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), where <inline-formula id="inf75">
<mml:math id="m88">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf76">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf77">
<mml:math id="m90">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf78">
<mml:math id="m91">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf79">
<mml:math id="m92">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>5</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> correspond to interval speed, speed deviation, headway, headway time, and vehicle density, respectively. The Fuzzy C-Means (FCM) algorithm is utilized to cluster the traffic states, identify the category to which each sample belongs, and assign category labels to the samples. Subsequently, the traffic flow parameter features of each category are analyzed to characterize specific traffic states.</p>
<p>As illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>, the traffic flow data have been categorized using the Fuzzy C-Means (FCM) algorithm, resulting in four distinct clusters that correspond to four categories of traffic states: smooth, stable, congested, and severely congested. <xref ref-type="fig" rid="F4">Figures 4a,c,e</xref> depict the distribution of speed, headway, and vehicle density across these categories, clearly indicating that the smooth state is characterized by the highest speed, the largest headway, and low vehicle density. Transitioning from the smooth state to the severely congested state is associated with an increase in vehicle density, a decrease in speed, and a decrease in headway. <xref ref-type="fig" rid="F4">Figures 4b,d</xref> illustrate the distribution of speed deviation and headway time across the various categories. In the smooth, stable, and congested states, both speed deviation and headway time exhibit abrupt changes primarily in response to blockages caused by accidents, while remaining relatively stable in other states. As shown in <xref ref-type="table" rid="T4">Table 4</xref>, during the detection process, the stable state predominates, accounting for more than 50% of the observations, whereas the severely congested state is the least prevalent, comprising only 3.67%. The variations in each traffic flow parameter align with the flow changes of the simulation input and adhere to established traffic flow theory.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Clustering results for each traffic parameter. <bold>(a)</bold> Speed Cluster Picture. <bold>(b)</bold> Speed Deviation Cluster Picture. <bold>(c)</bold> Headway Cluster Picture. <bold>(d)</bold> Headway Time Cluster Picture. <bold>(e)</bold> Density Cluster Picture.</p>
</caption>
<graphic xlink:href="ffutr-07-1662480-g004.tif">
<alt-text content-type="machine-generated">Five scatter plots each depict data on traffic flow. Each graph is labeled: (a) Speed, (b) Speed Deviation, (c) Headway, (d) Headway Time, and (e) Density, with clusters marked by colors representing fluent, unfluctuate, crowded, and congest traffic conditions over time.</alt-text>
</graphic>
</fig>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Clustering center of each traffic state.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">State</th>
<th align="center">Speed</th>
<th align="center">Speed deviation</th>
<th align="center">Headway</th>
<th align="center">Headway time</th>
<th align="center">Density</th>
<th align="center">Proportion</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Smooth</td>
<td align="center">113.9</td>
<td align="center">5.1</td>
<td align="center">68.0</td>
<td align="center">2.1</td>
<td align="center">29.9</td>
<td align="center">30.86%</td>
</tr>
<tr>
<td align="center">Stable</td>
<td align="center">104.7</td>
<td align="center">4.5</td>
<td align="center">49.7</td>
<td align="center">1.7</td>
<td align="center">55.0</td>
<td align="center">51.59%</td>
</tr>
<tr>
<td align="center">Congested</td>
<td align="center">77.6</td>
<td align="center">5.5</td>
<td align="center">37.6</td>
<td align="center">2.2</td>
<td align="center">80.9</td>
<td align="center">13.85%</td>
</tr>
<tr>
<td align="center">Severely congested</td>
<td align="center">9.7</td>
<td align="center">3.1</td>
<td align="center">11.2</td>
<td align="center">6.3</td>
<td align="center">285.7</td>
<td align="center">3.67%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>It is worth noting that the state boundaries derived via FCM are data-driven and reflect the intrinsic distribution of the dataset rather than pre-defined engineering standards. As shown in <xref ref-type="table" rid="T4">Table 4</xref>, the &#x2018;Stable&#x2019; state exhibits higher average speeds (approx. 104.7&#xa0;km/h) compared to typical urban traffic management thresholds (e.g., 40&#x2013;60&#xa0;km/h). This is because the simulation represents a freeway scenario where traffic flow remains fast and stable until it reaches a critical density, after which it rapidly breaks down into congestion. Unlike hard-threshold methods (such as those in the Highway Capacity Manual) which may exhibit subjectivity, the FCM-derived thresholds objectively capture these specific flow-density transitions inherent to the monitored roadway section. Furthermore, the clustering outcomes for key traffic parameters (speed, headway, and density) align with the findings of <xref ref-type="bibr" rid="B33">Yu et al. (2015)</xref>, supporting the validity of this approach. Consequently, it is concluded that the traffic states classified by the Fuzzy C-Means algorithm are consistent with the operational characteristics of traffic flow.</p>
</sec>
<sec sec-type="results|discussion" id="s4">
<label>4</label>
<title>Classification model results and discussion</title>
<sec id="s4-1">
<label>4.1</label>
<title>Baseline performance on imbalanced data</title>
<p>The historical traffic data were clustered and analyzed to generate a dataset comprising four distinct types of traffic states. Sixty percent of the data were designated as the training set, while the remaining forty percent were utilized as test samples. The composition of the data samples is presented in <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Sample composition of the training and test sets of the algorithm.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Dataset type</th>
<th align="center">Smooth</th>
<th align="center">Stable</th>
<th align="center">Congested</th>
<th align="center">Severely congested</th>
<th align="center">Total sample</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Training set</td>
<td align="center">821</td>
<td align="center">1,372</td>
<td align="center">368</td>
<td align="center">98</td>
<td align="center">2,659</td>
</tr>
<tr>
<td align="center">Test set</td>
<td align="center">547</td>
<td align="center">915</td>
<td align="center">246</td>
<td align="center">66</td>
<td align="center">1774</td>
</tr>
<tr>
<td align="center">Total</td>
<td align="center">1,368</td>
<td align="center">2,287</td>
<td align="center">614</td>
<td align="center">164</td>
<td align="center">4,433</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The Random Forest (RF) algorithm is executed using the scikit-learn library in Python, and it computes the confusion matrix, which is presented in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Classification algorithm confusion matrix.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" colspan="2" align="center">Actual state</th>
<th colspan="4" align="center">Predict</th>
</tr>
<tr>
<th align="center">Smooth</th>
<th align="center">Stable</th>
<th align="center">Congested</th>
<th align="center">Severely congested</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="center">Actual</td>
<td align="center">Smooth</td>
<td align="center">532</td>
<td align="center">0</td>
<td align="center">11</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">Stable</td>
<td align="center">15</td>
<td align="center">892</td>
<td align="center">0</td>
<td align="center">6</td>
</tr>
<tr>
<td align="center">Congested</td>
<td align="center">0</td>
<td align="center">6</td>
<td align="center">247</td>
<td align="center">65</td>
</tr>
<tr>
<td align="center">Severely congested</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T7">Table 7</xref>, the precision of traffic state classification exceeds 95% for the smooth and stable states, indicating the feasibility of the algorithm. However, the limited sample size for the congested and severely congested states results in an unbalanced data distribution, which adversely affects the algorithm&#x2019;s discriminatory accuracy. To address this issue, the Synthetic Minority Over-sampling Technique (SMOTE) is employed to ensure a balanced sample size across all categories.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Random forest discriminant accuracy rate.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Traffic state</th>
<th align="center">Precision</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Smooth</td>
<td align="center">0.9797</td>
</tr>
<tr>
<td align="center">Stable</td>
<td align="center">0.9769</td>
</tr>
<tr>
<td align="center">Congested</td>
<td align="center">0.7767</td>
</tr>
<tr>
<td align="center">Severely congested</td>
<td align="center">0</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Data augmentation using SMOTE</title>
<p>The unbalanced data samples from each state adversely affect the performance of the classification algorithm; the distribution of traffic states is shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. To address this issue, the Synthetic Minority Over-sampling Technique (SMOTE) is employed to generate synthetic data, which is then incorporated into the original dataset. This approach aims to balance the data distribution and enhance the accuracy of the classification algorithm.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Traffic state distribution.</p>
</caption>
<graphic xlink:href="ffutr-07-1662480-g005.tif">
<alt-text content-type="machine-generated">Pie chart showing traffic conditions. Stable: 51% (orange), Smooth: 31% (blue), Congested: 14% (gray), Severely Congested: 4% (yellow). Each section is labeled with corresponding percentages.</alt-text>
</graphic>
</fig>
<p>As illustrated in <xref ref-type="fig" rid="F5">Figure 5</xref>, the distribution of each traffic state is markedly uneven, with the sample size for the stable state significantly exceeding that of the congested and severely congested states. This data imbalance biases the learning of the random forest algorithm, resulting in a low accuracy rate for the congested and severely congested states, which subsequently undermines the effectiveness of traffic state classification. Consequently, the application of the SMOTE algorithm is necessary to address the imbalance in the data.</p>
<p>From <xref ref-type="table" rid="T8">Table 8</xref>, it can be observed that after clustering the traffic flow dataset into 1-min intervals, there are only 164 instances of severe congestion and 614 instances of congested state. By employing the SMOTE algorithm to synthetically augment the blocked data, the quantity of severe congestion data has been increased sevenfold, while the quantity of congested state data has been doubled. The distribution of data categories for each state following the application of the SMOTE algorithm is presented in <xref ref-type="table" rid="T9">Table 9</xref> and illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>.</p>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>Statistical distribution of traffic status data.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Traffic state</th>
<th align="center">Number</th>
<th align="center">Proportion</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Smooth</td>
<td align="center">1,368</td>
<td align="center">30.86%</td>
</tr>
<tr>
<td align="center">Stable</td>
<td align="center">2,287</td>
<td align="center">51.59%</td>
</tr>
<tr>
<td align="center">Congested</td>
<td align="center">614</td>
<td align="center">13.85%</td>
</tr>
<tr>
<td align="center">Severely congested</td>
<td align="center">164</td>
<td align="center">3.67%</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T9" position="float">
<label>TABLE 9</label>
<caption>
<p>Distribution of traffic states after SMOTE.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Traffic state</th>
<th align="center">Number</th>
<th align="center">Proportion</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Smooth</td>
<td align="center">1,368</td>
<td align="center">22.08%</td>
</tr>
<tr>
<td align="center">Stable</td>
<td align="center">2,287</td>
<td align="center">36.91%</td>
</tr>
<tr>
<td align="center">Congested</td>
<td align="center">1,312</td>
<td align="center">21.18%</td>
</tr>
<tr>
<td align="center">Severely congested</td>
<td align="center">1,228</td>
<td align="center">19.83%</td>
</tr>
<tr>
<td align="center">Total</td>
<td align="center">6,195</td>
<td align="center">100%</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Distribution of traffic status after SMOTE processing.</p>
</caption>
<graphic xlink:href="ffutr-07-1662480-g006.tif">
<alt-text content-type="machine-generated">Pie chart displaying four categories: Stable at 37% in orange, Smooth at 22% in blue, Congested at 21% in gray, and Severely Congested at 20% in yellow.</alt-text>
</graphic>
</fig>
<p>The original dataset has been enhanced with data on congested and severely congested states to ensure that the proportions of the four traffic states are balanced, while also preserving a larger sample size for the stable state. After applying the SMOTE algorithm, the distribution of the four traffic states has become more balanced, with severe congestion now accounting for 19.83% of the total. This adjustment effectively addresses the class imbalance issue present in the original dataset, where severe congestion only made up 3.67%. As a result, the dataset processed using the SMOTE algorithm can now serve as a more representative sample for traffic state classification.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Performance of FCM-RF-SMOTE framework</title>
<p>Using the balanced dataset generated by SMOTE (as detailed in <xref ref-type="table" rid="T9">Table 9</xref>), the Random Forest model was retrained using the same 60:40 training-test split ratio. The Random Forest (RF) algorithm is implemented using the scikit-learn library in Python, and it computes the confusion matrix, as illustrated in <xref ref-type="table" rid="T10">Table 10</xref>.</p>
<table-wrap id="T10" position="float">
<label>TABLE 10</label>
<caption>
<p>The confusion matrix of the algorithm.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" colspan="2" align="center">Actual state</th>
<th colspan="4" align="center">Predict</th>
</tr>
<tr>
<th align="center">Smooth</th>
<th align="center">Stable</th>
<th align="center">Congested</th>
<th align="center">Severely congested</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="center">Actual</td>
<td align="center">Smooth</td>
<td align="center">530</td>
<td align="center">0</td>
<td align="center">17</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">Stable</td>
<td align="center">8</td>
<td align="center">900</td>
<td align="center">7</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">Congested</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">491</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">Severely congested</td>
<td align="center">0</td>
<td align="center">5</td>
<td align="center">0</td>
<td align="center">520</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As illustrated in <xref ref-type="table" rid="T10">Table 10</xref>, the accuracy of the four categories of traffic state classification exceeds 97%, indicating superior algorithm performance. The application of SMOTE (Synthetic Minority Over-sampling Technique) to address imbalanced data has enhanced the discriminative capability for the congested and severely congested states. As illustrated in <xref ref-type="table" rid="T11">Table 11</xref>, the classification accuracy exceeds 97% in simulation, indicating potential for real-world applications pending field validation.</p>
<table-wrap id="T11" position="float">
<label>TABLE 11</label>
<caption>
<p>Random forest discriminant accuracy rate.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Traffic state</th>
<th align="center">Precision</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Smooth</td>
<td align="center">0.9797</td>
</tr>
<tr>
<td align="center">Stable</td>
<td align="center">0.9856</td>
</tr>
<tr>
<td align="center">Congested</td>
<td align="center">0.9823</td>
</tr>
<tr>
<td align="center">Severely congested</td>
<td align="center">0.9780</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Discussion</title>
<p>The comparative analysis between the baseline and the proposed framework demonstrates the critical role of data balancing. While the baseline RF model struggled with minority classes due to data imbalance (0% precision for severe congestion), the integration of SMOTE significantly enhanced the model&#x2019;s sensitivity, boosting the classification accuracy for severe congestion to 97.80%. These results demonstrate the framework&#x2019;s robustness, which is further validated by the rigorous design of the simulation environment. The emulation methodology is informed by established research in vehicular sensing and communications, notably the contributions of Sommer et al. (<xref ref-type="bibr" rid="B29">Sommer et al., 2011</xref>), who elucidated the effects of shadowing and signal degradation on vehicular communications, and Stiller et al. (<xref ref-type="bibr" rid="B30">Stiller et al., 2025</xref>), who conducted a review of sensor fusion techniques aimed at enhancing the reliability of traffic monitoring. Furthermore, insights from Li and Yoon (<xref ref-type="bibr" rid="B13">Li and Yoon, 2023</xref>) regarding radar-camera fusion guided the design of the simulation, effectively replicating sensor imperfections and multi-target scenarios that are typical in real-world applications.</p>
<p>The use of SUMO simulation allowed for the injection of Gaussian noise (&#x3c3; &#x3d; 0.1&#xa0;m) and limited sampling rates (10&#xa0;Hz), approximating the characteristics of commercial millimeter-wave radars. The model&#x2019;s high performance under these noisy conditions suggests a degree of robustness suitable for deployment in hardware-constrained environments.</p>
<p>Despite these promising results, several limitations warrant future investigation. Firstly, while the simulation mimics basic sensor noise, it does not fully replicate complex real-world factors such as multi-path interference, occlusions in multi-lane scenarios, or signal attenuation caused by adverse weather (e.g., rain or fog). Secondly, the current framework assumes homogeneous freeway traffic; its applicability to urban environments with complex interactions (e.g., intersections) remains to be tested. Thirdly, relying on a single sensor type may limit reliability.</p>
<p>Future work will focus on validating the model with physical sensor data, extending the framework to urban scenarios, and exploring multi-source data integration (e.g., fusing radar with camera or GPS data) to further enhance system robustness.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>In this paper, we propose a novel FCM-RF-SMOTE framework for traffic state classification, integrating Fuzzy C-Means (FCM) clustering, Random Forest (RF) classification, and the Synthetic Minority Over-sampling Technique (SMOTE). The primary contributions of this study are as follows.<list list-type="order">
<list-item>
<p>A standardized classification system was derived based on the clustering centers identified by the FCM algorithm. The analysis defined the following data-driven thresholds: smooth (&#x3e;110&#xa0;km/h), stable (80&#x2013;110&#xa0;km/h), congested (40&#x2013;80&#xa0;km/h), and severely congested (&#x3c;40&#xa0;km/h).</p>
</list-item>
<list-item>
<p>A novel FCM-RF-SMOTE framework is proposed to effectively address data imbalance, improving the minority class (severe congestion) proportion from 3.67% to 19.83%, and enhancing classification accuracy from 77.67% to 97.80%.</p>
</list-item>
<list-item>
<p>The fuzzy clustering feature of FCM quantifies ambiguous traffic state boundaries, improving upon traditional threshold-based methods and better reflecting the continuity of traffic flow.</p>
</list-item>
<list-item>
<p>The framework is initially validated via SUMO simulation with simplified radar-like noise (&#x3c3; &#x3d; 0.1&#xa0;m, 10&#xa0;Hz). Its performance in real-world scenarios requires further field testing.</p>
</list-item>
</list>
</p>
<p>Looking ahead, future work should focus on three main directions: (1) Long-Term Deployment, evaluating the framework&#x2019;s performance in real-world traffic management systems over extended periods and under diverse conditions; (2) Integration with Smart City Infrastructure, such as connected vehicles and intelligent traffic signals, to enable more dynamic and adaptive traffic management; and (3) Advanced Algorithms for Predictive Analysis, exploring the use of advanced algorithms like Convolutional Neural Networks (CNNs) and Transformers to further enhance the framework&#x2019;s ability to predict traffic congestion and optimize traffic flow in real time. In conclusion, the FCM-RF-SMOTE framework provides a robust and efficient approach to traffic state classification, with significant implications for intelligent transportation systems and traffic management.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>RC: Methodology, Validation, Conceptualization, Writing &#x2013; review and editing. AL: Writing &#x2013; review and editing, Conceptualization. XS: Writing &#x2013; original draft, Investigation. FL: Writing &#x2013; original draft, Software. NL: Writing &#x2013; original draft, Formal Analysis. YW: Project administration, Writing &#x2013; original draft, Supervision. LY: Validation, Writing &#x2013; original draft. QY: Writing &#x2013; original draft, Visualization, Writing &#x2013; review and editing.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>Song Shujian was involved in submitting the manuscript and replying to reviewers on behalf of the corresponding author.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Author RC was employed by Henan Zhongyuan High-speed Zhengluo Construction Co., Ltd. Author AL was employed by Jiangxi Communications Investment Group Co., Ltd. Authors XS, FL, NL, YW, and LY were employed by CCCC Highway Consultants Co., Ltd.</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2558058/overview">Stefano de Luca</ext-link>, University of Salerno, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3134090/overview">Muhammad Javed</ext-link>, Shanghai Maritime University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3284944/overview">Kuo-Chuan Wu</ext-link>, National Pingtung University, Taiwan</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmed</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Cook</surname>
<given-names>A. R.</given-names>
</name>
</person-group> (<year>1982</year>). <article-title>Application of time-series analysis techniques to highway incident detection</article-title>. <source>Transp. Res. Rec.</source> <volume>841</volume>, <fpage>19</fpage>&#x2013;<lpage>21</lpage>.</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="book">
<collab>Author Anonymous</collab> (<year>2012</year>). <source>Interim technical requirements for road network operation monitoring and service</source>. <publisher-loc>Beijing</publisher-loc>: <publisher-name>China Communications Press</publisher-name>.</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barth</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Boriboonsomsin</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Real-world carbon dioxide impacts of traffic congestion</article-title>. <source>Transp. Res. Rec.</source> <volume>2058</volume>, <fpage>163</fpage>&#x2013;<lpage>171</lpage>. <pub-id pub-id-type="doi">10.3141/2058-20</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Bauza</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gozalvez</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sanchez-Soriano</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Road traffic congestion detection through cooperative vehicle-to-vehicle communications</article-title>,&#x201d; in <source>IEEE local computer network conference</source>. <publisher-name>IEEE</publisher-name>, <fpage>606</fpage>&#x2013;<lpage>612</lpage>.</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chawla</surname>
<given-names>N. V.</given-names>
</name>
<name>
<surname>Bowyer</surname>
<given-names>K. W.</given-names>
</name>
<name>
<surname>Hall</surname>
<given-names>L. O.</given-names>
</name>
<name>
<surname>Kegelmeyer</surname>
<given-names>W. P.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>SMOTE: synthetic minority over-sampling technique</article-title>. <source>J. Artif. Intell. Res.</source> <volume>16</volume> (<issue>1</issue>), <fpage>321</fpage>&#x2013;<lpage>357</lpage>. <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Collins</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Hopkins</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>1979</year>). <article-title>Automatic incident Detection-TRRL algorithms HIOCC and PATREG</article-title>. <source>TRRL Suppl. Rep. 526. Transp. Road Res. Laboratory</source>.</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cook</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Cleveland</surname>
<given-names>D. E.</given-names>
</name>
</person-group> (<year>1974</year>). <article-title>Detection of highway capacity-reducing incidents by traffic-stream measurements</article-title>. <source>Transp. Res. Rec.</source> <volume>495</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dudek</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Messer</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Nuckles</surname>
<given-names>N. B.</given-names>
</name>
</person-group> (<year>1974</year>). <article-title>Incident detection on urban highways</article-title>. <source>Transp. Res. Rec.</source> <volume>495</volume>, <fpage>12</fpage>&#x2013;<lpage>24</lpage>.</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hawas</surname>
<given-names>Y. E.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>A fuzzy-based system for incident detection in urban street networks</article-title>. <source>Transp. Res. Part C Emerg. Technol.</source> <volume>15</volume> (<issue>2</issue>), <fpage>69</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1016/j.trc.2007.02.001</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hsiao</surname>
<given-names>C. H.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C. T.</given-names>
</name>
<name>
<surname>Cassidy</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Application of fuzzy logic and neural networks to automatically detect highway traffic incidents</article-title>. <source>J. Transp. Eng.</source> <volume>120</volume> (<issue>5</issue>), <fpage>753</fpage>&#x2013;<lpage>772</lpage>. <pub-id pub-id-type="doi">10.1061/(asce)0733-947x(1994)120:5(753)</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A novel method about the representation and discrimination of traffic state</article-title>. <source>Sensors</source> <volume>20</volume>, <fpage>5039</fpage>. <pub-id pub-id-type="doi">10.3390/s20185039</pub-id>
<pub-id pub-id-type="pmid">32899826</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>W. F.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>R. X.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>G. G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An efficient variational Bayesian algorithm for calibrating fundamental diagrams and its probabilistic sensitivity analysis</article-title>. <source>Transp. B Transp. Dyn.</source> <volume>11</volume> (<issue>1</issue>), <fpage>1616</fpage>&#x2013;<lpage>1641</lpage>. <pub-id pub-id-type="doi">10.1080/21680566.2023.2231159</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>H.-S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Sensor fusion-based vehicle detection and tracking using a single camera and radar at a traffic intersection</article-title>. <source>Sensors</source> <volume>23</volume> (<issue>10</issue>), <fpage>4888</fpage>. <pub-id pub-id-type="doi">10.3390/s23104888</pub-id>
<pub-id pub-id-type="pmid">37430801</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Design and analysis of traffic incident detection based on random forest</article-title>. <source>J. Southeast Univ. Engl. Ed.</source> <volume>1</volume>, <fpage>88</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.3969/j.issn.1003-7985.2014.01.017</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Manual</surname>
<given-names>H. C.</given-names>
</name>
</person-group> (<year>2010</year>). <source>HCM2010. Transportation research board</source>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>National Research Council</publisher-name>.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Martin</surname>
<given-names>P. T.</given-names>
</name>
<name>
<surname>Perrin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kump</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2001</year>). <source>Incident detection algorithm evaluation</source>. <publisher-loc>Salt Lake City, UT, United States</publisher-loc>: <publisher-name>Utah Department of Transportation</publisher-name>.</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ming</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Qian</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Shan</surname>
<given-names>W. L.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Taxi drivers&#x27; traffic violations detection using random forest algorithm: a case study in China</article-title>. <source>Traffic Inj. Prev.</source> <volume>24</volume> (<issue>1/4</issue>), <fpage>362</fpage>&#x2013;<lpage>370</lpage>. <pub-id pub-id-type="doi">10.1080/15389588.2023.2191286</pub-id>
<pub-id pub-id-type="pmid">36976788</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nanthawichit</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Nakatsuji</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Application of probe-vehicle data for real-time traffic-state estimation and short-term travel-time prediction on a highway</article-title>. <source>Transp. Res. Rec.</source> <volume>1855</volume>, <fpage>49</fpage>&#x2013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.3141/1855-06</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Park</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>S. H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Deep reinforcement learning for base station switching scheme with federated LSTM&#x2010;based traffic predictions</article-title>. <source>ETRI J.</source> <volume>46</volume> (<issue>3</issue>), <fpage>379</fpage>&#x2013;<lpage>391</lpage>. <pub-id pub-id-type="doi">10.4218/etrij.2023-0065</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Park</surname>
<given-names>H.-C.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.-K.</given-names>
</name>
<name>
<surname>Kho</surname>
<given-names>S.-Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Bayesian network for highway traffic state prediction</article-title>. <source>Transp. Res. Rec.</source> <volume>2672</volume>, <fpage>124</fpage>&#x2013;<lpage>135</lpage>. <pub-id pub-id-type="doi">10.1177/0361198118786824</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Payne</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Tignor</surname>
<given-names>S. C.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>Highway incident-detection algorithms based on decision trees with states</article-title>. <source>Transp. Res. Rec.</source> <volume>682</volume>.</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Puangnak</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chivapreecha</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Comparative study of threshold selection for incident detection based on California Algorithm</article-title>. <fpage>911</fpage>&#x2013;<lpage>914</lpage>. <pub-id pub-id-type="doi">10.1109/ECTI-CON47248.2019.8955226</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ranpura</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Gujar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Shukla</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Estimation of traffic delay due to traffic control elements using Bayesian Optimized Predictive Model for heterogeneous traffic conditions</article-title>. <source>10th Int. Conf. Control, Decis. Inf. Technol. (CoDIT)</source>. <pub-id pub-id-type="doi">10.1109/CoDIT62066.2024.10708404</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reddy</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Manjunath</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rohith</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Reddy</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Satyanarayana</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Deep CNN model for condition monitoring of road traffic: an application of computer vision</article-title>. <source>Turkish J. Comput. Math. Educ. (TURCOMAT)</source> <volume>14</volume>, <fpage>1362</fpage>&#x2013;<lpage>1370</lpage>. <pub-id pub-id-type="doi">10.61841/turcomat.v14i03.14525</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shaaban</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Davoodi</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Shaaban</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Analyzing autonomous vehicle collision types to support sustainable transportation systems: a machine learning and Association rules approach</article-title>. <source>Sustainability</source> <volume>16</volume>, <fpage>9893</fpage>. <pub-id pub-id-type="doi">10.3390/su16229893</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A hybrid method for traffic incident detection using random forest-recursive feature elimination and long short-term memory network with Bayesian optimization algorithm</article-title>. <source>IEEE Access</source> <volume>9</volume> (<issue>1</issue>), <fpage>1219</fpage>&#x2013;<lpage>1232</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.3047340</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Dash</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sial</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Road traffic congestion detection through cooperative vehicle-to-vehicle communications</article-title>,&#x201d; in <source>2023 4th IEEE global conference for advancement in technology (GCAT)</source>. <publisher-name>IEEE</publisher-name>, <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sheu</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Ritchie</surname>
<given-names>S. G.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>A new methodology for incident detection and characterization on surface streets</article-title>. <source>Transp. Res. Part C Emerg. Technol.</source> <volume>6</volume> (<issue>5-6</issue>), <fpage>315</fpage>&#x2013;<lpage>335</lpage>. <pub-id pub-id-type="doi">10.1016/s0968-090x(99)00002-9</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sommer</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Joerer</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Segata</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tonguz</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Cigno</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Dressler</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>How shadowing hurts vehicular communications and how dynamic beaconing can help</article-title>. <source>IEEE INFOCOM</source>, <fpage>110</fpage>&#x2013;<lpage>115</lpage>. <pub-id pub-id-type="doi">10.1109/INFCOM.2013.6566745</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stiller</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Puente Le&#xf3;n</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kruse</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Information fusion for automotive applications &#x2013; an overview</article-title>. <source>Inf. Fusion</source> <volume>12</volume> (<issue>4</issue>), <fpage>244</fpage>&#x2013;<lpage>252</lpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2011.03.005</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Research on method for traffic status identification of highway basic sections</source>. <publisher-loc>Nanjing</publisher-loc>: <publisher-name>Southeast University</publisher-name>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). &#x201c;<article-title>Real time detection of traffic signal running State and remote alarm for fault information at road intersection</article-title>,&#x201d; in <source>Proceedings of the 2018 24th international conference on automation and computing</source> (<publisher-loc>Newcastle upon Tyne, UK</publisher-loc>: <publisher-name>ICAC</publisher-name>), <fpage>478</fpage>&#x2013;<lpage>482</lpage>.</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guangli</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Study on the recognition model of highway traffic state</article-title>. <source>J. Transp. Eng. Inf.</source> <volume>1</volume> (<issue>2</issue>).</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Application of intelligent technology in urban traffic congestion</article-title>,&#x201d; in <source>Proceedings of the 2020 international conference on computer engineering and application (ICCEA)</source> (<publisher-loc>Guangzhou, China</publisher-loc>).</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zahid</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jamal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Memon</surname>
<given-names>M. Q.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Short term traffic state prediction <italic>via</italic> hyperparameter optimization based classifiers</article-title>. <source>Sensors</source> <volume>20</volume>, <fpage>685</fpage>. <pub-id pub-id-type="doi">10.3390/s20030685</pub-id>
<pub-id pub-id-type="pmid">32012650</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>She</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Road traffic safety status analysis and prediction based on dynamic bayesian network</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>2868</volume>, <fpage>012028</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/2868/1/012028</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>