<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Energy Res.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Energy Research</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Energy Res.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-598X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1745369</article-id>
<article-id pub-id-type="doi">10.3389/fenrg.2026.1745369</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>YOLOv8-FOD: a lightweight foreign object detection algorithm for power transmission lines</article-title>
<alt-title alt-title-type="left-running-head">Liu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenrg.2026.1745369">10.3389/fenrg.2026.1745369</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Tao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shi</surname>
<given-names>Dao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Yufang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhang</surname>
<given-names>Liang</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2541074"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Jiansheng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>State Grid Sichuan Electric Power Company Guangyuan Power Supply Company</institution>, <city>Guangyuan</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Southwest Petroleum University</institution>, <city>Chengdu</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Liang Zhang, <email xlink:href="mailto:liangzhangswpu@gmail.com">liangzhangswpu@gmail.com</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-10">
<day>10</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>14</volume>
<elocation-id>1745369</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>05</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Liu, Yang, Shi, Li, Zhang and Liu.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Liu, Yang, Shi, Li, Zhang and Liu</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-10">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Timely and accurate detection of foreign objects is crucial for the safe operation of transmission lines in the power grid. Object detection models, however, contain ever more parameters and increasingly complex computations, and therefore usually demand substantial computing power. To enable deployment on resource-constrained edge devices, this paper proposes a lightweight detection algorithm based on an improved YOLOv8, called YOLOv8-FOD (Foreign Object Detection). Firstly, the backbone network is optimized by incorporating the large kernel block (LarK Block) into the C2f module, forming a new C2f_LarK module. This widens the receptive field, captures more contextual information, and effectively reduces network redundancy. Secondly, a lightweight detection head, Det_Tiny, is proposed; through adaptive compression of feature redundancy and hardware-friendly computational optimization, it significantly reduces computational complexity. Finally, a new feature fusion network structure (Fusion) is designed, which uses the CGAFusion module to fuse high-dimensional and low-dimensional features. This captures important information at different semantic levels and improves the detection of edge details, effectively enhancing detection accuracy. Experimental results show that, compared with the standard YOLOv8n, the proposed model reduces the parameter count by 36.6%, the model size by 31.1%, and the computational complexity (GFLOPs) by 40.7%, while maintaining mAP@0.5 and improving mAP@[0.5:0.95] by 0.3%. The model is thus more lightweight and highly practical for deployment on edge devices.</p>
</abstract>
<kwd-group>
<kwd>CGAFusion</kwd>
<kwd>FasterNet</kwd>
<kwd>lightweight object detection</kwd>
<kwd>UniRepLKNet</kwd>
<kwd>YOLOv8</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research was funded by Science and Technology Project of State Grid Sichuan Electric Power Company: Development of a portable intelligent detection device for tree and bamboo hidden dangers in transmission lines (No. B7190725006G).</funding-statement>
</funding-group>
<counts>
<fig-count count="8"/>
<table-count count="4"/>
<equation-count count="9"/>
<ref-count count="27"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Smart Grids</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>The safe operation of transmission lines is vital for the power system. However, because of long-term outdoor exposure, they are susceptible to interference from foreign objects such as kites, balloons, floating debris, and bird nests. When attached to transmission lines, these objects can easily cause arc discharges, short circuits, and even fires or power grid failures (<xref ref-type="bibr" rid="B24">Wang Z. et al., 2023</xref>), seriously endangering the reliability and stability of the power system. It is therefore necessary to detect foreign objects accurately and efficiently so that potential safety hazards can be eliminated at an early stage.</p>
<p>Traditional foreign object detection on power transmission lines relies mainly on manual inspection, which suffers from low efficiency and high cost. Consequently, deep learning-based real-time object detection has emerged as a pivotal approach to addressing this challenge (<xref ref-type="bibr" rid="B7">Faisal et al., 2025</xref>). With the continuous development of deep learning, such methods have shown significant advantages in foreign object detection. However, their parameter counts and computational complexity continue to grow, resulting in large models and limited inference speed. When deployed on actual edge devices, they are often constrained by memory capacity, computing resources, and energy consumption, leading to problems such as model mismatch and high latency that seriously hinder real-time monitoring and early warning. Compared with other detection models such as SSD, Faster R-CNN, and RetinaNet, the YOLO series requires relatively few computing resources (<xref ref-type="bibr" rid="B11">Liu et al., 2016</xref>; <xref ref-type="bibr" rid="B16">Maduako et al., 2022</xref>; <xref ref-type="bibr" rid="B19">Ren et al., 2017</xref>; <xref ref-type="bibr" rid="B25">Yan et al., 2025</xref>; <xref ref-type="bibr" rid="B10">Lin et al., 2017</xref>) and is therefore better suited for deployment on edge devices.</p>
<p>Representative YOLO models for foreign object detection on power transmission lines include YOLOv4, YOLOv5, YOLOv7, YOLOv8, and YOLO11 (<xref ref-type="bibr" rid="B2">Bochkovskiy et al., 2020</xref>; <xref ref-type="bibr" rid="B18">Peng et al., 2025</xref>; <xref ref-type="bibr" rid="B23">Wang C. Y. et al., 2023</xref>; <xref ref-type="bibr" rid="B1">Bin et al., 2025</xref>; <xref ref-type="bibr" rid="B21">Shao et al., 2024</xref>). <xref ref-type="bibr" rid="B22">Song et al. (2021)</xref> proposed a foreign object detection algorithm for high-voltage transmission lines based on an improved YOLOv4. Using k-means clustering and the DIoU-NMS method, it reaches a detection accuracy of 81.72% on kites, balloons, plastics, wildfire, and smog. <xref ref-type="bibr" rid="B26">Zhou et al. (2024)</xref> proposed a foreign object detection algorithm based on an improved YOLOv5, which introduced an efficient channel attention (ECA) module into the backbone network and adopted bilinear interpolation in the neck network, achieving a 3.9% increase in mAP@0.5. <xref ref-type="bibr" rid="B13">Liu et al. (2023)</xref> combined two attention mechanisms with an additional detection layer to improve the identification of small defects and distant objects, achieving a detection speed 16.3% faster than YOLOv5 and a detection precision 3.3% higher than YOLOv7. <xref ref-type="bibr" rid="B12">Liu et al. (2021)</xref> improved the overall learning capability of the network by aggregating spatial and channel information in the feature map, achieving 88.5% detection accuracy for foreign objects such as bird nests, hanging debris, and wildfires; however, this gain came at the cost of a significant rise in parameters. To address the small scale, high density, and deformability of obstacles on high-voltage lines, <xref ref-type="bibr" rid="B17">Pan et al. (2024)</xref> proposed an improved YOLO algorithm based on multi-scale feature fusion; with data augmentation strategies such as image rotation and cropping, their approach significantly enhanced accuracy and robustness in complex scenarios. <xref ref-type="bibr" rid="B24">Wang Z. et al. (2023)</xref> constructed an SPPCSPC module and added a global attention module to the stem to focus on occluded foreign objects and improve the multi-scale feature extraction ability of YOLOv8. Their model achieved an accuracy of 95.5% in detecting foreign objects, while the number of parameters increased to 50.6M.</p>
<p>The models mentioned above all aim to improve the ability to detect targets by increasing the number of parameters. This poses challenges for deployment on edge devices. To overcome existing bottlenecks, recent research has pivoted toward lightweight architectures and efficient edge-side inference. For instance, <xref ref-type="bibr" rid="B14">Lu et al. (2023)</xref> reconstructed YOLOv5 using GhostNetV2 for transmission line defect detection. Their model achieved a mean Average Precision (mAP) of 94.3% at 63 FPS on edge devices, substantially bolstering real-time monitoring and generalization capabilities in harsh environments (<xref ref-type="bibr" rid="B1">Bin et al., 2025</xref>). Furthermore, <xref ref-type="bibr" rid="B20">Sankuri et al. (2025)</xref> introduced a hybrid DETR framework combined with semi-supervised learning for insulator defect detection. By optimizing the encoder and introducing a hierarchical hybrid matching strategy, the model attained a high-speed inference of 85 FPS, markedly improving detection precision and robustness against complex backgrounds (<xref ref-type="bibr" rid="B21">Shao et al., 2024</xref>). Additionally, <xref ref-type="bibr" rid="B27">Zhu et al. (2022)</xref> developed Fast-PLDN, a real-time semantic segmentation network that leverages low-pass and high-pass filter blocks along with an edge-attention fusion module to tackle the challenges of curved power line detection. This model achieves an exceptional speed of 189.6 FPS with an mIoU of 71.3%, demonstrating superior real-time perception performance.</p>
<p>Therefore, how to effectively reduce model size and computational complexity while maintaining high detection accuracy for complex foreign objects remains a key research focus in this field. In light of this, this paper proposes YOLOv8-FOD, a lightweight foreign object detection algorithm for transmission lines based on YOLOv8, with the following main contributions:<list list-type="order">
<list-item>
<p>The large kernel block (LarK Block) of UniRepLKNet (<xref ref-type="bibr" rid="B6">Ding et al., 2024</xref>) is integrated into the C2f module to optimize the backbone network, forming a new C2f_LarK module. By adopting a larger convolution kernel, the LarK Block achieves a wider receptive field and captures more contextual information without additional network layers or increased model depth, effectively reducing network redundancy.</p>
</list-item>
<list-item>
<p>A lightweight detection head, Det_Tiny, is proposed. The standard convolution in the classification branch of the original detection head is replaced with the partial convolution (PConv) of FasterNet (<xref ref-type="bibr" rid="B3">Chen et al., 2023</xref>). Through adaptive compression of feature redundancy and hardware-friendly computational optimization, computational complexity is significantly reduced while classification accuracy is preserved.</p>
</list-item>
<list-item>
<p>A new feature fusion network, Fusion, is designed. Based on CGAFusion (<xref ref-type="bibr" rid="B4">Chen et al., 2024a</xref>), high-dimensional and low-dimensional features are fused, and content-guided attention (CGA) assigns a unique Spatial Importance Map (SIM) to each channel. This focuses the network on the more useful information in the features and improves the detection of edge details, effectively increasing detection accuracy.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<label>2</label>
<title>YOLOv8-FOD lightweight detection model</title>
<p>This paper uses YOLOv8n as the baseline model and proposes the improved YOLOv8-FOD lightweight target detection model tailored for resource-constrained edge devices. The improved network structure is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>YOLOv8-FOD model structure diagram.</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a convolutional neural network architecture for object detection. It shows the process from input to output, with blocks labeled Conv, C2f_LarK, FasterNet block, and CGAFusion. The diagram is divided into sections: Backbone, Neck, Fusion, and Head, leading to detection output on images.</alt-text>
</graphic>
</fig>
<p>The model first incorporates the LarK Block of UniRepLKNet into the backbone network, reconstructing the C2f module into a new C2f_LarK module. This modification effectively expands the receptive field without increasing network depth, significantly enhancing context perception for small, distant targets. Secondly, a lightweight detection head, Det_Tiny, based on the Partial Convolution (PConv) of FasterNet is designed to compress redundant features through channel-selective computation, reducing the computational load of the detection head while a dynamic channel adjustment mechanism maintains classification accuracy. Furthermore, a multi-scale feature fusion network (Fusion) based on CGAFusion is proposed, in which a dual-path attention mechanism and a dynamic weight fusion strategy jointly optimize high-dimensional semantic features alongside low-dimensional detail features, thereby improving the localization accuracy of foreign objects in complex backgrounds.</p>
<sec id="s2-1">
<label>2.1</label>
<title>Backbone network reconstruction</title>
<p>In the task of detecting foreign objects on power transmission lines, overcoming complex background interference and capturing small-target features are the key challenges. Although YOLOv8 achieves effective feature extraction in the backbone network through the C2f module, the traditional small-scale (3 &#xd7; 3) convolution kernels in its backbone suffer from a limited receptive field and insufficient long-range dependency modeling, making it difficult to capture global contextual information in scenarios with large transmission line spans. This paper integrates the LarK Block of UniRepLKNet into the C2f module to construct the C2f_LarK module. The LarK Block captures more contextual information through a larger receptive field, enabling the backbone network to extract richer features. At the same time, the re-parameterized nature of the LarK Block significantly improves inference efficiency, making the model more suitable for deployment on edge devices. The structure design of the C2f_LarK module is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Structure design of the C2f_LarK module.</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g002.tif">
<alt-text content-type="machine-generated">Diagram illustrating the C2f_LarK architecture. It features a convolutional layer leading to a split block, followed by multiple sequential LarK Blocks. Outputs are concatenated, passing through another convolutional layer. The Dilated Re-param Block includes components like BN, DWconv, SE Block with ReLU and sigmoid functions, and an FFN with GELU, before adding and drop path.</alt-text>
</graphic>
</fig>
<p>In <xref ref-type="fig" rid="F2">Figure 2</xref>, the LarK Block achieves efficient feature extraction through a dual-path design of dilated reparameterization and channel attention enhancement (<xref ref-type="bibr" rid="B9">Hu et al., 2018</xref>). Its core idea is to use dilated convolution to reconstruct the parameter space of small-kernel convolutions, re-parameterizing the multi-branch structure into a single sparse large-kernel convolution through mathematical equivalence. During the training phase, the module deploys a non-dilated large-kernel convolution (e.g., 13 &#xd7; 13) and dilated small-kernel convolutions (e.g., 3 &#xd7; 3 with a dilation rate of d &#x3d; 4) in parallel, and their parameter spaces complement each other through gradient co-optimization. During the inference phase, leveraging the additivity of convolution, the multi-branch structure is reparameterized into a mathematically equivalent single branch, which is physically deployed as a depth-wise dilated convolution. This implementation directly exploits the operator-level optimizations for dilated and depth-wise convolutions on mainstream hardware (such as GPUs and NPUs), achieving a receptive field equivalent to a 31 &#xd7; 31 large kernel without explicit sparse matrix operations and thereby avoiding the hardware efficiency degradation caused by unstructured sparsity. Furthermore, the module integrates the Squeeze-and-Excitation (SE) attention mechanism: Global Average Pooling (GAP) compresses the spatial dimensions into channel description vectors, and channel attention weights generated through a fully connected layer and sigmoid activation dynamically calibrate the feature channel responses.</p>
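<p>To make the reparameterization step concrete, the following minimal PyTorch sketch (our illustration, not the authors&#x2019; released code) merges a dilated small-kernel branch into an equivalent dense kernel by zero-insertion and adds it to the large-kernel branch, so that inference uses a single convolution. The function names and the 13 &#xd7; 13 / 3 &#xd7; 3 (dilation 4) sizes follow the example above.</p>
<code language="python"><![CDATA[
import torch
import torch.nn.functional as F

def dilated_to_dense(kernel: torch.Tensor, dilation: int) -> torch.Tensor:
    """Convert a dilated k x k kernel into an equivalent dense kernel of
    size k + (k - 1) * (dilation - 1) by inserting zeros between taps."""
    k = kernel.shape[-1]
    new_k = k + (k - 1) * (dilation - 1)
    dense = torch.zeros(*kernel.shape[:-2], new_k, new_k)
    dense[..., ::dilation, ::dilation] = kernel
    return dense

def merge_branches(large: torch.Tensor, small: torch.Tensor, dilation: int) -> torch.Tensor:
    """Reparameterize a large-kernel branch plus a dilated small-kernel
    branch into one kernel, using the additivity of convolution."""
    dense_small = dilated_to_dense(small, dilation)
    pad = (large.shape[-1] - dense_small.shape[-1]) // 2
    return large + F.pad(dense_small, [pad] * 4)

# Example: a depth-wise 13 x 13 kernel plus a 3 x 3 kernel with
# dilation 4 (equivalent dense size 9 x 9) for 64 channels.
large = torch.randn(64, 1, 13, 13)
small = torch.randn(64, 1, 3, 3)
merged = merge_branches(large, small, dilation=4)
print(merged.shape)  # torch.Size([64, 1, 13, 13])
]]></code>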
<p>Regarding the selection of the core kernel size for this module, this study strictly adheres to the ablation conclusions from the original UniRepLKNet (<xref ref-type="bibr" rid="B6">Ding et al., 2024</xref>) and designates 31 &#xd7; 31 as the optimal kernel dimension. Research has substantiated that this size represents a &#x201c;saturation point&#x201d; for balancing receptive field gains and inference latency. Compared to larger kernels (e.g., 51 &#xd7; 51), a 31 &#xd7; 31 kernel provides a large-scale receptive field sufficient to cover the wide-span scenarios of transmission lines with negligible increases in Memory Access Cost (MAC). This enables efficient feature capture while effectively circumventing excessive computational overhead.</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Lightweight target detection head</title>
<p>In the traditional YOLOv8 object detection model, 25% of the parameters come from the detection head. Conventional convolutional layers (Conv &#x2b; BN &#x2b; SiLU) are typically used for category classification and bounding box regression, but the parameter count and computational complexity they introduce limit deployment on edge devices. To address this problem, this paper replaces the standard convolution in the original classification branch with the Partial Convolution (PConv) of FasterNet, constructing a new lightweight object detection head, Det_Tiny. Through adaptive compression of feature redundancy and hardware-friendly computational optimization, it significantly reduces computational complexity while maintaining classification accuracy. The PConv structure design is shown in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Structure design of PConv.</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g003.tif">
<alt-text content-type="machine-generated">Diagram illustrating a convolution operation in neural networks. It shows stacked layers labeled as &#x22;Filter,&#x22; with a convolution kernel represented by cubes. The convolution operation is indicated with a star symbol, leading to an output stack via an arrow.</alt-text>
</graphic>
</fig>
<p>Traditional standard convolution performs intensive calculations on all input channels in the spatial dimension. In the scenario of foreign object detection on power transmission lines, however, the feature distribution of background areas (such as sky and vegetation) and target areas (kites and plastic bags) is highly spatially sparse, so applying standard convolution directly leads to a large number of redundant calculations. In classification tasks especially, high-frequency detail features (such as foreign object edge textures) contribute far more to category judgment than smooth background areas. PConv performs spatial convolution only on a subset of channels through channel grouping and selective computation; the remaining channels are processed with low-cost point-wise convolution (PWConv) to reduce redundant operations. Its mathematical expression is given in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>.<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">F</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>Concat</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mtext>Conv</mml:mtext>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold">F</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>:</mml:mo>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>PWConv</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold">F</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>Where <inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of channels involved in spatial convolution. By separating high-frequency feature channels (required for spatial detail extraction) from low-frequency channels (required for channel information fusion), a balance between computational efficiency and feature expression is achieved.</p>
<p>In this study, adhering to the ablation conclusions of the original FasterNet authors (<xref ref-type="bibr" rid="B3">Chen et al., 2023</xref>), we set this parameter to one quarter of the total input channels (g &#x3d; C/4). This ratio has been shown to be the optimal balance point between computational efficiency and feature representation capability: it dedicates enough channels to extracting the critical high-frequency spatial features of foreign objects, while maximizing the use of the remaining channels for low-cost fusion to reduce giga floating-point operations (GFLOPs). An excessively low ratio leads to insufficient feature extraction, whereas an overly high ratio erodes the lightweight advantage; g &#x3d; C/4 therefore satisfies the dual requirements of accuracy and speed.</p>
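<p>As a concrete illustration of <xref ref-type="disp-formula" rid="e1">Equation 1</xref>, the sketch below (an assumed minimal implementation, not the FasterNet reference code) applies a k &#xd7; k spatial convolution to the first g channels and a low-cost point-wise convolution to the remaining channels, with g set to one quarter of the input channels as discussed above.</p>
<code language="python"><![CDATA[
import torch
import torch.nn as nn

class PConv(nn.Module):
    """Partial convolution per Equation 1: spatial conv on the first g
    channels, point-wise conv (PWConv) on the rest, then concatenate."""
    def __init__(self, channels: int, ratio: float = 0.25, k: int = 3):
        super().__init__()
        self.g = max(1, int(channels * ratio))  # g = C_in / 4 by default
        rest = channels - self.g
        self.spatial = nn.Conv2d(self.g, self.g, k, padding=k // 2, bias=False)
        self.pw = nn.Conv2d(rest, rest, 1, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x1, x2 = torch.split(x, [self.g, x.shape[1] - self.g], dim=1)
        return torch.cat((self.spatial(x1), self.pw(x2)), dim=1)

# Example: a 64-channel feature map sends only 16 channels through the
# expensive k x k spatial convolution.
y = PConv(64)(torch.randn(1, 64, 80, 80))
print(y.shape)  # torch.Size([1, 64, 80, 80])
]]></code>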
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Multi-scale feature fusion network</title>
<p>To address the accuracy loss caused by the lightweight design, this paper proposes a multi-scale feature fusion network, Fusion, based on CGAFusion. The structure design of CGAFusion is shown in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Structure design of CGAFusion.</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g004.tif">
<alt-text content-type="machine-generated">Diagram of the CGAFusion process showing the integration of low-level and high-level features. It includes addition and multiplication operations, a CGA block, and a one-by-one convolution step.</alt-text>
</graphic>
</fig>
<p>The CGAFusion module fuses shallow features with deep features. The Content-Guided Attention (CGA) module, shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, assigns a unique SIM, <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, to each channel. This coarse-to-fine fusion of channel and spatial information enhances feature representation. The spatial attention module captures the spatial information of the feature map through global average pooling and global max pooling operations. The channel attention module utilizes adaptive average pooling and convolutional layers to highlight the importance of different channels. Finally, by combining the input feature map and the outputs of the first two attention modules, the SIM in the feature space is finely adjusted through channel shuffling.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Structure design of CGA.</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g005.tif">
<alt-text content-type="machine-generated">Diagram of a Convolutional Neural Network structure showing spatial and channel attention. It includes GMP and GAP in spatial attention feeding into 7x7 convolution, and CAP in channel attention with two 1x1 convolutions. Outputs from both modules merge, undergo channel shuffle and convolution, followed by sigmoid activation, resulting in output W.</alt-text>
</graphic>
</fig>
<p>The Fusion network proposed in this study is composed of three CGAFusion modules, which respectively fuse the low-level features from the final three C2f_LarK layers of the backbone with the high-level features from the final three C2f layers of the neck. Given the backbone low-level features <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (<inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) and the neck high-level features <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (<inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>), the module first constructs ground-state features via initial feature superposition <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (<inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) to serve as the input for the CGA module. Finally, the fusion results are fed into the Det_Tiny detection head. The specific steps are detailed below:<list list-type="order">
<list-item>
<p>The spatial attention modeling of the CGA module captures local salient regions through dual-path feature compression. The calculation is expressed as shown in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>.</p>
</list-item>
</list>
<disp-formula id="e2">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mtext>Conv</mml:mtext>
<mml:mrow>
<mml:mn>7</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>7</mml:mn>
</mml:mrow>
<mml:mtext>reflect</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>Concat</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>AvgPool</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>MaxPool</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Specifically, AvgPool and MaxPool compress the data along the channel dimension to produce 2-channel features. A 7 &#xd7; 7 reflection-padded convolution (padding &#x3d; 3) and a sigmoid activation function are then used to generate a spatial weight map <inline-formula id="inf9">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, highlighting the potential spatial distribution of foreign objects.<list list-type="simple">
<list-item>
<p>2. The channel attention modeling of the CGA module is based on the Squeeze-and-Excitation (SE) framework of SENet, which is utilized to model inter-channel dependencies. The calculation is expressed as shown in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>.</p>
</list-item>
</list>
<disp-formula id="e3">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mtext>Conv</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mtext>Conv</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>AdaptiveAvgPool</mml:mtext>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<list list-type="simple">
<list-item>
<p>3. The CGA module dynamically generates refined Spatial Importance Maps through a fusion process. By incorporating grouped convolutions, the module facilitates feature interaction while effectively preventing cross-channel information interference. The calculation is expressed as shown in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>.</p>
</list-item>
</list>
<disp-formula id="e4">
<mml:math id="m13">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mtext>Conv</mml:mtext>
<mml:mrow>
<mml:mn>7</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>7</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mtext>Group</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">C</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mtext>Concat</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mo>&#x2295;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Specifically, the spatial and channel attention maps are fused via element-wise addition. The resulting integrated attention map is then concatenated with the initial features. A 7 &#xd7; 7 grouped convolution (groups &#x3d; C) is subsequently applied to extract local correlations, generating refined spatial importance weights <inline-formula id="inf10">
<mml:math id="m14">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. These weights are utilized to dynamically balance the contributions from the backbone and neck features.<list list-type="simple">
<list-item>
<p>4. Synergetic generation via the CGA-based Multi-scale Feature Fusion Network (CGAFusion). The calculation is expressed as shown in <xref ref-type="disp-formula" rid="e5">Equation 5</xref>.</p>
</list-item>
</list>
<disp-formula id="e5">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mtext>fuse</mml:mtext>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>Where <inline-formula id="inf11">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the channel recalibration layer implemented via a 1 &#xd7; 1 convolution, and &#x2299; denotes element-wise multiplication. The ground-state features serve to preserve the original information, while the Spatial Importance Maps dynamically allocate weights between the two components based on local complexity.</p>
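<p>Putting <xref ref-type="disp-formula" rid="e2">Equations 2</xref>&#x2013;<xref ref-type="disp-formula" rid="e5">5</xref> together, the following minimal PyTorch sketch (our reconstruction from the equations; details such as the channel-reduction ratio in the SE branch are assumptions) illustrates the full CGAFusion computation. The interleaving step stands in for the channel shuffle described above.</p>
<code language="python"><![CDATA[
import torch
import torch.nn as nn

class CGAFusion(nn.Module):
    """Sketch of CGAFusion following Equations 2-5 (assumed layer sizes)."""
    def __init__(self, c: int, r: int = 8):
        super().__init__()
        # Eq. 2: 7x7 reflection-padded conv over channel-pooled maps
        self.sa = nn.Conv2d(2, 1, 7, padding=3, padding_mode='reflect')
        # Eq. 3: SE-style channel attention (reduction ratio r assumed)
        self.ca = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(c, c // r, 1), nn.ReLU(inplace=True),
            nn.Conv2d(c // r, c, 1),
        )
        # Eq. 4: 7x7 grouped conv (groups = C) producing the SIM W
        self.sim = nn.Conv2d(2 * c, c, 7, padding=3, groups=c)
        self.sigmoid = nn.Sigmoid()
        # Eq. 5: 1x1 channel recalibration
        self.out = nn.Conv2d(c, c, 1)

    def forward(self, f_low: torch.Tensor, f_high: torch.Tensor) -> torch.Tensor:
        x = f_low + f_high                                    # ground-state features
        avg = torch.mean(x, dim=1, keepdim=True)              # GAP over channels
        mx, _ = torch.max(x, dim=1, keepdim=True)             # GMP over channels
        w_s = self.sigmoid(self.sa(torch.cat([avg, mx], 1)))  # Eq. 2
        w_c = self.ca(x)                                      # Eq. 3
        attn = w_s + w_c                                      # broadcast addition
        z = torch.stack([x, attn], dim=2).flatten(1, 2)       # interleave (shuffle)
        w = self.sigmoid(self.sim(z))                         # Eq. 4
        return self.out(x + f_low * w + f_high * (1 - w))     # Eq. 5

# Example with 64-channel backbone/neck features of matching size.
out = CGAFusion(64)(torch.randn(1, 64, 40, 40), torch.randn(1, 64, 40, 40))
print(out.shape)  # torch.Size([1, 64, 40, 40])
]]></code>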
<p>This fusion approach achieves information complementarity, enabling the model to capture high-level semantic information while retaining underlying details. Furthermore, the CGAFusion module&#x2019;s adaptive attention mechanism highlights key features and suppresses redundant information, effectively enhancing the model&#x2019;s ability to detect objects of varying scales. This improves the model&#x2019;s overall detection performance in complex scenarios.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Experimental setup</title>
<sec id="s3-1">
<label>3.1</label>
<title>Experimental environment</title>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> provides the main configurations for the experimental environment, including the operating system, CPU, GPU, Python, Cuda, and PyTorch version.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Configurations for the experimental environment.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Items</th>
<th align="center">Types</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Operating system</td>
<td align="center">Windows 11</td>
</tr>
<tr>
<td align="center">CPU</td>
<td align="center">Intel core i5-124900F</td>
</tr>
<tr>
<td align="center">GPU</td>
<td align="center">Nvidia RTX 4060TI GPU</td>
</tr>
<tr>
<td align="center">Language</td>
<td align="center">Python 3.9.18</td>
</tr>
<tr>
<td align="center">Platform</td>
<td align="center">PyTorch2.1.1</td>
</tr>
<tr>
<td align="center">CUDA</td>
<td align="center">11.8</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The hyperparameter settings for model training are shown in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Training parameters.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Items</th>
<th align="center">Parameters</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Input image size</td>
<td align="center">3&#x2a;640&#x2a;640</td>
</tr>
<tr>
<td align="center">Batch size</td>
<td align="center">16</td>
</tr>
<tr>
<td align="center">Epochs</td>
<td align="center">200</td>
</tr>
<tr>
<td align="center">Optimizer</td>
<td align="center">Adam</td>
</tr>
<tr>
<td align="center">Momentum</td>
<td align="center">0.937</td>
</tr>
<tr>
<td align="center">Initial learning rate</td>
<td align="center">0.01</td>
</tr>
<tr>
<td align="center">Learning rate decay strategy</td>
<td align="center">Cos annealing</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The input image size was uniformly set to 3 &#xd7; 640 &#xd7; 640, the number of training epochs to 200, the batch size to 16, and the initial learning rate to 0.01, with cosine annealing used to gradually decay the learning rate during training.</p>
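<p>For illustration, the configuration in <xref ref-type="table" rid="T2">Table 2</xref> can be reproduced in PyTorch roughly as follows (a sketch under stated assumptions: the network is a stand-in, and mapping the momentum of 0.937 to Adam&#x2019;s first moment coefficient is our interpretation).</p>
<code language="python"><![CDATA[
import torch
import torch.nn as nn

net = nn.Conv2d(3, 16, 3)  # placeholder for the YOLOv8-FOD network
# Adam optimizer with initial learning rate 0.01 (momentum 0.937
# interpreted here as Adam's beta1 -- an assumption).
optimizer = torch.optim.Adam(net.parameters(), lr=0.01, betas=(0.937, 0.999))
# Cosine annealing of the learning rate over the 200 training epochs.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

for epoch in range(200):
    # ... one training epoch over batches of 16 images (3 x 640 x 640) ...
    scheduler.step()
]]></code>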
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Experimental dataset</title>
<p>In view of the pronounced scarcity of publicly available datasets for power line foreign object detection, this study employs the RailFOD23 (<xref ref-type="bibr" rid="B5">Chen et al., 2024b</xref>) benchmark to evaluate the generalization capability of the proposed lightweight algorithm in complex scenarios. This dataset, which has been widely adopted in the literature for assessing the performance of power line detection models (<xref ref-type="bibr" rid="B8">Hao et al., 2024</xref>), comprises video frames sourced from multi-national railway surveillance systems. It spans a broad spectrum of illumination conditions, weather patterns, and object categories. Notably, its structural and environmental characteristics share a high degree of scene commonality and challenge with power line inspection tasks, providing a rigorous platform for validating model robustness.</p>
<p>The RailFOD23 dataset contains 14,615 images and 40,541 annotated objects covering four common foreign object categories: bird nests, balloons, plastic bags, and floating objects. It is randomly divided into training, validation, and test sets in a 7:2:1 ratio, yielding 10,230 training images, 2,923 validation images, and 1,462 test images.</p>
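<p>A random 7:2:1 split of this kind can be produced with a few lines of Python (an illustrative sketch; the function name and seed are hypothetical).</p>
<code language="python"><![CDATA[
import random

def split_dataset(paths, seed=0):
    """Randomly split a list of image paths into train/val/test at 7:2:1."""
    paths = list(paths)
    random.Random(seed).shuffle(paths)
    n = len(paths)
    n_train, n_val = int(0.7 * n), int(0.2 * n)
    return (paths[:n_train],
            paths[n_train:n_train + n_val],
            paths[n_train + n_val:])

# With the 14,615 RailFOD23 images this yields 10,230 / 2,923 / 1,462
# images for training, validation, and testing respectively.
]]></code>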
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Evaluation indicators</title>
<p>This study uses recall (R), precision (P), and Mean Average Precision (mAP) to evaluate the detection accuracy of the model, calculated as shown in <xref ref-type="disp-formula" rid="e6">Equations 6</xref>&#x2013;<xref ref-type="disp-formula" rid="e9">9</xref>. To comprehensively evaluate computational efficiency, additional metrics include the total parameter count, model size, and the number of floating-point operations (GFLOPs).<disp-formula id="e6">
<mml:math id="m17">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m18">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m19">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m20">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>Where TP represents the true positive that is correctly predicted. FP represents the false positive that is incorrectly predicted as positive. FN represents the false negative that is incorrectly predicted as negative. And K represents the number of classes.</p>
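<p>These metrics can be computed directly from detection counts and a sampled P-R curve, as in the short sketch below (our illustration; the numerical integration of <xref ref-type="disp-formula" rid="e8">Equation 8</xref> uses a common trapezoidal approximation).</p>
<code language="python"><![CDATA[
import numpy as np

def recall(tp: int, fn: int) -> float:
    """Eq. 6: fraction of ground-truth objects that were detected (%)."""
    return 100.0 * tp / (tp + fn)

def precision(tp: int, fp: int) -> float:
    """Eq. 7: fraction of detections that are correct (%)."""
    return 100.0 * tp / (tp + fp)

def average_precision(p: np.ndarray, r: np.ndarray) -> float:
    """Eq. 8: area under the P-R curve, integrated over recall."""
    order = np.argsort(r)
    return float(np.trapz(p[order], r[order]))

def mean_ap(ap_per_class) -> float:
    """Eq. 9: mean of the per-class APs over the K classes (%)."""
    return 100.0 * sum(ap_per_class) / len(ap_per_class)
]]></code>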
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Results and analysis</title>
<p>Ablation and comparison experiments are conducted to validate the effectiveness of the proposed model.</p>
<sec id="s4-1">
<label>4.1</label>
<title>Results of ablation experiment</title>
<p>The C2f_LarK, Det_Tiny, and Fusion (CGAFusion-based) modules are considered in the ablation experiments.</p>
<sec id="s4-1-1">
<label>4.1.1</label>
<title>Model performance with different modules</title>
<p>The experimental results of model performance with different modules are shown in <xref ref-type="table" rid="T3">Table 3</xref> below. The baseline is original YOLOv8n model.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Model performance with different modules.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model</th>
<th align="center">mAP@0.5 (%)</th>
<th align="center">mAP@[0.5:0.95] (%)</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Model size (MB)</th>
<th align="center">Parameters (million)</th>
<th align="center">GFLOPs</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Baseline</td>
<td align="center">94.0</td>
<td align="center">83.7</td>
<td align="center">91.2</td>
<td align="center">87.8</td>
<td align="center">6.1</td>
<td align="center">3.0</td>
<td align="center">8.1</td>
</tr>
<tr>
<td align="center">&#x2b;C2f_LarK</td>
<td align="center">94.2</td>
<td align="center">84.5</td>
<td align="center">91.6</td>
<td align="center">88.2</td>
<td align="center">5.7</td>
<td align="center">2.5</td>
<td align="center">7.4</td>
</tr>
<tr>
<td align="center">&#x2b;Det-tiny</td>
<td align="center">93.6</td>
<td align="center">83.7</td>
<td align="center">91.7</td>
<td align="center">87.3</td>
<td align="center">4.4</td>
<td align="center">2.4</td>
<td align="center">5.2</td>
</tr>
<tr>
<td align="center">&#x2b;Fusion</td>
<td align="center">94.1</td>
<td align="center">84.0</td>
<td align="center">91.6</td>
<td align="center">87.3</td>
<td align="center">6.2</td>
<td align="center">3.1</td>
<td align="center">8.4</td>
</tr>
<tr>
<td align="center">YOLOv8-FOD</td>
<td align="center">94.0</td>
<td align="center">84.0</td>
<td align="center">91.8</td>
<td align="center">87.4</td>
<td align="center">4.1</td>
<td align="center">1.9</td>
<td align="center">4.8</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="table" rid="T3">Table 3</xref> shows that when C2f_LarK module is added alone, mAP@0.5 increases by 0.2%, mAP@ [0.5:0.95] increases by 0.8%, model size decreases by 0.4MB, parameter count decreases by 16.6%, and computational complexity also decreases by 8.6%.</p>
<p>While the standalone integration of the Det_Tiny module leads to a marginal 0.4% decrease in mAP@0.5, it yields substantial efficiency improvements: model size is reduced by 1.7 MB, parameter count decreases by 20%, and computational complexity (GFLOPs) is lowered by 35.8%, offering a superior overall advantage for the model.</p>
<p>When the Fusion module is added alone, mAP@0.5 and mAP@[0.5:0.95] increase slightly, by 0.1% and 0.3% respectively, while model size grows by 0.1 MB, parameter count by 3.3%, and GFLOPs by 3.7%; all metrics remain close to the baseline.</p>
<p>When the C2f_LarK, Det_Tiny, and Fusion modules are added simultaneously, mAP@0.5 remains unchanged and mAP@[0.5:0.95] increases by 0.3%, while model size decreases by 31.1%, parameter count by 36.6%, and GFLOPs by 40.7%. This demonstrates the effectiveness of the improved YOLOv8-FOD model in achieving a lightweight design.</p>
</sec>
<sec id="s4-1-2">
<label>4.1.2</label>
<title>Comparison of precision-recall curve</title>
<p>The Precision-Recall (P-R) curves of the five object detection experiments conducted on the RailFOD23 dataset are shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. The larger the area under the P-R curve, the higher the model&#x2019;s mAP value and the better its performance. It can be seen that mAP@0.5 changes little for the nest, plastic bag, fluttering object, and balloon classes; the improved model thus achieves a lightweight design without sacrificing detection accuracy.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>P-R curves of each model. <bold>(a)</bold> Baseline; <bold>(b)</bold> &#x2b;C2f_LarK; <bold>(c)</bold> &#x2b;Det_Tiny; <bold>(d)</bold> &#x2b;Fusion; <bold>(e)</bold> YOLOv8-FOD.</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g006.tif">
<alt-text content-type="machine-generated">Five precision-recall curves labeled (a) to (e) compare object detection performance across different models. Each graph shows lines for &#x22;Nest,&#x22; &#x22;Plastic bag,&#x22; &#x22;Fluttering object,&#x22; &#x22;Balloon,&#x22; and &#x22;all classes,&#x22; with mAP@0.5 scores ranging from 0.936 to 0.942. Panel (a) has scores from 0.912 to 0.994, (b) from 0.911 to 0.994, (c) from 0.891 to 0.994, (d) from 0.904 to 0.994, and (e) from 0.894 to 0.994.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Comparative visualization analysis</title>
<p>To visually verify the effectiveness of the proposed method and explain the underlying mechanism of the performance improvement, we used Gradient-weighted Class Activation Mapping (Grad-CAM) to compare feature-map visualizations of the models before and after the improvement, as shown in <xref ref-type="fig" rid="F7">Figure 7</xref>. As the figure shows, the original YOLOv8n model, limited by its small effective receptive field, often struggles to capture the complete features of the target, resulting in weak responses or even feature loss in the heatmap at the target region.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Comparison of heatmaps before and after model improvement.</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g007.tif">
<alt-text content-type="machine-generated">Comparison of power lines with debris in three columns: the original photo, detection using YOLOv8n, and detection using YOLOv8-FOD. YOLOv8n and YOLOv8-FOD columns highlight objects with heatmap overlays, showing detected debris on power lines and structures.</alt-text>
</graphic>
</fig>
<p>The improved YOLOv8-FOD model exhibits stronger feature extraction capabilities, with its high-response regions in the heatmap more densely and completely covering the foreign object target. Notably, compared to the original model, the improved model demonstrates stronger focusing ability in the target edge region, enabling more accurate delineation of the object&#x2019;s outline. This complete perception of the target&#x2019;s shape is mainly due to the large receptive field of the C2f_LarK module, ensuring that the model can capture the target&#x2019;s contextual information from a global perspective. Simultaneously, the introduction of the Fusion module effectively enhances the preservation of details during feature fusion, allowing the model to keenly capture object boundary information, thereby achieving accurate localization of the foreign object from its center to its edge.</p>
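<p>The receptive-field argument can be made concrete with a parameter count: a depthwise large-kernel convolution covers a wide neighborhood at a cost that grows only linearly in the channel count, unlike a dense convolution of the same size. The sketch below illustrates this with an arbitrary 64-channel, 13 &#xd7; 13 example; the actual C2f_LarK uses UniRepLKNet&#x2019;s reparameterized large-kernel block, which is more involved than a single depthwise layer.</p>
<code language="python"><![CDATA[
import torch
import torch.nn as nn

channels, k = 64, 13  # illustrative channel count and kernel size

# Depthwise large kernel: each channel sees a k x k neighborhood, so a
# single layer already has a 13 x 13 receptive field.
dw_large = nn.Conv2d(channels, channels, k, padding=k // 2, groups=channels)

# Dense convolution of the same kernel size, for comparison.
dense_large = nn.Conv2d(channels, channels, k, padding=k // 2)

def count(m):
    return sum(p.numel() for p in m.parameters())

print(f"depthwise {k}x{k}: {count(dw_large):,} parameters")    # 10,880
print(f"dense     {k}x{k}: {count(dense_large):,} parameters") # 692,288

x = torch.randn(1, channels, 40, 40)
assert dw_large(x).shape == x.shape  # padding preserves the spatial size
]]></code>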
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Comparative analysis with other models</title>
<p>To further verify the overall effectiveness of the improved model proposed in this paper, we conducted comprehensive comparative experiments against other models.</p>
<sec id="s4-3-1">
<label>4.3.1</label>
<title>Comparison of model performance</title>
<p>As shown in <xref ref-type="table" rid="T4">Table 4</xref>, the performance of the proposed model is compared with that of Faster-RCNN, YOLOv5n, YOLOv7-tiny, YOLO11n, and YOLO12n.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Performance of different models.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model</th>
<th align="center">mAP@0.5 (%)</th>
<th align="center">mAP@[0.5:0.95] (%)</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Model size (MB)</th>
<th align="center">Parameters (million)</th>
<th align="center">GFLOPs</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Faster-RCNN</td>
<td align="center">89.5</td>
<td align="center">77.6</td>
<td align="center">87.6</td>
<td align="center">85.6</td>
<td align="center">108.9</td>
<td align="center">137.1</td>
<td align="center">370.2</td>
</tr>
<tr>
<td align="center">YOLOv5n</td>
<td align="center">94.0</td>
<td align="center">81.6</td>
<td align="center">91.0</td>
<td align="center">87.2</td>
<td align="center">3.7</td>
<td align="center">1.7</td>
<td align="center">4.2</td>
</tr>
<tr>
<td align="center">YOLOv7-tiny</td>
<td align="center">94.5</td>
<td align="center">82.0</td>
<td align="center">91.4</td>
<td align="center">87.5</td>
<td align="center">12.3</td>
<td align="center">6.0</td>
<td align="center">11.7</td>
</tr>
<tr>
<td align="center">YOLO11n</td>
<td align="center">94.1</td>
<td align="center">83.8</td>
<td align="center">91.1</td>
<td align="center">89.7</td>
<td align="center">5.2</td>
<td align="center">2.6</td>
<td align="center">6.3</td>
</tr>
<tr>
<td align="center">YOLO12n</td>
<td align="center">93.8</td>
<td align="center">82.6</td>
<td align="center">91.0</td>
<td align="center">87.5</td>
<td align="center">5.2</td>
<td align="center">2.6</td>
<td align="center">6.3</td>
</tr>
<tr>
<td align="center">YOLOv8-FOD</td>
<td align="center">94.0</td>
<td align="center">84.0</td>
<td align="center">91.8</td>
<td align="center">87.3</td>
<td align="center">4.1</td>
<td align="center">1.9</td>
<td align="center">4.8</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T4">Table 4</xref>, the proposed model significantly improves accuracy compared with the two-stage Faster-RCNN, while also significantly reducing parameter count and computational complexity. Compared with YOLOv5n, the improved model achieves a 2.4% increase in mAP@[0.5:0.95], with concurrent improvements in both Precision and Recall (<xref ref-type="bibr" rid="B1">Bin et al., 2025</xref>), at the cost of only a marginal increase in parameter count and computational complexity. Compared with YOLOv7-tiny, YOLO11n, and YOLO12n, the proposed model significantly reduces both the number of parameters and the computational complexity while maintaining accuracy.</p>
<p>Overall, the model proposed in this paper outperforms the other models in terms of the combined performance of lightweight design and detection accuracy.</p>
</sec>
<sec id="s4-3-2">
<label>4.3.2</label>
<title>Comparison of mAP curves</title>
<p>
<xref ref-type="fig" rid="F8">Figure 8</xref> shows the mAP@0.5 and mAP@[0.5:0.95] accuracy curves of different models tested on the RailFOD23 dataset. It show that the proposed model significantly outperforms other models in terms of mAP@ [0.5:0.95], while reducing the number of model parameters and computational complexity. Furthermore, the mAP@0.5 accuracy does not significantly decrease compared with the baseline model.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Comparison of mAP curves of different models: <bold>(a)</bold> mAP@0.5; <bold>(b)</bold> mAP@[0.5:0.95].</p>
</caption>
<graphic xlink:href="fenrg-14-1745369-g008.tif">
<alt-text content-type="machine-generated">Two line graphs compare the performance of different object detection models across epochs. The left graph shows mAP at 0.5 for Faster-RCNN and various YOLO versions, indicating rapid convergence by epoch 50. The right graph shows mAP from 0.5 to 0.95, illustrating similar trends. Each line represents a different model variant.</alt-text>
</graphic>
</fig>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>To enable efficient deployment of a foreign object detection algorithm for power transmission lines on resource-constrained edge devices, this paper proposed a lightweight algorithm based on an improved YOLOv8. Firstly, the large-kernel block of UniRepLKNet is integrated into the C2f module to construct the C2f_LarK structure, which expands the receptive field and reduces redundant computation without increasing network depth. Secondly, a Det_Tiny detection head based on FasterNet partial convolution (PConv) is employed to achieve hardware-friendly optimization of the classification branch through adaptive compression of feature redundancy. Finally, a feature fusion module (Fusion) is designed based on CGAFusion; it enhances edge-detail detection capability through cross-semantic feature interaction. Experimental results show that, compared with the standard YOLOv8n model, the proposed algorithm reduces parameter count by 36.6%, model size by 31.1%, and computational complexity (GFLOPs) by 40.7%, while maintaining the same mAP@0.5 detection accuracy and improving mAP@[0.5:0.95] by 0.3%. A balance between accuracy and efficiency is thus achieved, providing a feasible lightweight solution for the intelligent inspection of transmission lines.</p>
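<p>For readers unfamiliar with partial convolution, the sketch below illustrates the core idea of PConv (<xref ref-type="bibr" rid="B3">Chen et al., 2023</xref>): only a fraction of the channels is convolved while the remainder passes through untouched, which cuts FLOPs and memory access. The 1/4 ratio is the typical FasterNet default and is assumed here; this is an illustrative reimplementation, not the Det_Tiny head itself.</p>
<code language="python"><![CDATA[
import torch
import torch.nn as nn

class PConv(nn.Module):
    """Partial convolution sketch: convolve only the first channels // ratio
    channels and concatenate the untouched remainder."""

    def __init__(self, channels: int, ratio: int = 4, kernel_size: int = 3):
        super().__init__()
        self.conv_channels = channels // ratio
        self.conv = nn.Conv2d(self.conv_channels, self.conv_channels,
                              kernel_size, padding=kernel_size // 2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Split off the slice to be convolved; pass the rest through as-is.
        x1, x2 = torch.split(
            x, [self.conv_channels, x.size(1) - self.conv_channels], dim=1)
        return torch.cat([self.conv(x1), x2], dim=1)

x = torch.randn(1, 64, 80, 80)
y = PConv(64)(x)
print(y.shape)  # torch.Size([1, 64, 80, 80]); only 16 of 64 channels convolved
]]></code>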
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>WL: Funding acquisition, Methodology, Writing &#x2013; original draft, Writing &#x2013; review and editing. TY: Investigation, Methodology, Writing &#x2013; original draft. DS: Conceptualization, Investigation, Writing &#x2013; original draft. YL: Project administration, Writing &#x2013; original draft. LZ: Conceptualization, Formal Analysis, Writing &#x2013; original draft, Writing &#x2013; review and editing. JL: Project administration, Software, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Authors WL, TY, DS, and YL were employed by State Grid Sichuan Electric Power Company Guangyuan Power Supply Company.</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that this work received funding from State Grid Sichuan Electric Power Company. The funder had the following involvement in the study: study design, collection, analysis, interpretation of data, the writing of this article, and the decision to submit it for publication.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1898462/overview">Feng Liu</ext-link>, Nanjing Tech University, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3306808/overview">Man Wu</ext-link>, Guangxi University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3306978/overview">Weichao Pan</ext-link>, Shandong Jianzhu University, China</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bin</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>CI-YOLO: a lightweight foreign object detection model for inspecting transmission line</article-title>. <source>Measurement</source> <volume>242</volume>, <fpage>116193</fpage>. <pub-id pub-id-type="doi">10.1016/j.measurement.2024.116193</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bochkovskiy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>H. Y. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>YOLOv4: optimal speed and accuracy of object detection</article-title>. <source>
<italic>arXiv preprint</italic>, arXiv:2004.10934</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2004.10934</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kao</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhuo</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>C. H.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). &#x201c;<article-title>Run, don&#x2019;t walk: chasing higher FLOPs for faster neural networks</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition (CVPR)</source> (<publisher-loc>Vancouver, Canada</publisher-loc>). <comment>17&#x2013;23 June 2023</comment>.</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Z. M.</given-names>
</name>
</person-group> (<year>2024a</year>). <article-title>DEA-Net: single image dehazing based on detail-enhanced convolution and content-guided attention</article-title>. <source>IEEE Trans. Image Process.</source> <volume>33</volume>, <fpage>1002</fpage>&#x2013;<lpage>1015</lpage>. <pub-id pub-id-type="doi">10.1109/TIP.2024.3354108</pub-id>
<pub-id pub-id-type="pmid">38252568</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2024b</year>). <article-title>RailFOD23: a dataset for foreign object detection on railroad transmission lines</article-title>. <source>Sci. Data</source> <volume>11</volume> (<issue>1</issue>), <fpage>72</fpage>. <pub-id pub-id-type="doi">10.1038/s41597-024-02918-9</pub-id>
<pub-id pub-id-type="pmid">38228610</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ge</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yue</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). &#x201c;<article-title>UniRepLKNet: a universal perception large-kernel convnet for audio, video, point cloud, time-series, and image recognition</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition (CVPR)</source> (<publisher-loc>Seattle, USA</publisher-loc>), <fpage>16</fpage>&#x2013;<lpage>22</lpage>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Faisal</surname>
<given-names>M. A. A.</given-names>
</name>
<name>
<surname>Mecheter</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Qiblawey</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fernandez</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Chowdhury</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Kiranyaz</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Deep learning in automated power line inspection: a review</article-title>. <source>Appl. Energy</source> <volume>385</volume>, <fpage>125507</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2025.125507</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pei</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A lightweight transmission line foreign object detection algorithm incorporating adaptive weight pooling</article-title>. <source>Electronics</source> <volume>13</volume> (<issue>23</issue>), <fpage>4645</fpage>. <pub-id pub-id-type="doi">10.3390/electronics13234645</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Squeeze-and-excitation networks</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source> (<publisher-loc>Salt Lake City, UT, USA</publisher-loc>).</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>T. Y.</given-names>
</name>
<name>
<surname>Goyal</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Focal loss for dense object detection</article-title>,&#x201d; in <source>Proceedings of the IEEE international conference on computer vision (ICCV)</source> (<publisher-loc>Venice, Italy</publisher-loc>). <comment>22&#x2013;29 October 2017</comment>.</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Anguelov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Erhan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Szegedy</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Reed</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>C. Y.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). &#x201c;<article-title>SSD: single shot multibox detector</article-title>,&#x201d; in <source>Proceedings of the European conference on computer vision</source>. <publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>ECCV</publisher-name>. <comment>11&#x2013;14 October 2016</comment>.</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Improved YOLOX-S abnormal condition detection for power transmission line corridors</article-title>,&#x201d; in <source>Proceedings of the IEEE 3rd international conference on power data science ICPDS</source>. <comment>Harbin, China, 17&#x2013;19 December 2021</comment>.</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sui</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>YOLO-CSM-based component defect and foreign object detection in overhead transmission lines</article-title>. <source>Electronics</source> <volume>13</volume> (<issue>1</issue>), <fpage>123</fpage>. <pub-id pub-id-type="doi">10.3390/electronics13010123</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Yolo-inspection: defect detection method for power transmission lines based on enhanced YOLOv5s</article-title>. <source>J. Real-Time Image Process.</source> <volume>20</volume> (<issue>5</issue>), <fpage>104</fpage>. <pub-id pub-id-type="doi">10.1007/s11554-023-01360-1</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maduako</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Igwe</surname>
<given-names>C. F.</given-names>
</name>
<name>
<surname>Abah</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Onwuasaanya</surname>
<given-names>O. E.</given-names>
</name>
<name>
<surname>Chukwu</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Ezeji</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Deep learning for component fault detection in electricity transmission lines</article-title>. <source>J. Big Data</source> <volume>9</volume> (<issue>1</issue>), <fpage>81</fpage>. <pub-id pub-id-type="doi">10.1186/s40537-022-00630-2</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Huan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Improving high-voltage line obstacle detection with multi-scale feature fusion in YOLO algorithm</article-title>,&#x201d; in <source>Proceedings of 6th international conference on electronics and communication, network and computer technology (ECNCT)</source> (<publisher-loc>Guangzhou, China</publisher-loc>), <fpage>19</fpage>&#x2013;<lpage>21</lpage>.</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>YOLOv7-CWFD for real-time detection of bolt defects on transmission lines</article-title>. <source>Sci. Rep.</source> <volume>15</volume> (<issue>1</issue>), <fpage>1635</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-81386-y</pub-id>
<pub-id pub-id-type="pmid">39794347</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Faster R-CNN: towards real-time object detection with region proposal networks</article-title>. <source>IEEE Trans. Pattern Analysis Mach. Intell.</source> <volume>39</volume> (<issue>6</issue>), <fpage>1137</fpage>&#x2013;<lpage>1149</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2016.2577031</pub-id>
<pub-id pub-id-type="pmid">27295650</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sankuri</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Sristy</surname>
<given-names>N. B.</given-names>
</name>
<name>
<surname>Karri</surname>
<given-names>S. P. K.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Accurate insulator defect detection in power transmission lines using semi-supervised hybrid DETR with advanced loss methods</article-title>. <source>J. Real-Time Image Process.</source> <volume>22</volume> (<issue>5</issue>), <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1007/s11554-025-01760-5</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lv</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Che</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>TL-YOLO: foreign-Object detection on power transmission line based on improved YOLOv8</article-title>. <source>Electronics</source> <volume>13</volume> (<issue>8</issue>), <fpage>1543</fpage>. <pub-id pub-id-type="doi">10.3390/electronics13081543</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>Intrusion detection of foreign objects in high-voltage lines based on YOLOv4</article-title>,&#x201d; in <source>Proceedings of the 6th international conference on intelligent computing and signal processing (ICSP)</source> (<publisher-loc>Xi&#x2019;an, China</publisher-loc>), <fpage>9</fpage>&#x2013;<lpage>11</lpage>.</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>Bochkovskiy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>H. Y. M.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition (CVPR)</source> (<publisher-loc>Vancouver, Canada</publisher-loc>). <comment>17&#x2013;23 June 2023</comment>.</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Foreign-object detection in high-voltage transmission line based on improved YOLOv8m</article-title>. <source>Appl. Sci.</source> <volume>13</volume> (<issue>23</issue>), <fpage>12775</fpage>. <pub-id pub-id-type="doi">10.3390/app132312775</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>GFRF R-CNN: object detection algorithm for transmission lines</article-title>. <source>Comput. Model. Eng. and Sci.</source> <volume>82</volume> (<issue>1</issue>), <fpage>1439</fpage>&#x2013;<lpage>1458</lpage>. <pub-id pub-id-type="doi">10.32604/cmc.2024.057797</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Improved YOLOv5 foreign object detection for transmission lines</article-title>. <source>Optoelectron. Lett.</source> <volume>20</volume> (<issue>8</issue>), <fpage>490</fpage>&#x2013;<lpage>496</lpage>. <pub-id pub-id-type="doi">10.1007/s11801-024-3218-y</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Fast-PLDN: fast power line detection network</article-title>. <source>J. Real-Time Image Process.</source> <volume>19</volume> (<issue>1</issue>), <fpage>3</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1007/s11554-021-01154-3</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>