<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Earth Sci.</journal-id>
<journal-title>Frontiers in Earth Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Earth Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-6463</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1649203</article-id>
<article-id pub-id-type="doi">10.3389/feart.2025.1649203</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Earth Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>GKCAE: A graph-attention-based encoder for fine-grained semantic segmentation of high-voltage transmission corridors scenario LiDAR data</article-title>
<alt-title alt-title-type="left-running-head">Zhang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/feart.2025.1649203">10.3389/feart.2025.1649203</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhang</surname>
<given-names>Su</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3095705/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Haibo</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/3167024/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rong</surname>
<given-names>Jingguo</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/3166920/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Yaping</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/3166975/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
</contrib-group>
<aff>
<institution>State Grid Economic and Technological Research Institute Co., Ltd.</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1997329/overview">Zhiheng Liu</ext-link>, Xidian University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1796885/overview">Mahmoud Emam</ext-link>, Menoufia University, Egypt</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2019879/overview">Tingting Wu</ext-link>, Anhui University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Su Zhang, <email>zhangsu615@163.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>08</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>13</volume>
<elocation-id>1649203</elocation-id>
<history>
<date date-type="received">
<day>18</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>07</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Zhang, Liu, Rong and Zhang.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Zhang, Liu, Rong and Zhang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Accurate semantic segmentation of airborne LiDAR point clouds is essential for the intelligent inspection and maintenance of high-voltage transmission infrastructure. While existing methods predominantly focus on major structural components such as towers and conductors, they often fail to address the fine-grained segmentation of smaller yet critical elements, including ground wires, crossing lines, and insulators. To tackle this limitation, we propose a novel network architecture&#x2014;Graph-Kernel Convolution Attention Encoder (GKCAE)&#x2014;designed for multi-class, fine-grained semantic segmentation of transmission corridor point clouds. GKCAE first captures local geometric features using Kernel Point Convolution, and then models inter-class spatial relationships through Graph Edge-Conditioned Convolution to incorporate global contextual information. Additionally, a Channel-Spatial Attention Module is introduced to enhance point-level feature representations, particularly for small or geometrically similar classes. Experiments conducted on three real-world transmission corridor datasets demonstrate that our method achieves a mean Intersection over Union (mIoU) of 81.93% and an Overall Accuracy (OA) of 94.1%, outperforming existing state-of-the-art approaches.</p>
</abstract>
<kwd-group>
<kwd>ALS point clouds</kwd>
<kwd>semantic segmentation</kwd>
<kwd>graph edge convolution</kwd>
<kwd>high-voltage transmission corridors</kwd>
<kwd>deep learning</kwd>
</kwd-group>
<contract-sponsor id="cn001">State Grid Sichuan Economic Research Institute, State Grid Corporation of China<named-content content-type="fundref-id">10.13039/501100018683</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Geoinformatics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>High-voltage transmission line inspection is a vital and routine task in power systems, crucial for ensuring the safety and stability of electricity supply. Its primary objective is to assess the structural integrity of power infrastructure components&#x2014;including conductors, towers, and fittings&#x2014;while identifying potential hazards such as vegetation encroachment, geological disasters, and tower tilts (<xref ref-type="bibr" rid="B54">Yang et al., 2020</xref>). Due to its high accuracy, rapid data acquisition, and all-weather operational capability, airborne Light Detection and Ranging (LiDAR) technology has been widely adopted for transmission line inspection. Semantic segmentation of LiDAR point clouds, as a fundamental step in intelligent inspection workflows, enables precise differentiation of key elements such as conductors, towers, and vegetation, thereby enhancing automation and supporting fine-grained management and intelligent analysis.</p>
<p>Previous research on the semantic segmentation of transmission line point clouds has predominantly relied on handcrafted features and physics-based constraints (<xref ref-type="bibr" rid="B12">Huang et al., 2021</xref>; <xref ref-type="bibr" rid="B30">Rejichi and Chaabane, 2015</xref>). Conventional methods include terrain classification using machine learning algorithms such as Random Forests and Support Vector Machines (SVM), as well as line- and surface-based feature extraction using Principal Component Analysis (PCA) and geometric descriptors. Although these approaches have achieved promising results, they suffer from limitations such as high parameter sensitivity, low computational efficiency, and poor generalization capability. As a result, they are typically constrained to specific datasets and narrowly defined scenarios, rendering them inadequate for the large-scale, high-performance, and fine-grained requirements of modern intelligent inspection tasks.</p>
<p>Recently, deep learning-based 3D point cloud semantic segmentation has been extensively explored in domains such as indoor navigation, smart cities, digital twins, plant inspection, and autonomous driving (<xref ref-type="bibr" rid="B11">Hu et al., 2020</xref>; <xref ref-type="bibr" rid="B5">Fan et al., 2021</xref>; <xref ref-type="bibr" rid="B55">Yin et al., 2023</xref>; <xref ref-type="bibr" rid="B19">Landrieu and Simonovsky, 2018</xref>; <xref ref-type="bibr" rid="B10">Han et al., 2023</xref>; <xref ref-type="bibr" rid="B50">Xiang et al., 2023</xref>; <xref ref-type="bibr" rid="B8">Ghahremani et al., 2021</xref>; <xref ref-type="bibr" rid="B52">Xin et al., 2023</xref>). In parallel, fine-grained semantic segmentation methods have attracted increasing attention due to their ability to differentiate visually similar and spatially adjacent components in complex environments. For instance, SSC-Net employs a multi-task joint learning strategy for segmentation and classification, demonstrating high performance in challenging biomedical imaging scenarios (<xref ref-type="bibr" rid="B32">Sha et al., 2025</xref>). Similarly, IndVisSGG introduces a vision-language model-based scene graph generation framework for industrial applications, effectively capturing small-scale structures with contextual reasoning (<xref ref-type="bibr" rid="B44">Wang et al., 2025</xref>). In the domain of multimodal learning, Vman integrates visual-modified attention mechanisms to enhance semantic recognition across tasks involving subtle feature variations (<xref ref-type="bibr" rid="B37">Song et al., 2025</xref>). Moreover, sparse Bayesian learning techniques have been successfully applied to radar data processing for superresolution and small-target enhancement, providing a principled approach to fine-grained signal discrimination (<xref ref-type="bibr" rid="B58">Zhang and Chen, 2014</xref>). 
Correspondingly, there is growing interest in leveraging deep learning techniques for semantic understanding of transmission line environments (<xref ref-type="bibr" rid="B33">Shen et al., 2023</xref>; <xref ref-type="bibr" rid="B38">Su et al., 2022</xref>; <xref ref-type="bibr" rid="B64">Zhou et al., 2024</xref>; <xref ref-type="bibr" rid="B35">Shi et al., 2023</xref>; <xref ref-type="bibr" rid="B62">Zhao et al., 2020</xref>). Despite notable advances, several critical challenges remain. (1) Most existing methods focus on segmenting broad object categories (e.g., ground, buildings, vegetation, towers, conductors) while neglecting the fine-grained segmentation (<xref ref-type="bibr" rid="B22">Lin et al., 2021</xref>) of essential metallic components such as ground wires, insulators, and drainage wires, which are indispensable for power system safety. Although certain studies have incorporated spatial topological constraints to distinguish ground and cross-span wires, the accurate segmentation of insulators and drainage wires remains challenging due to their similar geometric characteristics and close physical proximity to towers and conductors (<xref ref-type="bibr" rid="B43">Wang et al., 2023</xref>). Some approaches introduce additional post-processing steps to differentiate conductors from insulators; however, these techniques are typically time-consuming and highly sensitive to parameter settings, which limits their practical applicability. (2) Transmission corridor point clouds exhibit a strip-like, continuous spatial distribution. Traditional point-based sampling strategies (e.g., k-nearest neighbors, farthest point sampling) are often inadequate due to scale disparities along the XY axes, thereby hindering full-scene sampling and reducing the model&#x2019;s capacity to learn discriminative features for small-class objects. 
Moreover, a single transmission span&#x2014;comprising two towers and the connecting segment&#x2014;can contain tens of millions of points, while critical components such as conductors, insulators, and drainage wires may account for less than 5% of the total data. Therefore, developing effective encoding and feature learning strategies specifically tailored to the unique structure of transmission corridor point clouds remains an open and significant research challenge.</p>
<p>To address these issues, we propose a fine-grained semantic understanding encoder network for transmission corridors based on airborne LiDAR point clouds. We introduce a Graph-Kernel Convolution Attention Encoder (GKCAE), which performs local-to-global feature encoding from both point-wise and class-aware perspectives, enabling high-accuracy, robust, and fine-grained semantic segmentation of transmission corridor components. The main contributions of this study are as follows:<list list-type="simple">
<list-item>
<p>1. A Graph Edge-Conditioned Convolution (GECC) module guided by segmentation, which models inter-class geometric relationships to enhance geometric feature extraction and significantly improves inter-class distinction for precise semantic boundary delineation.</p>
</list-item>
<list-item>
<p>2. A novel Channel-Spatial Attention Module (CSAM) that captures cross-scene contextual dependencies and jointly models local and global features, thereby enhancing point-level semantic understanding.</p>
</list-item>
<list-item>
<p>3. A systematic classification schema tailored for modern power grid asset management. Extensive quantitative and qualitative evaluations on multiple real-world transmission corridor datasets demonstrate that the proposed method outperforms existing state-of-the-art approaches in various power line segmentation tasks.</p>
</list-item>
</list>
</p>
<p>In addition, recent advances in multi-scale spatiotemporal interactive fusion networks (<xref ref-type="bibr" rid="B25">Ma et al., 2025</xref>), deep core node information embedding (<xref ref-type="bibr" rid="B6">Fei et al., 2025</xref>), history-enhanced 3D scene graph reasoning (<xref ref-type="bibr" rid="B7">Feng et al., 2025</xref>), and pixel-level noise mining (<xref ref-type="bibr" rid="B24">Liu et al., 2025</xref>) have provided new insights and methodologies for addressing the challenges of fine-grained semantic segmentation in transmission corridor point clouds. These studies have significantly improved model performance in complex scene understanding and segmentation accuracy by introducing techniques such as multi-scale feature fusion, deep information embedding, and temporal context enhancement. In particular, the multi-scale spatiotemporal interactive fusion network enhances the ability to capture dynamic changes and spatial relationships of key components by integrating features across multiple spatial and temporal scales. Meanwhile, pixel-level noise mining offers an effective strategy for mitigating noise in point cloud data, further improving the accuracy and robustness of segmentation results.</p>
<p>The structure of this paper is organized as follows. <xref ref-type="sec" rid="s1">Section 1</xref> outlines the research objectives and summarizes the main contributions. <xref ref-type="sec" rid="s2">Section 2</xref> reviews the current state-of-the-art methods and analyzes their advantages and limitations. <xref ref-type="sec" rid="s3">Section 3</xref> presents the proposed methodology, including the Kernel Point Convolution (KPConv) module, the Graph Edge-Conditioned Convolution (GECC) module, the Channel-Spatial Attention Module (CSAM), and the overall network architecture. <xref ref-type="sec" rid="s4">Section 4</xref> reports the experimental results on real-world transmission corridor datasets, compares our method with existing approaches, and discusses its effectiveness. Finally, <xref ref-type="sec" rid="s5">Section 5</xref> concludes the paper and outlines potential future work.</p>
</sec>
<sec id="s2">
<title>2 Related works</title>
<p>In recent years, the semantic segmentation of LiDAR point clouds has emerged as a key research focus for the intelligent inspection of power transmission corridors. Existing approaches can be broadly categorized into traditional methods and deep learning-based techniques.</p>
<sec id="s2-1">
<title>2.1 Traditional methods</title>
<p>Traditional methods primarily employ either unsupervised or supervised machine learning techniques, relying heavily on handcrafted features and predefined physical constraints.</p>
<p>Unsupervised methods, such as Principal Component Analysis (PCA), are widely used to identify linear structures such as power lines by computing dominant eigenvectors. These approaches offer high computational efficiency; however, they are sensitive to outliers and exhibit limited capacity in modeling complex spatial configurations, resulting in suboptimal performance for large-scale semantic segmentation tasks (<xref ref-type="bibr" rid="B30">Rejichi and Chaabane, 2015</xref>; <xref ref-type="bibr" rid="B14">Hui et al., 2021</xref>; <xref ref-type="bibr" rid="B28">Nurunnabi et al., 2012</xref>; <xref ref-type="bibr" rid="B2">Cao et al., 2025</xref>).</p>
<p>Supervised classification methods, including Random Forest (RF) (<xref ref-type="bibr" rid="B15">Jiang et al., 2022</xref>; <xref ref-type="bibr" rid="B21">Liao et al., 2022</xref>; <xref ref-type="bibr" rid="B27">Ni et al., 2017</xref>; <xref ref-type="bibr" rid="B39">Tang et al., 2023</xref>) and Support Vector Machines (SVM) (<xref ref-type="bibr" rid="B4">Chen et al., 2019</xref>; <xref ref-type="bibr" rid="B60">Zhang et al., 2013</xref>), utilize manually crafted feature vectors to perform object classification at the scene level. RF enhances robustness through ensemble learning, but suffers from limited interpretability and difficulty in distinguishing geometrically similar objects. SVM seeks an optimal hyperplane in high-dimensional feature space; however, its performance often deteriorates in multi-class segmentation scenarios due to challenges in defining consistent decision boundaries across diverse object classes (<xref ref-type="bibr" rid="B57">Zafar et al., 2018</xref>).</p>
<p>Overall, these traditional approaches are inherently constrained by their reliance on domain-specific rules and limited generalization capabilities, rendering them inadequate for modern large-scale, fine-grained monitoring of power transmission systems (<xref ref-type="bibr" rid="B26">Mirzaei et al., 2022</xref>; <xref ref-type="bibr" rid="B9">Grothum et al., 2023</xref>; <xref ref-type="bibr" rid="B34">Shen et al., 2024</xref>).</p>
</sec>
<sec id="s2-2">
<title>2.2 Deep learning methods</title>
<p>With the rapid advancement of 3D deep learning, point cloud semantic segmentation has achieved significant progress across a wide range of domains, including autonomous driving, digital twins, and smart grids. Deep learning-based methods can generally be categorized into point-based methods, convolution-based methods, and hybrid approaches.</p>
<sec id="s2-2-1">
<title>2.2.1 Point-based methods</title>
<p>Point-based methods directly operate on 3D spatial coordinates, thereby preserving the raw geometric structure of point clouds. PointNet and its variants, such as PointNet&#x2b;&#x2b; (<xref ref-type="bibr" rid="B3">Charles et al., 2017</xref>; <xref ref-type="bibr" rid="B29">Qi et al., 2017</xref>) and RandLA-Net (<xref ref-type="bibr" rid="B11">Hu et al., 2020</xref>), utilize multi-layer perceptrons (MLPs) and farthest point sampling (FPS) strategies to perform per-point semantic prediction (<xref ref-type="bibr" rid="B20">Lawin et al., 2017</xref>; <xref ref-type="bibr" rid="B17">Lai et al., 2022</xref>). SCF-Net (<xref ref-type="bibr" rid="B5">Fan et al., 2021</xref>) and Stratified Transformer (<xref ref-type="bibr" rid="B17">Lai et al., 2022</xref>) further extend these frameworks by aggregating local contextual information through neighborhood-based encodings and attention mechanisms. While these models offer architectural simplicity and computational efficiency, they often struggle to accurately segment small object categories due to downsampling-induced feature dilution.</p>
<p>Recent studies have applied point-based techniques to power grid scenarios. For instance, Zhang et al. enhanced PointNet by incorporating geometric feature extraction (GFE) and neighborhood information aggregation (NIA) modules to segment power lines and towers in railway environments (<xref ref-type="bibr" rid="B61">Zhang et al., 2022</xref>). <xref ref-type="bibr" rid="B38">Su et al. (2022)</xref> and <xref ref-type="bibr" rid="B56">Yu et al. (2023)</xref> employed PointNet&#x2b;&#x2b; for high-precision segmentation of transmission structures, while Yu et al. additionally proposed a dual-stage sampling strategy based on RandLA-Net to improve segmentation accuracy for wires and towers.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Convolution-based and graph-based methods</title>
<p>Convolution-based methods aim to regularize the irregular structure of point clouds through voxelization or kernel-based strategies, thereby enabling the application of convolutional neural networks (CNNs). Representative models such as KPConv and PointCNN employ kernel point convolution and learned transformation matrices, respectively, to effectively capture local geometric context (<xref ref-type="bibr" rid="B40">Thomas et al., 2019</xref>; <xref ref-type="bibr" rid="B41">Thomas et al., 2024</xref>; <xref ref-type="bibr" rid="B13">Huang et al., 2024</xref>).</p>
<p>Graph-based methods transform point clouds into graph structures that explicitly represent spatial relationships among neighboring points. Graph neural networks (GNNs), including DGCNN, SPG, and SPT, utilize these representations to facilitate both local and global feature learning (<xref ref-type="bibr" rid="B42">Wang et al., 2019</xref>; <xref ref-type="bibr" rid="B36">Simonovsky and Komodakis, 2017</xref>; <xref ref-type="bibr" rid="B31">Robert et al., 2023</xref>). These models have demonstrated superior capability in modeling topological relationships, making them particularly well-suited for corridor-like scenes with strong spatial continuity.</p>
<p>In the power grid domain, several studies have adopted PointCNN for UAV-based transmission line segmentation (<xref ref-type="bibr" rid="B63">Zhao et al., 2023</xref>; <xref ref-type="bibr" rid="B64">Zhou et al., 2024</xref>), while others have incorporated attention mechanisms into KPConv-based frameworks to enhance feature extraction in power corridor environments.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Multi-modal and hybrid methods</title>
<p>Some recent studies have explored multi-modal fusion strategies to overcome the limitations of single-representation learning. RPVNet employs a deep adaptive Range-Point-Voxel fusion framework to integrate complementary information from different modalities (<xref ref-type="bibr" rid="B53">Xu et al., 2021</xref>). PVCNN (<xref ref-type="bibr" rid="B23">Liu et al., 2019</xref>) combines point-based and voxel-based features to enhance semantic representations. <xref ref-type="bibr" rid="B43">Wang et al. (2023)</xref> further introduced coordinate attention mechanisms into PointNet&#x2b;&#x2b;, resulting in an end-to-end CA-PointNet&#x2b;&#x2b; model that improves spatial feature awareness.</p>
<p>Despite considerable progress, directly applying existing deep learning models to transmission corridor scenarios remains challenging. Point cloud data in these environments often exhibit high sparsity, severe class imbalance, and a large number of small-scale target components. These characteristics significantly hinder model performance, particularly in identifying critical but underrepresented elements such as insulators, ground wires, and drainage lines. Moreover, the elongated spatial distribution of corridor scenes complicates traditional point sampling strategies (e.g., KNN, FPS), thereby weakening the network&#x2019;s ability to capture global and contextual features. Consequently, there is an urgent need for customized semantic segmentation models specifically designed to address the unique spatial and structural properties of high-voltage transmission corridors.</p>
</sec>
</sec>
</sec>
<sec sec-type="methods" id="s3">
<title>3 Methodology</title>
<p>Due to the characteristics of airborne LiDAR scanning and the highly unbalanced spatial distribution of scene elements in high-voltage transmission corridors, the resulting point clouds typically exhibit significant sparsity and severe class imbalance. Although some studies have attempted to enhance segmentation performance for small-sample classes using data augmentation techniques, these approaches operate primarily at the data preprocessing level and fail to address the fundamental limitations of network generalization.</p>
<p>The core challenge in semantic understanding of transmission corridor scenes lies in achieving accurate and fine-grained segmentation of key power infrastructure components, including towers, conductors, and associated metallic fittings (e.g., insulators and jumper wires).</p>
<p>To tackle this problem, we propose a novel architecture termed Graph-Kernel Convolution Attention Encoder (GKCAE). This network integrates graph convolution and point convolution to jointly encode local&#x2013;global relationships at both the point-level and class-level representations. Furthermore, a context-aware attention module is incorporated to aggregate multi-scale semantic features and enhance deep feature extraction. This design enables robust, high-precision, and fine-grained semantic segmentation of complex point cloud data from power line corridors.</p>
<sec id="s3-1">
<title>3.1 KPConv block</title>
<p>Our work focuses on three-dimensional point cloud data from high-voltage transmission corridor scenarios. We employ a convolutional architecture that integrates kernel point convolutions and graph-based modeling to extract point-wise semantic features. The KPConv block is implemented following the variational convolutional framework proposed in <xref ref-type="bibr" rid="B40">Thomas et al. (2019)</xref>, and its structure is illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Structure of the Kernel Point Convolution block (KPConv).</p>
</caption>
<graphic xlink:href="feart-13-1649203-g001.tif">
<alt-text content-type="machine-generated">Flowchart of a neural network layer with multiple components. Input \(C_{in}\) passes to Conv1D with Batch Normalization (BN) and ReLU activation, followed by KPConv with BN and ReLU. This connects to another Conv1D with BN, then splits. One path goes to an element-wise sum, and another goes through Conv1D again with BN and ReLU. The results combine at the element-wise sum circle, passing through ReLU with BN to produce \(C_{out}\). Key indicates element-wise sum and BN/ReLU meanings.</alt-text>
</graphic>
</fig>
<p>Our work focuses on three-dimensional point cloud data in high-voltage transmission corridor scenarios. We employ a convolutional architecture that integrates kernel point convolutions with graph-based modeling to extract point-wise semantic features. The KPConv block is implemented based on the variational convolutional framework proposed in <xref ref-type="bibr" rid="B40">Thomas et al. (2019)</xref>, and its architecture is illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<p>In KPConv, each convolutional kernel is composed of a set of kernel points with predefined coordinates in 3D Euclidean space. For a given input point, convolution is performed by computing weights based on its relative position to these kernel points. The input to the KPConv layer consists of both spatial coordinates and associated feature vectors. The spatial coordinates, typically represented as <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, encode the geometric structure and spatial relationships among points in the cloud. Formally, a KPConv kernel is defined by a set of kernel points <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, where each <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes a kernel point in 3D space. These kernel points serve as learnable positions analogous to filter weights in conventional image convolution, but are distributed irregularly to adapt to the unstructured nature of point clouds. During training, the positions of the kernel points are optimized to improve feature extraction capability, allowing KPConv to capture local geometric variations more effectively than grid-based methods.</p>
<p>Given an input point cloud <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="script">P</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and its associated features <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi mathvariant="bold">F</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, the kernel <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> convolution for a point <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be defined as follows (<xref ref-type="disp-formula" rid="e1">Equation 1</xref>):<disp-formula id="e1">
<mml:math id="m8">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x2217;</mml:mo>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>g</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Further, it can be explained that for each input point <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the network searches for a set of neighboring points <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. This is typically accomplished using a nearest neighbors search, where neighbors are determined by calculating the Euclidean distance <inline-formula id="inf10">
<mml:math id="m11">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> between points and selecting the nearest <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> points (or within a spherical neighborhood). KPConv performs a distance-weighted convolution operation. The output feature <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> for each input point <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is computed as follows (<xref ref-type="disp-formula" rid="e2">Equation 2</xref>):<disp-formula id="e2">
<mml:math id="m15">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mi>h</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf14">
<mml:math id="m16">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a weighting function, such as a Gaussian kernel, that calculates the weight based on the distances between neighboring points <inline-formula id="inf15">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and kernel points <inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The convolution kernel defined by the kernel points aggregates features from the neighboring points through weighted summation to produce a new feature vector. This new feature effectively captures local geometric and attribute information, representing the characteristics of the point cloud.</p>
<p>Additionally, to enhance the learning capability for varying object sizes, the convolution can incorporate deformable kernels. While some scholars argue that deformability has negligible effects on ALS data (<xref ref-type="bibr" rid="B22">Lin et al., 2021</xref>; <xref ref-type="bibr" rid="B46">Wen et al., 2021</xref>; <xref ref-type="bibr" rid="B45">Wen et al., 2020</xref>), we believe that in the context of high-voltage transmission line scenes, deformable KPConv can adapt to local geometric shapes, thus enhancing the representation of detailed features. This is particularly beneficial for capturing the significant geometric variations of objects such as power towers and buildings. Therefore, in our work, we still adopt a mixed approach as suggested by <xref ref-type="bibr" rid="B40">Thomas et al. (2019)</xref>, <xref ref-type="bibr" rid="B41">Thomas et al. (2024)</xref>, incorporating both rigid and deformable kernels within the overall semantic segmentation structure. The calculations and definitions for the deformable kernel features are expressed in <xref ref-type="disp-formula" rid="e3">Equations 3</xref>, <xref ref-type="disp-formula" rid="e4">4</xref>:<disp-formula id="e3">
<mml:math id="m19">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x2217;</mml:mo>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>deform</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>deform</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>h</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf17">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the offsets generated by the rigid KPConv, mapping the dimension of input features to <inline-formula id="inf18">
<mml:math id="m22">
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> values. During training, the network simultaneously learns the rigid kernel that generates the shifts and the deformable kernel that produces the output features, with the learning rate of the former set to 0.1 times the global network learning rate. After the convolution operation, a nonlinear activation function (ReLU) and batch normalization are applied to enhance the model&#x2019;s nonlinear capacity and stability.</p>
</sec>
<sec id="s3-2">
<title>3.2 Graph edge-conditioned convolution block</title>
<p>Although KPConv can learn local features of points through direct point convolution, its reliance on local neighborhoods limits its receptive field and prevents it from capturing global information. Additionally, point-wise features fail to encode relationships between objects within a scene, making it difficult to explore interactions among objects. This limitation affects our task by hindering both the accurate segmentation of transmission lines and other linear targets and the discrimination between poles and buildings that may be obscured by vegetation. Previous work has demonstrated that incorporating global contextual information can enhance model performance for large-scale semantic segmentation tasks (<xref ref-type="bibr" rid="B40">Thomas et al., 2019</xref>; <xref ref-type="bibr" rid="B13">Huang et al., 2024</xref>). To address this issue, we introduce a Graph Edge-Conditioned Convolution (GECC) block to encode and extract features based on dependencies between global objects. Inspired by SPG and DGCNN, we designed the GECC block, illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>. We construct graphs from geometrically homogeneous points along line segments to capture relationships between objects. By combining segment features with point features, the network can adaptively encode local and global features, thereby achieving improved semantic predictions on the ALS dataset.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Structure of the Graph Edge-Conditioned Convolution block (GECC).</p>
</caption>
<graphic xlink:href="feart-13-1649203-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a neural network architecture. The process begins with the input \( C_{in} \) passing through a Conv1D layer with Batch Normalization and ReLU, followed by two branches. The top branch involves Segmentation labels and a GECC layer with Batch Normalization and ReLU, leading to another Conv1D with Batch Normalization. The bottom branch merges with the top through an element-wise sum and proceeds to a ReLU activation with Batch Normalization. Output is \( C_{out} \). Labels clarify operations and abbreviations like BN for Batch Normalization and ReLU for Leaky ReLU.</alt-text>
</graphic>
</fig>
<p>For the definition of the graph structure and embedding encoding, our process is as follows. First, for edge-conditioned convolution, we consider a directed or undirected graph <inline-formula id="inf19">
<mml:math id="m23">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf20">
<mml:math id="m24">
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a finite set of vertex geometries with <inline-formula id="inf21">
<mml:math id="m25">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf22">
<mml:math id="m26">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>&#x2286;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a set of edges with <inline-formula id="inf23">
<mml:math id="m27">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Let <inline-formula id="inf24">
<mml:math id="m28">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the layer index in the feedforward neural network. There exists a function <inline-formula id="inf25">
<mml:math id="m29">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>:</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> that assigns labels to each vertex, and <inline-formula id="inf26">
<mml:math id="m30">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> that assigns labels to each edge. We utilize the concept of the Superpoint graph from SPG and apply an unsupervised <inline-formula id="inf27">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>-cut pursuit algorithm (<xref ref-type="bibr" rid="B18">Landrieu and Obozinski, 2017</xref>) to partition the point cloud into individual clusters. This step involves pre-computing semantic labels and predefined geometric features before training. As a result, the graph structure remains fixed, eliminating the need to update segment labels repeatedly during training, thereby avoiding unnecessary computational overhead.</p>
<p>Next, we construct the graph structure based on the input. Given a point cloud <inline-formula id="inf28">
<mml:math id="m32">
<mml:mrow>
<mml:mi mathvariant="script">P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> with its geometric features <inline-formula id="inf29">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (such as linearity or normal vectors), we build a directed graph <inline-formula id="inf30">
<mml:math id="m34">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and set its labels <inline-formula id="inf31">
<mml:math id="m35">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf32">
<mml:math id="m36">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> as follows. We create a vertex <inline-formula id="inf33">
<mml:math id="m37">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> for every point <inline-formula id="inf34">
<mml:math id="m38">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and assign the respective signal to it by <inline-formula id="inf35">
<mml:math id="m39">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> (or 0 if there are no features <inline-formula id="inf36">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>). Then, we connect each vertex <inline-formula id="inf37">
<mml:math id="m41">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to all vertices <inline-formula id="inf38">
<mml:math id="m42">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in its spatial neighborhood via a directed edge <inline-formula id="inf39">
<mml:math id="m43">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. The edge-conditioned convolution can dynamically produce filtering weights <inline-formula id="inf40">
<mml:math id="m44">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> based on the edge <inline-formula id="inf41">
<mml:math id="m45">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and flexibly handle varying neighborhood sizes to capture contextual information between different segments. Algorithm 1 presents the pseudocode for constructing the graph structure and computing the features using the Graph Edge-Conditioned Convolution (GECC) module. The computation of ECC is as follows (<xref ref-type="disp-formula" rid="e5">Equation 5</xref>):<disp-formula id="e5">
<mml:math id="m46">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>We learn the parameters <inline-formula id="inf42">
<mml:math id="m47">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> from a multilayer perceptron. The edge features <inline-formula id="inf43">
<mml:math id="m48">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are processed by <inline-formula id="inf44">
<mml:math id="m49">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> to generate a weight matrix, which is then used to perform matrix-vector multiplication with the features of neighboring nodes. Finally, the updated node features are propagated directly back to each point, resulting in <inline-formula id="inf45">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>gecc</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as the output of this block. Physically, this formulation enables each point to aggregate and filter geometric features from its spatial neighbors, allowing the model to learn contextual relationships based on local geometry.</p>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<p>GECC Block Feature Generation Algorithm.<list list-type="simple">
<list-item>
<p>
<bold>Require:</bold> Point features <inline-formula id="inf46">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>in</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>in</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, segment labels <inline-formula id="inf47">
<mml:math id="m52">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>
<bold>Ensure:</bold> Output features <inline-formula id="inf48">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;1:&#x2003;<inline-formula id="inf49">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>BN</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>Conv</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>in</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;2:&#x2003;<inline-formula id="inf50">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>local</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext>KPConv</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mspace width="1em"/>
<mml:mo>&#x25b9;</mml:mo>
<mml:mtext>Local&#x2009;features&#x2009;in&#x2009;</mml:mtext>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;3:&#x2003;<bold>for</bold> <inline-formula id="inf51">
<mml:math id="m56">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf52">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;4:&#x2003;&#x2003;<inline-formula id="inf53">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>local</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;5:&#x2003;Construct graph <inline-formula id="inf54">
<mml:math id="m59">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> with segments <inline-formula id="inf55">
<mml:math id="m60">
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> as nodes and fully connected edges</p>
</list-item>
<list-item>
<p>&#x2003;6:&#x2003;<bold>for</bold> each node <inline-formula id="inf56">
<mml:math id="m61">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;7:&#x2003;&#x2003;<inline-formula id="inf57">
<mml:math id="m62">
<mml:mrow>
<mml:mtext>agg</mml:mtext>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;8:&#x2003;&#x2003;<bold>for</bold> each neighbor <inline-formula id="inf58">
<mml:math id="m63">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;9:&#x2003;&#x2003;&#x2003;<inline-formula id="inf59">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;10:&#x2003;&#x2003;&#x2003;<inline-formula id="inf60">
<mml:math id="m65">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2190;</mml:mo>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;11:&#x2003;&#x2003;&#x2003;<inline-formula id="inf61">
<mml:math id="m66">
<mml:mrow>
<mml:mtext>agg</mml:mtext>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext>agg</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;12:&#x2003;&#x2003;<inline-formula id="inf62">
<mml:math id="m67">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2190;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x22c5;</mml:mo>
<mml:mtext>agg</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mspace width="1em"/>
<mml:mo>&#x25b9;</mml:mo>
<mml:mtext>Eq.&#x2009;</mml:mtext>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>5</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;13:&#x2003;<bold>for</bold> each point <inline-formula id="inf63">
<mml:math id="m68">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>&#x2003;14:&#x2003;&#x2003;<inline-formula id="inf64">
<mml:math id="m69">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>ecc</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;15:&#x2003;<inline-formula id="inf65">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>cat</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext>Concat</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>local</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>ecc</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;16:&#x2003;<inline-formula id="inf66">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>mixed</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext>BN</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>Conv</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>cat</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;17:&#x2003;<inline-formula id="inf67">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>res</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>BN</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>Conv</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>in</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;18:&#x2003;<inline-formula id="inf68">
<mml:math id="m73">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext>ReLU</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>BN</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>mixed</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>res</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>&#x2003;19:&#x2003;<bold>return</bold> <inline-formula id="inf69">
<mml:math id="m74">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
</list>
</p>
</statement>
</p>
</sec>
<sec id="s3-3">
<title>3.3 Channel-spatial attention module</title>
<p>Contextual information is essential for capturing global relationships within a scene. In point cloud semantic segmentation, local features typically focus on the geometric properties of points within a local neighborhood, but points belonging to the same category can share similar characteristics despite being spatially distant. Identifying the correlations among points in feature space can significantly improve the model&#x2019;s predictive accuracy. The Channel-Spatial Attention Module (CSAM) is frequently used in remote sensing to model the global contextual information of targets in images, enhancing the global feature representation. Similarly, we employ a CSAM to compute the attention of features processed by the graph-point convolution, allowing for the modeling of similarities between distant targets and improving the global understanding of the scene.</p>
<p>As shown in <xref ref-type="fig" rid="F3">Figure 3</xref>, the input features are projected into different feature subspaces through various learnable fully connected layers, which are then used to construct queries, keys, and values for the attention function. The output of the attention function serves to enhance the features by encoding global contextual information. Our channel attention mechanism focuses on selecting and amplifying the features that are most beneficial for the network. In <xref ref-type="fig" rid="F3">Figure 3</xref>, the weights for different features are computed by analyzing the relationships between them. These computed weights are then multiplied with the features to emphasize those that are crucial for network classification. During this weight computation, we reduce the spatial dimensions of the input features to improve computational efficiency. For multi-feature aggregation, unlike PointNet, which applies only max pooling, we additionally use average pooling to better capture the extent of target categories. Thus, before summation, we perform feature aggregation using both max pooling and average pooling. The specific operations of the channel attention mechanism are outlined as follows.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Structure of the Channel-Spatial Attention Module (CSAM).</p>
</caption>
<graphic xlink:href="feart-13-1649203-g003.tif">
<alt-text content-type="machine-generated">Diagram of a neural network model showing channel and spatial attention mechanisms. The channel attention involves pooling, MLP, and softmax operations, while spatial attention includes concatenation and multi-layer perceptron processes. Both sections lead to a fully connected layer. Symbols indicate element-wise sum and matrix multiplication.</alt-text>
</graphic>
</fig>
<p>First, we aggregate spatial dimension information between different feature channels using both average pooling and max pooling to obtain the average pooled feature <inline-formula id="inf70">
<mml:math id="m75">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mtext>avg</mml:mtext>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and the max pooled feature <inline-formula id="inf71">
<mml:math id="m76">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mtext>max</mml:mtext>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. Then, these two distinct features are input into a multi-layer perceptron (MLP) network to generate an important feature mapping function <inline-formula id="inf72">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. This MLP network consists of two fully connected layers, an activation function, and a dropout mechanism applied after the second fully connected layer to enhance the network&#x2019;s generalization capability. Finally, the results from the MLP processing of the two different features are summed element-wise, which can be expressed as follows (<xref ref-type="disp-formula" rid="e6">Equations 6</xref>, <xref ref-type="disp-formula" rid="e7">7</xref>):<disp-formula id="e6">
<mml:math id="m78">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2009;</mml:mo>
<mml:mtext>avg</mml:mtext>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2295;</mml:mo>
<mml:mi>&#x3c6;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mtext>max</mml:mtext>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m79">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf73">
<mml:math id="m80">
<mml:mrow>
<mml:mo>&#x2295;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf74">
<mml:math id="m81">
<mml:mrow>
<mml:mo>&#x2299;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represent element-wise addition and matrix multiplication, respectively, <inline-formula id="inf75">
<mml:math id="m82">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the activation function, and <inline-formula id="inf76">
<mml:math id="m83">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the MLP network.</p>
<p>The spatial attention module is specifically designed to select neighborhoods that are more beneficial for expressing point cloud shape information, as illustrated in the flowchart below in <xref ref-type="fig" rid="F3">Figure 3</xref>. For the input features, we first apply average pooling and max pooling operations to enhance the focus on the relationships between different object categories. Notably, the features here are the aggregated feature matrices from GECC and KPConv, representing a point-object level feature aggregator. Next, we concatenate the resulting features <inline-formula id="inf77">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and perform convolution operations to generate different attention coefficients:<disp-formula id="e8">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>max</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>avg</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>In <xref ref-type="disp-formula" rid="e8">Equation 8</xref>, <inline-formula id="inf78">
<mml:math id="m86">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> stands for the activation function, <inline-formula id="inf79">
<mml:math id="m87">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> stands for the MLP network, <inline-formula id="inf80">
<mml:math id="m88">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the concatenation operation, and <inline-formula id="inf81">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>max</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf82">
<mml:math id="m90">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>avg</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the max pooling and average pooling operations, respectively.</p>
<p>Finally, we combine the channel attention and spatial attention, passing them through a fully connected network to obtain point-wise attention scores. Through this channel-spatial attention module, point features are updated from a global perspective, enabling comprehensive learning of interactions between complex points. This process enhances the model&#x2019;s ability to capture dependencies across spatially distant points, thereby improving prediction accuracy.</p>
</sec>
<sec id="s3-4">
<title>3.4 Overall architecture</title>
<p>We propose the graph kernel convolution attention-based encoder (GKCAE) for local-global feature encoding of airborne LiDAR point cloud data in high-voltage transmission corridor scenes. Inspired by previous work, we use a U-Net architecture as the overall framework, and the designed semantic segmentation model is illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>. The model employs a five-layer network for encoding, with the structures of graph edge-conditioned convolution (GECC), kernel point convolution (KPConv), and channel-spatial attention module (CSAM) in each layer depicted in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The proposed semantic segmentation network structure diagram.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g004.tif">
<alt-text content-type="machine-generated">Diagram illustrating a neural network architecture with input and output point clouds on the sides, and various blocks in between. Blocks are labeled with features such as KPConv, GCC, class score, and attention, connected by arrows showing data flow. A legend explains the color-coded elements representing points, features, and modules.</alt-text>
</graphic>
</fig>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Diagram of the component structure in each layer.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g005.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a process where &#x22;Semantic Labels&#x22; and &#x22;XYZ&#x22; input into &#x22;GECC Block&#x22; and &#x22;KPConv Block,&#x22; respectively, which then combine to feed into &#x22;CSAM,&#x22; leading to a final block.</alt-text>
</graphic>
</fig>
<p>To capture local geometric information at multiple scales, we apply downsampling to gradually expand the convolutional receptive field. In the decoder, nearest-neighbor upsampling is employed to obtain the final point-wise features. Throughout the encoding layers, we extract local-global features and incorporate CSAM in the first, third, and fifth layers to capture contextual information. In addition, skip connections are used to transfer intermediate features from the encoder to the decoder, where they are combined with the upsampled features and passed through a fully connected layer to produce the final semantic predictions.</p>
<p>To train the network, we use a weighted cross-entropy loss, as shown in <xref ref-type="disp-formula" rid="e10">Equation 10</xref>:<disp-formula id="e9">
<mml:math id="m91">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
<disp-formula id="e10">
<mml:math id="m92">
<mml:mrow>
<mml:mtext>Loss</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>The weight <inline-formula id="inf83">
<mml:math id="m93">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for each class is calculated based on its proportion in the total number of points, as shown in <xref ref-type="disp-formula" rid="e9">Equation 9</xref>, where <inline-formula id="inf84">
<mml:math id="m94">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the number of points in class <inline-formula id="inf85">
<mml:math id="m95">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. In <xref ref-type="disp-formula" rid="e10">Equation 10</xref>, <inline-formula id="inf86">
<mml:math id="m96">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> indicates whether the true label of the <inline-formula id="inf87">
<mml:math id="m97">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mtext>-th</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> sample belongs to class <inline-formula id="inf88">
<mml:math id="m98">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, while <inline-formula id="inf89">
<mml:math id="m99">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the corresponding predicted probability.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Experiments</title>
<sec id="s4-1">
<title>4.1 Datasets and metrics</title>
<p>In order to verify the effectiveness of our method, we manually annotated three regions collected by airborne LiDAR. The data was collected in Anhui, China, using a DJI M600 drone equipped with a Riegl VUX-LR laser scanner. The point density of this data reaches 40&#x2013;50 points per square meter. To validate the model, we employed the CloudCompare software for manual annotation. <xref ref-type="fig" rid="F6">Figure 6</xref> presents the three datasets used in this experiment and their annotation results. It can be observed that, in addition to the conventional categories such as ground, vegetation, buildings, poles and frames, and wires, we further subdivided the power-line components into finer classes, including crossing lines, ground wires, insulators, jumper wires, etc., yielding a total of 9 categories. For each dataset, we manually divided it into blocks, and then partitioned them into training, validation, and test sets at a ratio of 7:1.5:1.5, respectively, for the training and validation of the proposed method. <xref ref-type="table" rid="T1">Table 1</xref> shows the number and proportion of points for each category in the test dataset. It can be observed that there is a pronounced class imbalance in high-voltage transmission line scenarios. Large-scale objects such as vegetation, buildings, and ground account for more than 97% of the points, while power facility categories represent less than 3%. In particular, the proportions of classes like insulator and drainage thread are even lower.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Display of the dataset and annotation results used in the experiment.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g006.tif">
<alt-text content-type="machine-generated">Three datasets are displayed: Dataset I, Dataset II, and Dataset III, each with highlighted sections showing detailed sections of a landscape with transmission towers, wires, vegetation, and ground. A color key indicates categories such as buildings, vegetation, towers, conductors, insulators, and other elements.</alt-text>
</graphic>
</fig>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Class-wise point count and percentage in the test dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Conductor</th>
<th align="center">Tower</th>
<th align="center">Vegetation</th>
<th align="center">Building</th>
<th align="center">Insulator</th>
<th align="center">Drainage thread</th>
<th align="center">Crossing line</th>
<th align="center">Shield wire</th>
<th align="center">Ground</th>
<th align="center">Sum</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Number of points</td>
<td align="center">2,932,346</td>
<td align="center">1,045,266</td>
<td align="center">138,751,040</td>
<td align="center">3,500,440</td>
<td align="center">18,430</td>
<td align="center">18,582</td>
<td align="center">614,916</td>
<td align="center">1,155,238</td>
<td align="center">80,115,096</td>
<td align="center">228,151,354</td>
</tr>
<tr>
<td align="left">Percentage of points</td>
<td align="center">1.285%</td>
<td align="center">0.458%</td>
<td align="center">
<bold>60.815%</bold>
</td>
<td align="center">
<bold>1.534%</bold>
</td>
<td align="center">0.008%</td>
<td align="center">0.008%</td>
<td align="center">0.270%</td>
<td align="center">0.506%</td>
<td align="center">
<bold>35.115%</bold>
</td>
<td align="center">100.000%</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the proportion of the top three most frequent sample categories.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Specifically, the block division was carried out by cropping 50 m on each side along the direction perpendicular to the transmission line corridor. Meanwhile, along the direction of the transmission line corridor, the data was cropped into blocks every 300 m, and a 10-meter overlapping buffer zone was reserved between adjacent blocks to avoid the fragmentation of ground objects during cropping.</p>
<p>To quantitatively evaluate our method, we follow the protocol adopted in previous studies (<xref ref-type="bibr" rid="B64">Zhou et al., 2024</xref>; <xref ref-type="bibr" rid="B43">Wang et al., 2023</xref>; <xref ref-type="bibr" rid="B15">Jiang et al., 2022</xref>) and use Overall Accuracy (OA) and Mean Intersection over Union (mIoU) as assessment metrics. OA measures the percentage of correctly predicted points relative to the total number of test points, while mIoU evaluates the semantic segmentation performance across various categories. The calculation formulas are as follows (<xref ref-type="disp-formula" rid="e11">Equations 11</xref>, <xref ref-type="disp-formula" rid="e12">12</xref>):<disp-formula id="e11">
<mml:math id="m100">
<mml:mrow>
<mml:mtext>OA</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>all</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>all</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>FP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>all</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
<disp-formula id="e12">
<mml:math id="m101">
<mml:mrow>
<mml:mtext>mIoU</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>FP</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>FN</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <inline-formula id="inf90">
<mml:math id="m102">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf91">
<mml:math id="m103">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf92">
<mml:math id="m104">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represent true positives, false negatives, and false positives, respectively, in a confusion matrix.</p>
</sec>
<sec id="s4-2">
<title>4.2 Implementation details</title>
<p>In terms of implementation details, we adopt the Adam optimizer to train the network, which is developed and implemented using the PyTorch framework. A warm-up learning rate strategy is employed to gradually adjust the learning rate during the initial training phase. Prior to training, additional grid-based downsampling is applied to the input data to ensure uniform point spacing. Specifically, the grid size is set to 0.2, and a hash mapping between the downsampled points and the original point cloud is maintained. During inference, a voting-based strategy is used to map the predicted labels back to the original point cloud based on this mapping. For the kernel point convolution layers, the spherical neighborhood radius is set to 25 m, and the initial kernel radius is set to 0.5 m. The receptive field gradually expands with increasing network depth. To improve the model&#x2019;s generalization and robustness, we apply random data augmentations, including rotation, translation, and noise injection. During training, ground-truth semantic labels are utilized to construct and optimize the Graph Embedding and Coordinate Conversion (GECC) graph. However, during validation, semantic labels are excluded, and the graph is constructed solely based on the learned features and geometric coordinates. All experiments are conducted on a single NVIDIA GeForce RTX 4090 24 GB GPU.</p>
</sec>
<sec id="s4-3">
<title>4.3 Semantic segmentation results</title>
<p>In this section, to validate the effectiveness and performance of the proposed method, we conduct an analysis from both quantitative and qualitative perspectives. Additionally, we select several state-of-the-art (SOTA) methods of the same type for comparison. These include the point-based MLP method, PointNet&#x2b;&#x2b; (<xref ref-type="bibr" rid="B3">Charles et al., 2017</xref>; <xref ref-type="bibr" rid="B29">Qi et al., 2017</xref>), the point convolution-based method KPConv (<xref ref-type="bibr" rid="B40">Thomas et al., 2019</xref>), and the Transformer-based PointTransformer (PT) (<xref ref-type="bibr" rid="B49">Wu et al., 2024b</xref>; <xref ref-type="bibr" rid="B48">Wu et al., 2024a</xref>; <xref ref-type="bibr" rid="B47">Wu et al., 2022</xref>). In the comparative experiments, the data sample partitioning follows the strategy described in <xref ref-type="sec" rid="s4-1">Section 4.1</xref>. For KPConv, we set the input radius to 25 m and the initial kernel radius to 0.5 m, consistent with the settings used in our model&#x2019;s KPConv block. For the other methods, the hyperparameters are set to the empirical values reported in their respective original papers.</p>
<sec id="s4-3-1">
<title>4.3.1 Comparison results and analysis on the entire dataset</title>
<p>We first conducted a comparative experiment on the overall dataset, with the confusion matrix of our method presented in <xref ref-type="fig" rid="F7">Figure 7</xref>. The quantitative results, comparing our method to others, are shown in <xref ref-type="table" rid="T2">Table 2</xref>. As observed, our method achieved an mIoU of 81.93% and an OA of 94.1% on the overall dataset, outperforming the other methods. Additionally, our approach achieved the best segmentation results in the categories of conductor, tower, building, insulator, drainage thread, crossing line, and shield wire. The PT method demonstrated the best segmentation results for vegetation and ground, though our method was only slightly behind. Furthermore, although other state-of-the-art (SOTA) methods, such as KPConv and PT, exhibited strong overall accuracy (with OA values exceeding 90%), their performance in terms of mIoU and class-specific segmentation accuracy was somewhat inferior. In contrast, our method demonstrates more balanced performance, particularly in the segmentation of linear features.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Confusion matrix on the entire dataset.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g007.tif">
<alt-text content-type="machine-generated">Confusion matrix for classification with labels Conductor, Tower, Vegetation, Building, Insulator, Drainage Thread, Crossing Line, Shield Wire, and Ground. Values range from 0.0 to 1.0, with darker shades indicating higher accuracy. The diagonal shows correct predictions with values mostly above 0.9. Off-diagonal values indicate misclassifications.</alt-text>
</graphic>
</fig>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Quantitative comparison results on the entire dataset. The first column lists the different methods, the second and third columns show the overall mIoU and OA values, and the following columns represent the IoU values for each class. The best result in each column is highlighted in bold.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Methods</th>
<th rowspan="2" align="center">mIoU</th>
<th rowspan="2" align="center">OA</th>
<th colspan="9" align="center">IoU</th>
</tr>
<tr>
<th align="center">Conductor</th>
<th align="center">Tower</th>
<th align="center">Vegetation</th>
<th align="center">Building</th>
<th align="center">Insulator</th>
<th align="center">Drainage thread</th>
<th align="center">Crossing line</th>
<th align="center">Shield wire</th>
<th align="center">Ground</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">PointNet&#x2b;&#x2b; <xref ref-type="bibr" rid="B29">Qi et al. (2017)</xref>
</td>
<td align="center">35.96%</td>
<td align="center">73.17%</td>
<td align="center">71.56%</td>
<td align="center">24.25%</td>
<td align="center">65.44%</td>
<td align="center">68.71%</td>
<td align="center">0.00%</td>
<td align="center">0.00%</td>
<td align="center">0.00%</td>
<td align="center">23.50%</td>
<td align="center">70.21%</td>
</tr>
<tr>
<td align="left">KPConv <xref ref-type="bibr" rid="B40">Thomas et al. (2019)</xref>
</td>
<td align="center">64.01%</td>
<td align="center">91.04%</td>
<td align="center">75.07%</td>
<td align="center">92.65%</td>
<td align="center">85.14%</td>
<td align="center">82.07%</td>
<td align="center">42.80%</td>
<td align="center">19.56%</td>
<td align="center">52.07%</td>
<td align="center">48.99%</td>
<td align="center">77.74%</td>
</tr>
<tr>
<td align="left">PT <xref ref-type="bibr" rid="B48">Wu et al. (2024a)</xref>
</td>
<td align="center">71.67%</td>
<td align="center">92.48%</td>
<td align="center">87.23%</td>
<td align="center">89.34%</td>
<td align="center">
<bold>87.13%</bold>
</td>
<td align="center">80.61%</td>
<td align="center">32.34%</td>
<td align="center">41.23%</td>
<td align="center">54.21%</td>
<td align="center">93.10%</td>
<td align="center">
<bold>79.85%</bold>
</td>
</tr>
<tr>
<td align="left">Ours</td>
<td align="center">
<bold>81.93%</bold>
</td>
<td align="center">
<bold>94.10%</bold>
</td>
<td align="center">
<bold>88.94%</bold>
</td>
<td align="center">
<bold>95.34%</bold>
</td>
<td align="center">86.34%</td>
<td align="center">
<bold>89.22%</bold>
</td>
<td align="center">
<bold>68.39%</bold>
</td>
<td align="center">
<bold>62.41%</bold>
</td>
<td align="center">
<bold>69.54%</bold>
</td>
<td align="center">
<bold>97.91%</bold>
</td>
<td align="center">79.28%</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate the best-performing evaluation values in their respective columns.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="fig" rid="F8">Figure 8</xref> shows the segmentation performance of different methods in the same region of a tile from the overall dataset. This region contains multiple distinct power line targets (crossing line, conductor, and shield wire) as well as common targets (ground, vegetation). It can be seen that PointNet&#x2b;&#x2b; struggles to effectively distinguish between different linear targets. KPConv can roughly identify the conductor and shield wire, but fails to recognize the crossing line. PT provides better recognition results for conductor and shield wire, but performs poorly on the crossing line. In contrast, our method can effectively distinguish between these three different linear targets. Although the crossing line, shield wire, and conductor have the same physical mechanisms, their applications in power transmission scenes differ. Our method achieves good segmentation performance for these targets, largely due to our GECC and CSAM modules, which can model the global relational information between different classes.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Qualitative segmentation results of different methods on the entire dataset. The center shows the ground truth point cloud and labels, while the top and bottom sections display the segmentation results of different methods within the black circle region.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g008.tif">
<alt-text content-type="machine-generated">Composite image comparing point cloud segmentations using different methods: GT, PointNet&#x2b;&#x2b;, KPConv, PT, and Ours. Each method is represented by an enlarged oval view showing variations in segmentation accuracy and detail, with GT being the ground truth. Color-coded lines and areas depict different segments in the data.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-3-2">
<title>4.3.2 Comparison results and analysis on sub-datasets</title>
<p>Furthermore, we conducted comparative experiments and analysis of our method and other methods on the three sub-datasets. As mentioned earlier, the training samples for the three sub-datasets differ, with dataset I having the most samples and dataset III having the fewest. We performed comparative analysis on all three datasets, with the quantitative results shown in <xref ref-type="table" rid="T3">Table 3</xref> and the qualitative results displayed in <xref ref-type="fig" rid="F9">Figures 9</xref>&#x2013;<xref ref-type="fig" rid="F11">11</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Quantitative comparison results on the sub-datasets. The first four columns represent the different datasets, methods, and the mIoU and OA metrics. The remaining nine columns show the IoU values for each class for the different methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Sub-datasets</th>
<th rowspan="2" align="center">Methods</th>
<th rowspan="2" align="center">mIoU</th>
<th rowspan="2" align="center">OA</th>
<th colspan="9" align="center">IoU</th>
</tr>
<tr>
<th align="left">Conductor</th>
<th align="left">Tower</th>
<th align="left">Vegetation</th>
<th align="left">Building</th>
<th align="left">Insulator</th>
<th align="center">Drainage thread</th>
<th align="center">Crossing line</th>
<th align="center">Shield wire</th>
<th align="left">Ground</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="left">Dataset I</td>
<td align="left">PointNet&#x2b;&#x2b; <xref ref-type="bibr" rid="B29">Qi et al. (2017)</xref>
</td>
<td align="center">31.27%</td>
<td align="center">60.17%</td>
<td align="center">61.04%</td>
<td align="center">34.03%</td>
<td align="center">54.06%</td>
<td align="center">59.58%</td>
<td align="center">0.00%</td>
<td align="center">0.00%</td>
<td align="center">13.22%</td>
<td align="center">0.00%</td>
<td align="center">59.53%</td>
</tr>
<tr>
<td align="left">KPConv <xref ref-type="bibr" rid="B40">Thomas et al. (2019)</xref>
</td>
<td align="center">59.71%</td>
<td align="center">76.53%</td>
<td align="center">80.04%</td>
<td align="center">91.39%</td>
<td align="center">66.69%</td>
<td align="center">89.93%</td>
<td align="center">17.45%</td>
<td align="center">0.00%</td>
<td align="center">64.43%</td>
<td align="center">60.70%</td>
<td align="center">66.73%</td>
</tr>
<tr>
<td align="left">PT <xref ref-type="bibr" rid="B48">Wu et al. (2024a)</xref>
</td>
<td align="center">
<bold>75.24%</bold>
</td>
<td align="center">
<bold>89.98%</bold>
</td>
<td align="center">
<bold>90.63%</bold>
</td>
<td align="center">
<bold>95.67%</bold>
</td>
<td align="center">
<bold>76.15%</bold>
</td>
<td align="center">84.26%</td>
<td align="center">
<bold>52.20%</bold>
</td>
<td align="center">
<bold>46.61%</bold>
</td>
<td align="center">67.29%</td>
<td align="center">93.04%</td>
<td align="center">
<bold>71.25%</bold>
</td>
</tr>
<tr>
<td align="left">Ours</td>
<td align="center">75.16%</td>
<td align="center">88.66%</td>
<td align="center">90.45%</td>
<td align="center">94.47%</td>
<td align="center">75.53%</td>
<td align="center">
<bold>85.77%</bold>
</td>
<td align="center">50.30%</td>
<td align="center">45.61%</td>
<td align="center">
<bold>67.39%</bold>
</td>
<td align="center">
<bold>97.86%</bold>
</td>
<td align="center">69.04%</td>
</tr>
<tr>
<td rowspan="4" align="left">Dataset II</td>
<td align="left">PointNet&#x2b;&#x2b; <xref ref-type="bibr" rid="B29">Qi et al. (2017)</xref>
</td>
<td align="center">29.18%</td>
<td align="center">70.18%</td>
<td align="center">43.72%</td>
<td align="center">30.04%</td>
<td align="center">45.70%</td>
<td align="center">62.43%</td>
<td align="center">0.00%</td>
<td align="center">0.00%</td>
<td align="center">0.00%</td>
<td align="center">18.40%</td>
<td align="center">62.32%</td>
</tr>
<tr>
<td align="left">KPConv <xref ref-type="bibr" rid="B40">Thomas et al. (2019)</xref>
</td>
<td align="center">61.34%</td>
<td align="center">92.35%</td>
<td align="center">67.48%</td>
<td align="center">95.80%</td>
<td align="center">96.73%</td>
<td align="center">87.59%</td>
<td align="center">18.96%</td>
<td align="center">0.00%</td>
<td align="center">54.69%</td>
<td align="center">38.85%</td>
<td align="center">91.99%</td>
</tr>
<tr>
<td align="left">PT <xref ref-type="bibr" rid="B48">Wu et al. (2024a)</xref>
</td>
<td align="center">71.74%</td>
<td align="center">95.19%</td>
<td align="center">79.89%</td>
<td align="center">92.04%</td>
<td align="center">96.34%</td>
<td align="center">79.57%</td>
<td align="center">30.40%</td>
<td align="center">19.40%</td>
<td align="center">65.05%</td>
<td align="center">91.97%</td>
<td align="center">91.02%</td>
</tr>
<tr>
<td align="left">Ours</td>
<td align="center">
<bold>78.71%</bold>
</td>
<td align="center">
<bold>97.65%</bold>
</td>
<td align="center">
<bold>81.96%</bold>
</td>
<td align="center">
<bold>96.62%</bold>
</td>
<td align="center">
<bold>96.93%</bold>
</td>
<td align="center">
<bold>87.87%</bold>
</td>
<td align="center">
<bold>34.80%</bold>
</td>
<td align="center">
<bold>44.21%</bold>
</td>
<td align="center">
<bold>75.87%</bold>
</td>
<td align="center">
<bold>97.70%</bold>
</td>
<td align="center">
<bold>92.45%</bold>
</td>
</tr>
<tr>
<td rowspan="4" align="left">Dataset III</td>
<td align="left">PointNet&#x2b;&#x2b; <xref ref-type="bibr" rid="B29">Qi et al. (2017)</xref>
</td>
<td align="center">40.46%</td>
<td align="center">74.09%</td>
<td align="center">70.06%</td>
<td align="center">33.21%</td>
<td align="center">83.77%</td>
<td align="center">67.32%</td>
<td align="center">0.00%</td>
<td align="center">0.00%</td>
<td align="center">24.42%</td>
<td align="center">16.29%</td>
<td align="center">69.06%</td>
</tr>
<tr>
<td align="left">KPConv <xref ref-type="bibr" rid="B40">Thomas et al. (2019)</xref>
</td>
<td align="center">64.91%</td>
<td align="center">95.11%</td>
<td align="center">79.02%</td>
<td align="center">91.90%</td>
<td align="center">90.89%</td>
<td align="center">66.24%</td>
<td align="center">20.89%</td>
<td align="center">18.90%</td>
<td align="center">87.98%</td>
<td align="center">36.34%</td>
<td align="center">92.02%</td>
</tr>
<tr>
<td align="left">PT <xref ref-type="bibr" rid="B48">Wu et al. (2024a)</xref>
</td>
<td align="center">80.46%</td>
<td align="center">95.27%</td>
<td align="center">98.59%</td>
<td align="center">90.63%</td>
<td align="center">90.16%</td>
<td align="center">84.41%</td>
<td align="center">34.67%</td>
<td align="center">42.30%</td>
<td align="center">95.67%</td>
<td align="center">96.15%</td>
<td align="center">91.56%</td>
</tr>
<tr>
<td align="left">Ours</td>
<td align="center">
<bold>88.47%</bold>
</td>
<td align="center">
<bold>96.30%</bold>
</td>
<td align="center">
<bold>99.05%</bold>
</td>
<td align="center">
<bold>96.13%</bold>
</td>
<td align="center">
<bold>92.12%</bold>
</td>
<td align="center">
<bold>93.32%</bold>
</td>
<td align="center">
<bold>69.16%</bold>
</td>
<td align="center">
<bold>58.01%</bold>
</td>
<td align="center">
<bold>96.53%</bold>
</td>
<td align="center">
<bold>98.62%</bold>
</td>
<td align="center">
<bold>93.27%</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate the best-performing evaluation values in their respective columns.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Qualitative results of different methods on Dataset I. The black circles highlight the areas with segmentation errors.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g009.tif">
<alt-text content-type="machine-generated">Comparative visualization of different methods for landscape classification, including &#x22;GT,&#x22; &#x22;PointNet&#x2b;&#x2b;,&#x22; &#x22;KPConv,&#x22; &#x22;PT,&#x22; and &#x22;Ours.&#x22; Each method categorizes elements like ground, buildings, vegetation, towers, and more, in various colors. Black ellipses highlight differences or errors. A legend indicates color codes for 10 categories, such as ground in pink and buildings in blue.</alt-text>
</graphic>
</fig>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Qualitative results of different methods on Dataset II. The black circles highlight the areas with segmentation errors.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g010.tif">
<alt-text content-type="machine-generated">Comparison of segmentation results from different methods on a landscape image featuring ground, buildings, vegetation, towers, conductors, insulators, drainage threads, crossing lines, and shield wires. Each row represents results from GT, PointNet&#x2b;&#x2b;, KPConv, PT, and a method labeled &#x22;Ours&#x22;, with key differences highlighted in black circles.</alt-text>
</graphic>
</fig>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Qualitative results of different methods on Dataset III. The black circles highlight the areas with segmentation errors.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g011.tif">
<alt-text content-type="machine-generated">Comparison of five segmentation methods on a landscape illustration, each row labeled GT, PointNet&#x2b;&#x2b;, KPConv, PT, and Ours. Various colors represent different elements: ground (pink), building (blue), vegetation (green), tower (light blue), conductor (red), insulator (purple), drainage thread (dark blue), crossing line (orange), and shield wire (yellow). Black circles highlight differences in segmentation across methods. The legend identifies each element's color.</alt-text>
</graphic>
</fig>
<p>From the above qualitative and quantitative comparison results, it can be seen that overall, our method performs well across all three sub-datasets, achieving the best segmentation results on datasets II and III. On dataset I, our method achieves an mIoU of 75.16% and an OA of 88.66%, slightly behind PT (mIoU 75.24%, OA 89.98%), while the results of other methods are less impressive. This is largely because both our method and PT incorporate attention mechanisms. We use spatial-channel attention, while PT uses a transformer-based self-attention mechanism. These modules help capture more information when processing a larger sequence of samples and more diverse scenes. In contrast, methods based purely on point-MLP or convolution tend to converge earlier and reach saturation. This is mainly because high-voltage power transmission scenes are complex and the samples are highly imbalanced. Methods like KPConv and PointNet&#x2b;&#x2b; can easily and quickly learn large samples (such as ground and vegetation), but they struggle to learn smaller classes, especially similar ones (such as conductor, crossing line, etc.). This further validates the rationale and effectiveness of the attention mechanisms in our method&#x2019;s design.</p>
<p>Meanwhile, on datasets II and III, our method achieves the best accuracy. Although PT and other methods perform similarly in terms of overall accuracy, their segmentation results for individual classes are not as strong as ours. The inclusion of the GECC module in our method allows it to model the attributes of different categories more effectively. Graph convolutions help emphasize differences in attributes such as relative elevation, orientation, and other features of similar objects. This enables our model to better distinguish between objects that, despite sharing similar local geometric properties, differ in global attributes and topological relationships.</p>
</sec>
<sec id="s4-3-3">
<title>4.3.3 Detailed comparison and analysis</title>
<p>Considering that our task is the fine-grained semantic segmentation of high-voltage transmission lines, the segmentation of power facilities (especially tower areas) is a key focus. Therefore, we conduct a comparative analysis of the segmentation details of power transmission towers and their connected areas across the three subdatasets. As shown in <xref ref-type="fig" rid="F12">Figure 12</xref>, the regions of power transmission towers and other power facilities are displayed in the three datasets. It can be seen that, although our method has some shortcomings in the segmentation of vegetation, it still outperforms other methods in the segmentation of various types of power lines, buildings and towers.</p>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Detail performance of different methods in power facility areas across three datasets. The black circles highlight areas with segmentation errors.</p>
</caption>
<graphic xlink:href="feart-13-1649203-g012.tif">
<alt-text content-type="machine-generated">Three rows of point cloud visualizations comparing different methods: GT, PointNet&#x2b;&#x2b;, KPConv, PT, and Ours. Each method is evaluated on three datasets, columns labeled Dataset I, II, and III, with colorful annotated regions such as power lines and towers. Circles highlight specific areas of interest or differences among the methods.</alt-text>
</graphic>
</fig>
<p>Specifically, consistent with the previous comparative analysis, PointNet&#x2b;&#x2b; lacks effective segmentation of current objects, while KPConv cannot distinguish between similar linear targets of different categories. Although PT can differentiate them, its segmentation accuracy is insufficient, and its segmentation of buildings is less complete compared to our method and KPConv. Our method effectively segments power infrastructure and buildings, but its performance in the segmentation of cluttered vegetation is inadequate. This is primarily because, during feature extraction, the GECC module aggregates local geometric properties of the point cloud (such as linearity and relative height), and when combined with the neighborhood features obtained by the KPConv module, it strengthens the model&#x2019;s ability to learn artificial objects. However, its segmentation capability for natural objects is relatively weaker. Nonetheless, it is worth noting that our primary goal is to address the fine-grained segmentation of power facilities in high-voltage transmission corridor scenes, so this limitation can be temporarily overlooked.</p>
</sec>
</sec>
<sec id="s4-4">
<title>4.4 Effectiveness analysis</title>
<sec id="s4-4-1">
<title>4.4.1 Effectiveness of KPConv</title>
<p>In this section, we evaluate the effectiveness of the proposed Graph-Kernel Convolution Attention Encoder (GKCAE) through a series of ablation studies. To quantify the contribution of each key component, we systematically remove or replace specific modules and assess the resulting performance on the entire dataset. A central component of GKCAE is the Kernel Point Convolution (KPConv) module (<xref ref-type="bibr" rid="B40">Thomas et al., 2019</xref>), which serves as the backbone for point-wise convolution and local feature extraction. To investigate its role, we compare two commonly used variants: rigid KPConv, which uses fixed kernel point positions, and deformable KPConv, where kernel positions are learned during training. The comparison reveals how kernel flexibility influences the network&#x2019;s ability to capture fine-grained geometric structures.</p>
<p>
<xref ref-type="table" rid="T4">Table 4</xref> presents the quantitative results comparing rigid and deformable variants of the KPConv module. As shown, the model employing deformable KPConv achieves superior performance, with an improvement of 1.85 percentage points in mean Intersection over Union (mIoU) over its rigid counterpart. This performance gain can be attributed to the inherent complexity of transmission line scenes, which contain diverse object categories with significant scale variation. The rigid KPConv, constrained by fixed kernel point positions and a uniform receptive radius, lacks adaptability to local geometric structures. This limitation reduces its effectiveness in capturing small-scale components, particularly fine-grained power infrastructure distributed around transmission towers. In contrast, the deformable KPConv dynamically adjusts kernel point locations during training, allowing the convolutional operation to better align with local surface geometry. This flexibility enhances the network&#x2019;s capacity to extract discriminative features from irregular regions, thereby improving segmentation accuracy for critical components such as towers, conductors, and insulators.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Effectiveness Analysis of Rigid and Deformable Kernel Point Convolutions. &#x201c;KP&#x201d; represents the Kernel point convolution network.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Methods</th>
<th align="center">mIoU (%)</th>
<th align="center">OA (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">With rigid KP</td>
<td align="center">80.08</td>
<td align="center">93.81</td>
</tr>
<tr>
<td align="left">With deformable KP (Ours)</td>
<td align="center">81.93</td>
<td align="center">94.10</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-4-2">
<title>4.4.2 Effectiveness of CSAM and GECC</title>
<p>To analyze the effectiveness of the CSAM and GECC modules, we progressively add these components and test the modified versions. <xref ref-type="table" rid="T5">Table 5</xref> presents the results of our ablation experiments.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Effectiveness analysis of CSAM and GECC. &#x201c;KP&#x201d; represents the use of deformable Kernel point convolution network.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Methods</th>
<th rowspan="2" align="center">mIoU</th>
<th rowspan="2" align="center">OA</th>
<th colspan="9" align="center">IoU</th>
</tr>
<tr>
<th align="center">Conductor</th>
<th align="center">Tower</th>
<th align="center">Vegetation</th>
<th align="center">Building</th>
<th align="center">Insulator</th>
<th align="center">Drainage thread</th>
<th align="center">Crossing line</th>
<th align="center">Shield wire</th>
<th align="center">Ground</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">KP &#x2b; CSAM</td>
<td align="center">74.50%</td>
<td align="center">92.58%</td>
<td align="center">87.09%</td>
<td align="center">93.60%</td>
<td align="center">85.02%</td>
<td align="center">68.68%</td>
<td align="center">61.94%</td>
<td align="center">50.07%</td>
<td align="center">59.43%</td>
<td align="center">86.57%</td>
<td align="center">78.10%</td>
</tr>
<tr>
<td align="center">KP &#x2b; GECC</td>
<td align="center">77.47%</td>
<td align="center">90.62%</td>
<td align="center">88.12%</td>
<td align="center">91.13%</td>
<td align="center">83.92%</td>
<td align="center">71.46%</td>
<td align="center">68.17%</td>
<td align="center">61.96%</td>
<td align="center">67.34%</td>
<td align="center">94.68%</td>
<td align="center">70.45%</td>
</tr>
<tr>
<td align="center">KP &#x2b; CSAM &#x2b; GECC (Ours)</td>
<td align="center">
<bold>81.93%</bold>
</td>
<td align="center">
<bold>94.10%</bold>
</td>
<td align="center">
<bold>88.94%</bold>
</td>
<td align="center">
<bold>95.34%</bold>
</td>
<td align="center">
<bold>86.34%</bold>
</td>
<td align="center">
<bold>89.22%</bold>
</td>
<td align="center">
<bold>68.39%</bold>
</td>
<td align="center">
<bold>62.41%</bold>
</td>
<td align="center">
<bold>69.54%</bold>
</td>
<td align="center">
<bold>97.91%</bold>
</td>
<td align="center">
<bold>79.28%</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate the best-performing evaluation values in their respective columns.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>It can be observed that when key components are removed, both overall accuracy and performance metrics decline to varying degrees. In particular, when the GECC module is removed and only the KPConv &#x2b; CSAM combination is used, the mIoU decreases significantly. This indicates that the absence of the graph network negatively impacts the model&#x2019;s ability to recognize certain categories, especially for similar linear power line targets. This highlights the crucial role of the GECC module in capturing global relationships and enhancing segmentation accuracy.</p>
<p>Moreover, when the CSAM module is removed and only the KPConv and GECC modules are retained, we observe a higher mIoU (77.47% vs. 74.50%) but a lower overall accuracy (OA; 90.62% vs. 92.58%) than with the KPConv &#x2b; CSAM variant, although both metrics remain below those of the full model. This phenomenon can be attributed to the following: in the absence of the attention mechanism, the network still retains its ability to capture fine-grained geometric features through KPConv and GECC, which enhances per-class segmentation performance and leads to higher mIoU. However, the lack of global contextual modeling&#x2014;particularly in the absence of spatial and channel-wise attention&#x2014;reduces the model&#x2019;s ability to distinguish dominant classes, such as vegetation, thereby lowering the OA. These results highlight the critical role of the CSAM module in enhancing global feature representation and improving class-level precision, especially for large-area categories. Although the model maintains a certain level of object recognition capability without CSAM, the segmentation accuracy is compromised due to the loss of global semantic context. This further validates the importance of integrating both GECC and CSAM modules: their combination yields the best performance in fine-grained semantic segmentation of complex transmission corridor scenes.</p>
<p>Furthermore, we visualize the attention matrix produced by the CSAM. <xref ref-type="fig" rid="F13">Figure 13a</xref> shows the ground truth of the original 3D point cloud. The remaining panels show the attention features for categories such as conductors, ground wires, the ground, transmission towers, and buildings. It can be seen that, after feature learning through kernel point convolution and graph convolution followed by global point-level feature extraction in the channel-spatial attention module, the network effectively learns large targets (such as transmission towers, buildings, the ground, and vegetation) and also effectively captures linear targets (such as conductors and ground wires). For small targets (such as jumper wires and insulators), the attention response is not as strong as that for large targets, but these ground objects are still learned.</p>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>Channel-spatial attention visualisation results [<bold>(a)</bold> is the true value of the input point cloud (the color represents the semantic category), and <bold>(b)</bold> is the thermal representation of the attention map under different categories].</p>
</caption>
<graphic xlink:href="feart-13-1649203-g013.tif">
<alt-text content-type="machine-generated">Point clouds and attention visualization. The top section shows input point clouds of a landscape with a building, vegetation, and power lines. The bottom section displays attention visualization for categories: conductor, shield wire, ground, vegetation, tower, building, insulator, and drainage line. Each category has its own graph indicating attention levels with a color bar scale.</alt-text>
</graphic>
</fig>
<p>As shown in <xref ref-type="fig" rid="F13">Figure 13b</xref>, the ablation experiments further confirm the effectiveness and rationale behind the semantic segmentation network model we designed for high-voltage transmission line point clouds. In particular, the use of graph convolutional attention significantly enhances the model&#x2019;s ability to segment and recognize similar categories of power line targets. This capability is crucial for achieving large-scale, rapid semantic understanding of transmission line scenes, enabling the extraction of detailed information about key components. Ultimately, this approach lays a solid foundation for downstream tasks, such as automated monitoring and maintenance, where precise identification of power line infrastructure is essential.</p>
<p>While the proposed GKCAE network demonstrates strong performance in fine-grained semantic segmentation of transmission line point clouds, several limitations remain. First, the model currently focuses on unimodal geometric and spatial features extracted from point clouds, and lacks the integration of other informative modalities such as RGB or thermal data. Future research can explore multi-modal interaction mechanisms (<xref ref-type="bibr" rid="B1">Cai et al., 2024</xref>), such as token division strategies, to enhance feature diversity and robustness. Second, the graph structure in our method is constructed on relatively shallow geometric relationships. Advanced graph representations, such as hypergraph-based models (<xref ref-type="bibr" rid="B16">Jing et al., 2025</xref>), offer an opportunity to model more complex and high-order semantic dependencies between points, potentially improving the segmentation of highly entangled or ambiguous objects like insulators and jumper wires. Moreover, our method primarily focuses on discriminative learning. Incorporating homology-based constraints (<xref ref-type="bibr" rid="B51">Xiang et al., 2025</xref>) or topological regularization can further improve feature consistency among local regions and enhance structural awareness. Additionally, for complex shape variations in small components, biomimetic representations (<xref ref-type="bibr" rid="B59">Zhang and Chen, 2018</xref>) may offer novel geometric priors to better handle target deformation or noise. In future work, we plan to integrate these ideas to further enhance the generalization and interpretability of our model, particularly in large-scale, multi-source transmission line applications.</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>In this paper, we proposed a novel Graph-Attention-based Encoder Network (GKCAE) for fine-grained semantic segmentation of airborne LiDAR point cloud data in high-voltage transmission corridor scenarios. The proposed model integrates Graph Edge-Conditioned Convolution (GECC) and a Channel-Spatial Attention Module (CSAM) to effectively capture both local geometric features and global contextual information, thereby addressing key challenges such as point cloud sparsity, severe class imbalance, and the high-precision requirements for segmenting power line components. Through comprehensive experiments on multiple real-world datasets, our method demonstrated superior performance compared to state-of-the-art (SOTA) techniques, particularly in the segmentation of critical transmission line elements such as conductors, towers, and various wire types. The proposed approach achieved a mean Intersection over Union (mIoU) of 81.93% and an Overall Accuracy (OA) of 94.1%, outperforming existing methods in both overall segmentation accuracy and class-wise performance, especially for smaller and visually similar categories. Ablation studies further validated the effectiveness of each model component, showing that the integration of graph convolution and attention mechanisms significantly enhances the network&#x2019;s ability to distinguish between structurally similar power line elements. These findings confirm the model&#x2019;s suitability for fine-grained semantic segmentation in complex and cluttered corridor environments.</p>
<p>Despite these promising results, our method has several limitations. First, the current framework relies solely on geometric and spatial features from point clouds, without incorporating additional modalities such as RGB or infrared imagery, which could offer richer semantic cues. Second, the graph representation used is relatively shallow, limiting the model&#x2019;s capacity to capture higher-order semantic relationships among complex infrastructure components. Third, the absence of explicit topological or structural constraints may reduce feature consistency in highly cluttered scenes. Future work will focus on integrating multi-modal interaction mechanisms, hypergraph representations, and homology-based regularization to improve the robustness, generalization, and interpretability of the proposed framework across diverse transmission corridor environments.</p>
<p>In summary, the proposed GKCAE method provides a reliable and effective solution for intelligent perception and analysis in high-voltage transmission line scenarios. It demonstrates strong potential for supporting the automated monitoring and maintenance of power infrastructure. Future research will extend this work by exploring hazard detection based on fine-grained segmentation results and by evaluating model performance under varying sensor types and point cloud densities, with the aim of improving scalability and practical applicability in real-world inspection tasks.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>SZ: Methodology, Writing &#x2013; review and editing, Writing &#x2013; original draft. HL: Writing &#x2013; review and editing, Investigation, Supervision, Data curation, Conceptualization. JR: Validation, Writing &#x2013; review and editing, Funding acquisition, Visualization, Resources. YZ: Supervision, Writing &#x2013; review and editing, Project administration, Methodology.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This research was funded by the project of State Grid Economic and Technological Research Institute (Research on Carbon Storage Estimation Technology for Power Transmission Corridors Based on Multi-Source LiDAR, 52440024000J).</p>
</sec>
<ack>
<p>The authors thank all the reviewers for their insightful comments.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Authors SZ, HL, JR, and YZ were employed by State Grid Economic and Technological Research Institute Co., LTD.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sui</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Multi-modal interaction with token division strategy for rgb-t tracking</article-title>. <source>Pattern Recognit.</source> <volume>155</volume>, <fpage>110626</fpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2024.110626</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>An automatic method for powerline extraction from als point cloud of powerline corridors</article-title>. <source>IEEE J. Sel. Top. Appl. Earth Observations Remote Sens.</source> <volume>18</volume>, <fpage>10803</fpage>&#x2013;<lpage>10829</lpage>. <pub-id pub-id-type="doi">10.1109/JSTARS.2025.3555534</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Charles</surname>
<given-names>R. Q.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kaichun</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Guibas</surname>
<given-names>L. J.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Pointnet: deep learning on point sets for 3d classification and segmentation</article-title>,&#x201d; in <conf-name>2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Honolulu, HI, USA</conf-loc>, <conf-date>21-26 July 2017</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>77</fpage>&#x2013;<lpage>85</lpage>. <pub-id pub-id-type="doi">10.1109/cvpr.2017.16</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Belkacem</surname>
<given-names>A. N.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>The mixed kernel function svm-based point cloud classification</article-title>. <source>Int. J. Precis. Eng. Manuf.</source> <volume>20</volume>, <fpage>737</fpage>&#x2013;<lpage>747</lpage>. <pub-id pub-id-type="doi">10.1007/s12541-019-00102-3</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lv</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.-Y.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Scf-net: learning spatial contextual features for large-scale point cloud segmentation</article-title>,&#x201d; in <conf-name>2021 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Nashville, TN, USA</conf-loc>, <conf-date>20-25 June 2021</conf-date> (<publisher-name>IEEE</publisher-name>). <pub-id pub-id-type="doi">10.1109/cvpr46437.2021.01427</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fei</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Deep core node information embedding on networks with missing edges for community detection</article-title>. <source>Inf. Sci.</source> <volume>707</volume>, <fpage>122039</fpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2025.122039</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Mian</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2025</year>). &#x201c;<article-title>History-enhanced 3d scene graph reasoning from rgb-d sequences</article-title>,&#x201d; in <source>IEEE transactions on circuits and systems for video technology</source> (<publisher-name>IEEE Press</publisher-name>).</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghahremani</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Corke</surname>
<given-names>F. M.</given-names>
</name>
<name>
<surname>Tiddeman</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Doonan</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deep segmentation of point clouds of wheat</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>, <fpage>608732</fpage>. <pub-id pub-id-type="doi">10.3389/fpls.2021.608732</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Grothum</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Bienert</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bluemlein</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eltner</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Using machine learning techniques to filter vegetation in colorized sfm point clouds of soil surfaces</article-title>,&#x201d; in <source>The international archives of the photogrammetry, remote sensing and spatial information sciences XLVIII-1/W2-2023</source>, <fpage>163</fpage>&#x2013;<lpage>170</lpage>. <pub-id pub-id-type="doi">10.5194/isprs-archives-xlviii-1-w2-2023-163-2023</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Grid graph-based large-scale point clouds registration</article-title>. <source>Int. J. Digital Earth</source> <volume>16</volume>, <fpage>2448</fpage>&#x2013;<lpage>2466</lpage>. <pub-id pub-id-type="doi">10.1080/17538947.2023.2228298</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Rosa</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). &#x201c;<article-title>Randla-net: efficient semantic segmentation of large-scale point clouds</article-title>,&#x201d; in <conf-name>2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Seattle, WA, USA</conf-loc>, <conf-date>13-19 June 2020</conf-date> (<publisher-name>IEEE</publisher-name>). <pub-id pub-id-type="doi">10.1109/cvpr42600.2020.01112</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Fast and accurate power line corridor survey using spatial line clustering of point cloud</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>1571</fpage>. <pub-id pub-id-type="doi">10.3390/rs13081571</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ai</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Als point cloud semantic segmentation based on graph convolution and transformer with elevation attention</article-title>. <source>IEEE J. Sel. Top. Appl. Earth Observations Remote Sens.</source> <volume>17</volume>, <fpage>2877</fpage>&#x2013;<lpage>2889</lpage>. <pub-id pub-id-type="doi">10.1109/jstars.2023.3347224</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hui</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tingting</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zuoxiao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Weibin</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Menhas</surname>
<given-names>M. I.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Power equipment segmentation of 3d point clouds based on geodesic distance with k-means clustering</article-title>,&#x201d; in <conf-name>2021 6th International Conference on Power and Renewable Energy (ICPRE)</conf-name>, <conf-loc>Shanghai, China</conf-loc>, <conf-date>17-20 September 2021</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>317</fpage>&#x2013;<lpage>321</lpage>. <pub-id pub-id-type="doi">10.1109/icpre52634.2021.9635211</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <source>Fast semantic segmentation of 3D lidar point cloud based on random forest method</source>. <publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer Nature</publisher-name>, <fpage>415</fpage>&#x2013;<lpage>424</lpage>. <pub-id pub-id-type="doi">10.1007/978-981-19-2580-1_35</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jing</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Di</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Emam</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mian</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Hypergraph biformer for semantic segmentation of high-resolution remote sensing images</article-title>. <source>IEEE Trans. Geoscience Remote Sens.</source> <volume>63</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2025.3543556</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lai</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). &#x201c;<article-title>Stratified transformer for 3d point cloud segmentation</article-title>,&#x201d; in <conf-name>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>New Orleans, LA, USA</conf-loc>, <conf-date>18-24 June 2022</conf-date> (<publisher-name>IEEE</publisher-name>). <pub-id pub-id-type="doi">10.1109/cvpr52688.2022.00831</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Landrieu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Obozinski</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Cut pursuit: fast algorithms to learn piecewise constant functions on general weighted graphs</article-title>. <source>SIAM J. Imaging Sci.</source> <volume>10</volume>, <fpage>1724</fpage>&#x2013;<lpage>1766</lpage>. <pub-id pub-id-type="doi">10.1137/17m1113436</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Landrieu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Simonovsky</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Large-scale point cloud semantic segmentation with superpoint graphs</article-title>,&#x201d; in <conf-name>2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Salt Lake City, UT, USA</conf-loc>, <conf-date>18-23 June 2018</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>4558</fpage>&#x2013;<lpage>4567</lpage>. <pub-id pub-id-type="doi">10.1109/cvpr.2018.00479</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lawin</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Danelljan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tosteberg</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bhat</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>F. S.</given-names>
</name>
<name>
<surname>Felsberg</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <source>Deep projective 3D semantic segmentation</source>. <publisher-name>Springer International Publishing</publisher-name>, <fpage>95</fpage>&#x2013;<lpage>107</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-64689-3_8</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A supervoxel-based random forest method for robust and effective airborne lidar point cloud classification</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>1516</fpage>. <pub-id pub-id-type="doi">10.3390/rs14061516</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Vosselman</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>M. Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Local and global encoder network for semantic segmentation of airborne laser scanning point clouds</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>176</volume>, <fpage>151</fpage>&#x2013;<lpage>168</lpage>. <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2021.04.016</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Point-voxel cnn for efficient 3d deep learning</article-title>,&#x201d; in <conf-name>Conference on Neural Information Processing Systems (NeurIPS)</conf-name>, <conf-loc>Vancouver, BC, Canada</conf-loc>, <conf-date>December 8-14, 2019</conf-date>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Pixel-level noise mining for weakly supervised salient object detection</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst.</source>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1109/tnnls.2025.3575255</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hua</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>A multi-scale spatial&#x2013;temporal interaction fusion network for digital twin-based thermal error compensation in precision machine tools</article-title>. <source>Expert Syst. Appl.</source> <volume>286</volume>, <fpage>127812</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2025.127812</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mirzaei</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Arashpour</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Asadi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Masoumi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Behnood</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>3D point cloud data processing with machine learning for construction and infrastructure applications: a comprehensive review</article-title>. <source>Adv. Eng. Inf.</source> <volume>51</volume>, <fpage>101501</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2021.101501</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ni</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Classification of ALS point cloud with improved point cloud segmentation and random forests</article-title>. <source>Remote Sens.</source> <volume>9</volume>, <fpage>288</fpage>. <pub-id pub-id-type="doi">10.3390/rs9030288</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Nurunnabi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Belton</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>West</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Robust segmentation in laser scanning 3D point cloud data</article-title>,&#x201d; in <conf-name>2012 International Conference on Digital Image Computing Techniques and Applications (DICTA)</conf-name>, <conf-loc>Fremantle, WA, Australia</conf-loc>, <conf-date>03-05 December 2012</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/dicta.2012.6411672</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Qi</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>Yi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guibas</surname>
<given-names>L. J.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>PointNet&#x2b;&#x2b;: deep hierarchical feature learning on point sets in a metric space</article-title>,&#x201d; in <source>Proceedings of the 31st international conference on neural information processing systems</source> (<publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates Inc</publisher-name>), <fpage>5105</fpage>&#x2013;<lpage>5114</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rejichi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chaabane</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Feature extraction using PCA for VHR satellite image time series spatio-temporal classification</article-title>,&#x201d; in <conf-name>2015 IEEE International Geoscience and Remote Sensing Symposium (IGARSS)</conf-name>, <conf-loc>Milan, Italy</conf-loc>, <conf-date>26-31 July 2015</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>485</fpage>&#x2013;<lpage>488</lpage>. <pub-id pub-id-type="doi">10.1109/igarss.2015.7325806</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Robert</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Raguet</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Landrieu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Efficient 3D semantic segmentation with superpoint transformer</article-title>,&#x201d; in <conf-name>2023 IEEE/CVF International Conference on Computer Vision (ICCV)</conf-name>, <conf-loc>Paris, France</conf-loc>, <conf-date>01-06 October 2023</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>17149</fpage>&#x2013;<lpage>17158</lpage>. <pub-id pub-id-type="doi">10.1109/iccv51070.2023.01577</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sha</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Ssc-net: a multi-task joint learning network for tongue image segmentation and multi-label classification</article-title>. <source>Digit. Health</source> <volume>11</volume>, <fpage>20552076251343696</fpage>. <pub-id pub-id-type="doi">10.1177/20552076251343696</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ferreira</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An automatic framework for pylon detection by a hierarchical coarse-to-fine segmentation of powerline corridors from UAV LiDAR point clouds</article-title>. <source>Int. J. Appl. Earth Observation Geoinformation</source> <volume>118</volume>, <fpage>103263</fpage>. <pub-id pub-id-type="doi">10.1016/j.jag.2023.103263</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ferreira</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A review and future directions of techniques for extracting powerlines and pylons from LiDAR point clouds</article-title>. <source>Int. J. Appl. Earth Observation Geoinformation</source> <volume>132</volume>, <fpage>104056</fpage>. <pub-id pub-id-type="doi">10.1016/j.jag.2024.104056</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Rail transit shield tunnel deformation detection method based on cloth simulation filtering with point cloud cylindrical projection</article-title>. <source>Tunn. Undergr. Space Technol.</source> <volume>135</volume>, <fpage>105031</fpage>. <pub-id pub-id-type="doi">10.1016/j.tust.2023.105031</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Simonovsky</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Komodakis</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Dynamic edge-conditioned filters in convolutional neural networks on graphs</article-title>,&#x201d; in <conf-name>2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Honolulu, HI, USA</conf-loc>, <conf-date>21-26 July 2017</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>29</fpage>&#x2013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1109/cvpr.2017.11</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Vman: visual-modified attention network for multimodal paradigms</article-title>. <source>Vis. Comput.</source> <volume>41</volume>, <fpage>2737</fpage>&#x2013;<lpage>2754</lpage>. <pub-id pub-id-type="doi">10.1007/s00371-024-03563-4</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Su</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Automatic multi-source data fusion technique of powerline corridor using UAV LiDAR</article-title>,&#x201d; in <conf-name>2022 IEEE International Smart Cities Conference (ISC2)</conf-name>, <conf-loc>Pafos, Cyprus</conf-loc>, <conf-date>26-29 September 2022</conf-date> (<publisher-name>IEEE</publisher-name>). <pub-id pub-id-type="doi">10.1109/isc255366.2022.9922293</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Duanmu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A classification method of point clouds of transmission line corridor based on improved random forest and multi-scale features</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>1320</fpage>. <pub-id pub-id-type="doi">10.3390/s23031320</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Thomas</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>Deschaud</surname>
<given-names>J.-E.</given-names>
</name>
<name>
<surname>Marcotegui</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Goulette</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Guibas</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>KPConv: flexible and deformable convolution for point clouds</article-title>,&#x201d; in <conf-name>2019 IEEE/CVF International Conference on Computer Vision (ICCV)</conf-name>, <conf-loc>Seoul, Korea (South)</conf-loc>, <conf-date>27 October 2019 - 02 November 2019</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>6410</fpage>&#x2013;<lpage>6419</lpage>. <pub-id pub-id-type="doi">10.1109/iccv.2019.00651</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Thomas</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Tsai</surname>
<given-names>Y.-H. H.</given-names>
</name>
<name>
<surname>Barfoot</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>KPConvX: modernizing kernel point convolution with kernel attention</article-title>,&#x201d; in <conf-name>2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Seattle, WA, USA</conf-loc>, <conf-date>16-22 June 2024</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>5525</fpage>&#x2013;<lpage>5535</lpage>. <pub-id pub-id-type="doi">10.1109/cvpr52733.2024.00528</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shan</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Graph attention convolution for point cloud semantic segmentation</article-title>,&#x201d; in <conf-name>2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Long Beach, CA, USA</conf-loc>, <conf-date>15-20 June 2019</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>10288</fpage>&#x2013;<lpage>10297</lpage>. <pub-id pub-id-type="doi">10.1109/cvpr.2019.01054</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Semantic segmentation of transmission corridor 3D point clouds based on CA-PointNet&#x2b;&#x2b;</article-title>. <source>Electronics</source> <volume>12</volume>, <fpage>2829</fpage>. <pub-id pub-id-type="doi">10.3390/electronics12132829</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>IndVisSGG: VLM-based scene graph generation for industrial spatial intelligence</article-title>. <source>Adv. Eng. Inf.</source> <volume>65</volume>, <fpage>103107</fpage>. <pub-id pub-id-type="doi">10.1016/j.aei.2024.103107</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Directionally constrained fully convolutional neural network for airborne LiDAR point cloud classification</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>162</volume>, <fpage>50</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2020.02.004</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Airborne LiDAR point cloud classification with global-local graph attention convolution neural network</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>173</volume>, <fpage>181</fpage>&#x2013;<lpage>194</lpage>. <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2021.01.007</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <source>Point transformer v2: grouped vector attention and partition-based pooling</source>. <publisher-loc>Red Hook, NY</publisher-loc>: <publisher-name>NeurIPS</publisher-name>. <volume>35</volume>, <fpage>33330</fpage>&#x2013;<lpage>33342</lpage>.
</citation>
</ref>
<ref id="B48">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>P.-S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2024a</year>). <article-title>Point transformer v3: Simpler, faster, stronger</article-title>. <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>. <publisher-loc>Los Alamitos, CA</publisher-loc>: <publisher-name>CVPR</publisher-name>, <fpage>4840</fpage>&#x2013;<lpage>4851</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2024b</year>). <article-title>Towards large-scale 3D representation learning with multi-dataset point prompt training</article-title>. <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>. <publisher-loc>Los Alamitos, CA</publisher-loc>: <publisher-name>CVPR</publisher-name>, <fpage>19551</fpage>&#x2013;<lpage>19562</lpage>.</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Peters</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kontogianni</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Vetterli</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Puliti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Astrup</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Towards accurate instance segmentation in large-scale LiDAR point clouds</article-title>. <source>ISPRS Ann. Photogrammetry, Remote Sens. Spatial Inf. Sci.</source> <volume>X-1/W1-2023</volume>, <fpage>605</fpage>&#x2013;<lpage>612</lpage>. <pub-id pub-id-type="doi">10.5194/isprs-annals-x-1-w1-2023-605-2023</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ling</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Hcmpe-net: an unsupervised network for underwater image restoration with multi-parameter estimation based on homology constraint</article-title>. <source>Opt. Laser Technol.</source> <volume>186</volume>, <fpage>112616</fpage>. <pub-id pub-id-type="doi">10.1016/j.optlastec.2025.112616</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xin</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bartholomeus</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kootstra</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>3D data-augmentation methods for semantic segmentation of tomato plant parts</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>, <fpage>1045545</fpage>. <pub-id pub-id-type="doi">10.3389/fpls.2023.1045545</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Dou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>RPVNet: a deep and efficient range-point-voxel fusion network for LiDAR point cloud segmentation</article-title>,&#x201d; in <conf-name>2021 IEEE/CVF International Conference on Computer Vision (ICCV)</conf-name>, <conf-loc>Montreal, QC, Canada</conf-loc>, <conf-date>10-17 October 2021</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>16004</fpage>&#x2013;<lpage>16013</lpage>. <pub-id pub-id-type="doi">10.1109/iccv48922.2021.01572</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A review on state-of-the-art power line inspection techniques</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>69</volume>, <fpage>9350</fpage>&#x2013;<lpage>9365</lpage>. <pub-id pub-id-type="doi">10.1109/tim.2020.3031194</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yin</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>DCNet: Large-scale point cloud semantic segmentation with discriminative and efficient feature aggregation</article-title>. <source>IEEE Trans. Circuits Syst. Video Technol.</source> <volume>33</volume>, <fpage>4083</fpage>&#x2013;<lpage>4095</lpage>. <pub-id pub-id-type="doi">10.1109/tcsvt.2023.3239541</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Deep-learning-based semantic segmentation approach for point clouds of extra-high-voltage transmission lines</article-title>. <source>Remote Sens.</source> <volume>15</volume>, <fpage>2371</fpage>. <pub-id pub-id-type="doi">10.3390/rs15092371</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zafar</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ashraf</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jabbar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Naseer</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Intelligent image classification-based on spatial weighted histograms of concentric circles</article-title>. <source>Comput. Sci. Inf. Syst.</source> <volume>15</volume>, <fpage>615</fpage>&#x2013;<lpage>633</lpage>. <pub-id pub-id-type="doi">10.2298/CSIS180105025Z</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>R. S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Coherent processing and superresolution technique of multi-band radar data based on fast sparse Bayesian learning algorithm</article-title>. <source>IEEE Trans. Antennas Propag.</source> <volume>62</volume>, <fpage>6217</fpage>&#x2013;<lpage>6227</lpage>. <pub-id pub-id-type="doi">10.1109/tap.2014.2361158</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.-H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>P.-Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Biomimetic radar target recognition based on hypersausage chains</article-title>. <source>Appl. Comput. Electromagn. Soc. J. (ACES)</source> <volume>33</volume>, <fpage>1429</fpage>&#x2013;<lpage>1438</lpage>.</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ning</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>SVM-based classification of segmented airborne LiDAR point clouds in urban areas</article-title>. <source>Remote Sens.</source> <volume>5</volume>, <fpage>3749</fpage>&#x2013;<lpage>3775</lpage>. <pub-id pub-id-type="doi">10.3390/rs5083749</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A deep learning based method for railway overhead wire reconstruction from airborne LiDAR data</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>5272</fpage>. <pub-id pub-id-type="doi">10.3390/rs14205272</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Torr</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Koltun</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2020</year>). <source>Point transformer</source>. <pub-id pub-id-type="doi">10.48550/ARXIV.2012.09164</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A point cloud segmentation method for power lines and towers based on a combination of multiscale density features and point-based deep learning</article-title>. <source>Int. J. Digital Earth</source> <volume>16</volume>, <fpage>620</fpage>&#x2013;<lpage>644</lpage>. <pub-id pub-id-type="doi">10.1080/17538947.2023.2168770</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Spatial attention-based kernel point convolution network for semantic segmentation of transmission corridor scenarios in airborne laser scanning point clouds</article-title>. <source>Electronics</source> <volume>13</volume>, <fpage>4501</fpage>. <pub-id pub-id-type="doi">10.3390/electronics13224501</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>