<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2023.1113669</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Instance segmentation ship detection based on improved Yolov7 using complex background SAR images</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Yasir</surname>
<given-names>Muhammad</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2076588"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhan</surname>
<given-names>Lili</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Shanwei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2131589"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wan</surname>
<given-names>Jianhua</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hossain</surname>
<given-names>Md Sakaouth</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Isiacik Colak</surname>
<given-names>Arife Tugsan</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2079456"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Mengge</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Islam</surname>
<given-names>Qamar Ul</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Raza Mehdi</surname>
<given-names>Syed</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1897540"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Qian</given-names>
</name>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Oceanography and Space Informatics, China University of Petroleum (East China)</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>College of Geodesy and Geomatics, Shandong University of Science and Technology</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Geological Sciences, Jahangirnagar University</institution>, <addr-line>Dhaka</addr-line>, <country>Bangladesh</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>National University International Maritime College Oman</institution>, <addr-line>Sahar</addr-line>, <country>Oman</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Department of Electrical and Computer Engineering, College of Engineering, Dhofar University</institution>, <addr-line>Salalah</addr-line>, <country>Oman</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Department of Marine Engineering, Ocean College, Zhejiang University, Zhoushan</institution>, <addr-line>Zhejiang</addr-line>, <country>China</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>People's Liberation Army (PLA) Troops No.63629</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Haiyong Zheng, Ocean University of China, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Yangfan Wang, Ocean University of China, China; Xiaoling Zhang, University of Electronic Science and Technology of China, China; Ibrar Ahmad, University of Peshawar, Pakistan</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Lili Zhan, <email xlink:href="mailto:skd992016@sdust.edu.cn">skd992016@sdust.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>01</day>
<month>05</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>10</volume>
<elocation-id>1113669</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>12</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>04</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Yasir, Zhan, Liu, Wan, Hossain, Isiacik Colak, Liu, Islam, Raza Mehdi and Yang</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Yasir, Zhan, Liu, Wan, Hossain, Isiacik Colak, Liu, Islam, Raza Mehdi and Yang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>It is significant for port ship scheduling and traffic management to be able to obtain more precise location and shape information from ship instance segmentation in SAR pictures. Instance segmentation is more challenging than object identification and semantic segmentation in high-resolution RS images. Predicting class labels and pixel-wise instance masks is the goal of this technique, which is used to locate instances in images. Despite this, there are now just a few methods available for instance segmentation in high-resolution RS data, where a remote-sensing image&#x2019;s complex background makes the task more difficult. This research proposes a unique method for YOLOv7 to improve HR-RS image segmentation one-stage detection. First, we redesigned the structure of the one-stage fast detection network to adapt to the task of ship target segmentation and effectively improve the efficiency of instance segmentation. Secondly, we improve the backbone network structure by adding two feature optimization modules, so that the network can learn more features and have stronger robustness. In addition, we further modify the network feature fusion structure, improve the module acceptance domain to increase the prediction ability of multi-scale targets, and effectively reduce the amount of model calculation. Finally, we carried out extensive validation experiments on the sample segmentation datasets HRSID and SSDD. The experimental comparisons and analyses on the HRSID and SSDD datasets show that our model enhances the predicted instance mask accuracy, enhancing the instance segmentation efficiency of HR-RS images, and encouraging further enhancements in the projected instance mask accuracy. The suggested model is a more precise and efficient segmentation in HR-RS imaging as compared to existing approaches.</p>
</abstract>
<kwd-group>
<kwd>computer vision</kwd>
<kwd>object detection</kwd>
<kwd>instance segmentation</kwd>
<kwd>HR-RS</kwd>
<kwd>YOLOv7</kwd>
<kwd>SSDD</kwd>
<kwd>HRSID</kwd>
<kwd>SAR Complex background images</kwd>
</kwd-group>
<counts>
<fig-count count="8"/>
<table-count count="4"/>
<equation-count count="5"/>
<ref-count count="78"/>
<page-count count="15"/>
<word-count count="8356"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Ocean Observation</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Synthetic aperture radar (SAR) is a microwave imaging sensor built on electromagnetic wave scattering properties that may be used in all weather conditions and has some ability to penetrate clouds and the ground. With the ongoing exploitation of maritime resources as well as the increased attention being paid to the monitoring of marine ships, it has special benefits in fields such as marine monitoring, mapping, and the military (<xref ref-type="bibr" rid="B20">Li et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B23">Liu et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B18">Kong et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B52">Yasir et&#xa0;al., 2023a</xref>; <xref ref-type="bibr" rid="B53">Yasir et&#xa0;al., 2023b</xref>). SAR ship detection technology is therefore very important for protecting marine ecosystems, maritime law enforcement, and territorial sea security. Ocean ship monitoring has received a lot of attention (<xref ref-type="bibr" rid="B70">Zhang et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B4">Chen et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B49">Xu et&#xa0;al., 2022a</xref>; <xref ref-type="bibr" rid="B60">Zhang et&#xa0;al., 2023</xref>). SAR is more suited for monitoring ocean ships than optical sensors (<xref ref-type="bibr" rid="B55">Zeng et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B63">Zhang and Zhang, 2021a</xref>; <xref ref-type="bibr" rid="B50">Xu et&#xa0;al., 2022b</xref>; <xref ref-type="bibr" rid="B66">Zhang and Zhang, 2022c</xref>) because of its ability to operate in all weather conditions (<xref ref-type="bibr" rid="B61">Zhang and Zhang, 2021b</xref>). Ship monitoring is a key maritime task that is crucial for ocean surveillance, national defense security, fisheries management, etc. 
Ship identification in SAR pictures is a significant area of remote sensing (RS) research because it relies on target detection technology, which is in high demand (<xref ref-type="bibr" rid="B41">Wang et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B3">Chang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B29">Qian et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B33">Su et&#xa0;al., 2022</xref>). Ship identification in satellite RS pictures has grown in importance as a research area recently (<xref ref-type="bibr" rid="B28">Nie et&#xa0;al., 2020</xref>). The marine transportation sector is now developing extremely quickly. The number of maritime infractions has increased as a result of the quick expansion in ship numbers and shipping volume. Automated ship identification plays an increasingly essential role in maritime surveillance, monitoring, and traffic supervision as well as in the regulation of illegal fishing and freight transit. It can assist in gathering information about ship dispersion. High-resolution RS (HR-RS) pictures are provided by a variety of airborne and spaceborne sensors, including Gaofen-3, TerraSAR-X, RADARSAT-2, Ziyuan-3, Sentinel-1, Gaofen-2, and unmanned aerial vehicles (UAV), owing to the quick development of imaging technology in the domain of RS. These HR pictures are being used in the military and the domains of the national economy, such as traffic control, marine management, urban monitoring, and ocean surveillance (<xref ref-type="bibr" rid="B27">Mou and Zhu, 2018</xref>; <xref ref-type="bibr" rid="B9">Cui et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B34">Su et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B37">Sun et&#xa0;al., 2021b</xref>). 
The HR RS pictures are especially well suited for object identification and segmentation in areas like military precision strikes and maritime transportation safety (<xref ref-type="bibr" rid="B34">Su et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B42">Wang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B57">Zhang et&#xa0;al., 2020a</xref>). Instance segmentation, which may be characterized as a technology that addresses both the issue of object identification and semantic segmentation, has emerged as a significant, sophisticated, and challenging area of research in machine vision. Parallel to semantic segmentation, it has both pixel-level classification and object identification properties, where dissimilar instances must be located even if they belong to the same type (<xref ref-type="bibr" rid="B48">Xu et&#xa0;al., 2021</xref>). Since the two-stage object identification algorithm&#x2019;s introduction, other convolutional neural network-based object detection and segmentation methods have appeared, including the R-CNN, Faster R-CNN (<xref ref-type="bibr" rid="B31">Ren et&#xa0;al., 2015</xref>), and Mask R-CNN (<xref ref-type="bibr" rid="B15">He et&#xa0;al., 2017</xref>).</p>
<p>Deep learning innovation demonstrates inspiring outcomes recently in several fields, including object identification (<xref ref-type="bibr" rid="B59">Zhang et&#xa0;al., 2019a</xref>; <xref ref-type="bibr" rid="B56">Zhang et&#xa0;al., 2020c</xref>; <xref ref-type="bibr" rid="B58">Zhang et&#xa0;al., 2021a</xref>), image classification (<xref ref-type="bibr" rid="B26">Liu et&#xa0;al., 2021b</xref>; <xref ref-type="bibr" rid="B74">Zhou et&#xa0;al., 2022a</xref>; <xref ref-type="bibr" rid="B73">Zhou et&#xa0;al., 2022b</xref>), Segmentation (<xref ref-type="bibr" rid="B26">Liu et&#xa0;al., 2021b</xref>; <xref ref-type="bibr" rid="B71">Zhou et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B76">Zong and Wan, 2022</xref>; <xref ref-type="bibr" rid="B77">Zong and Wang, 2022</xref>), and so on (<xref ref-type="bibr" rid="B72">Zhou et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B25">Liu et&#xa0;al., 2021a</xref>; <xref ref-type="bibr" rid="B45">Wu et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B54">Yin et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B75">Zhu and Zhao, 2022</xref>). 
Recently, despite the existence of many excellent algorithms, like the path aggregation network (<xref ref-type="bibr" rid="B24">Liu et&#xa0;al., 2018</xref>), Mask Score R-CNN (<xref ref-type="bibr" rid="B39">Wang et&#xa0;al., 2020a</xref>), Cascade Mask R-CNN (<xref ref-type="bibr" rid="B10">Dai et&#xa0;al., 2016</xref>), and segmenting objects by locations (<xref ref-type="bibr" rid="B40">Wang et&#xa0;al., 2020b</xref>) and so on (<xref ref-type="bibr" rid="B62">Zhang and Zhang, 2019</xref>; <xref ref-type="bibr" rid="B69">Zhang et&#xa0;al., 2019b</xref>; <xref ref-type="bibr" rid="B68">Zhang et&#xa0;al., 2021b</xref>; <xref ref-type="bibr" rid="B32">Shao et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B64">Zhang and Zhang, 2022a</xref>; <xref ref-type="bibr" rid="B65">Zhang and Zhang, 2022b</xref>; <xref ref-type="bibr" rid="B66">Zhang and Zhang, 2022c</xref>; <xref ref-type="bibr" rid="B67">Zhang and Zhang, 2022d</xref>), common issues, such as erroneous segmentation edges and the development of global relations, still exist. The extension of the model will lead to dimensional disasters if the long-range dependencies are represented by dilated convolution or by expanding the number of channels. YOLOv7 serves as the basic foundational framework for the development of a framework model for RS picture object identification and instance segmentation in order to get over CNNs&#x2019; limitations in terms of their capacity to extract spatial information. Detecting and segmenting ships in SAR images is difficult because of the complexity and variety of the images themselves, which include speckle noise, shadows, and cluttered backgrounds. These elements make it challenging to reliably identify ships among other objects in the image and to define the ship&#x2019;s boundaries.</p>
<p>In addition, unlike moving targets such as aircraft and vehicles, ship targets often dock side by side near the port, so it is difficult for general detection methods to accurately distinguish each target, resulting in a large number of missed targets. Meanwhile, ship instance segmentation can not only accurately obtain the position of the object but also effectively obtain the shape information of the target, which can further promote research on SAR ship recognition. However, at present, a large number of studies focus only on SAR ship target detection and do not go on to achieve target-level instance segmentation. This is mainly due to the following factors: (1) The complexity of instance segmentation models is high, with model sizes often reaching hundreds of megabytes, which makes them difficult to apply in practice. (2) The running efficiency of the instance segmentation algorithm is relatively low, and the initial training of the model takes a long time. (3) There is not enough sample data to train the model, which makes the performance of existing deep learning methods insufficient. In our study, we utilized various data augmentation techniques, such as random flipping, rotation, and scaling, to generate additional samples from the limited dataset. These techniques effectively increase the diversity of the training samples and help prevent overfitting.</p>
<p>To overcome this problem, we propose an improved version of the YOLOv7 object detection algorithm that incorporates an ELAN-Net backbone and feature pyramid network (FPN) to boost the model&#x2019;s capability to extract relevant features from SAR images in complex backgrounds. Our suggested algorithm achieves state-of-the-art effectiveness on two benchmark datasets, demonstrating its effectiveness in addressing the research problem of accurate ship identification and segmentation in complex SAR pictures. The main contributions in this paper are outlined in the following order:</p>
<p>&#x2022; An upgraded YOLOv7 model is proposed for ship detection and instance segmentation.</p>
<p>&#x2022; An effective feature extraction module has been developed and added to the improved backbone network, enhancing the network&#x2019;s focus on target features and making the process of feature extraction more efficient.</p>
<p>&#x2022; The feature pyramid module is optimized with feature fusion to increase the accuracy of multi-scale target segmentation and further improve the speed of image processing, boosting the identification and segmentation performance of the network for multi-scale ship targets.</p>
<p>&#x2022; Two ship datasets, SSDD and HRSID, are used to evaluate the efficiency of the suggested technique. Because these datasets contain images with different scales, resolutions, and scenes, they are also used to test the model&#x2019;s robustness.</p>
<p>The paper is structured as follows: Part 2 explains the materials and experimental setup and demonstrates how the study acts as an organizing foundation for the remaining portions of the research. Part 3 provides a description of the research project&#x2019;s results and analyses. It has also shown the model&#x2019;s potential by comparing it with other innovatively made versions. The ablation study is described in Section 4, and Section 5 concludes the paper.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2_1">
<label>2.1</label>
<title>Deep learning-based instance segmentation</title>
<p>Instance segmentation in SAR photos has the advantage of combining semantic segmentation with object identification. Using semantic segmentation, each pixel of the input picture is separated into logical groups according to where the ship targets are located. It offers a better description and perception of the ship targets because of the more complex interpretation technique. As the first attempt at segmenting CNN, Mask R-CNN (<xref ref-type="bibr" rid="B22">Lin et&#xa0;al., 2017b</xref>) adds a mask branch that is analogous to the classification and regression branch in Faster R-CNN in order to forecast the segmentation mask for each region of interest (RoI). Mask Scoring R-CNN (<xref ref-type="bibr" rid="B39">Wang et&#xa0;al., 2020a</xref>) utilizes the product of the classification score and the IoU score of the mask to construct the mask score in order to increase the quality of an instance. Cascade Mask R-CNN is created by combining Mask R-CNN and Cascade R-CNN (<xref ref-type="bibr" rid="B7">Chen et&#xa0;al., 2019b</xref>). Each cascade framework adds a mask branch to complete the instance segmentation task, combining the best features of the two approaches. In order to improve identification accuracy, Hybrid Task Cascade (<xref ref-type="bibr" rid="B7">Chen et&#xa0;al., 2019b</xref>) proposes integrating the concurrent structures of identification and segmentation, which leverage semantic segmentation branches to build a spatial context for the bounding box. In recent years, a number of one-stage algorithms, notably YOLACT (<xref ref-type="bibr" rid="B1">Bolya et&#xa0;al., 2019</xref>) and SOLO (<xref ref-type="bibr" rid="B40">Wang et&#xa0;al., 2020b</xref>), have appeared that correspond to object identification methods. 
In addition, a few approaches such as BlendMask (<xref ref-type="bibr" rid="B8">Chen et&#xa0;al., 2020</xref>) and PolarMask (<xref ref-type="bibr" rid="B46">Xie et&#xa0;al., 2020</xref>) are built on an item identification network without anchors. Due to their speed benefits, these one-stage techniques are frequently utilized in the domain of autonomous vehicle operation and facial detection. However, in some complex ship identification tasks, the identification technique can only assess a ship&#x2019;s length and contour when they are important details for the particular type of ship. Improvements to the current algorithms for segmenting SAR images by an instance are not currently being made in a substantial way. The HRSID (<xref ref-type="bibr" rid="B21">Lin et&#xa0;al., 2017a</xref>) dataset was recently created for the segmentation of ship instances in SAR images.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>SAR images-based ship detection</title>
<p>SAR can continually monitor the planet, in contrast to optical sensors, which are inoperable at night. Because SAR images do not contain information about color, texture, shape, or other aspects, they show ships differently than optical images do. Furthermore, the SAR image has a lot of noise; as a result, identifying SAR images might be difficult for researchers without the appropriate skills. Because there is a dearth of data on tagged SAR ships as an outcome, it is more challenging to identify ships from SAR images. In order to find ships in SAR images, several deep-learning techniques have been used (<xref ref-type="bibr" rid="B35">Sun et&#xa0;al., 2021a</xref>; <xref ref-type="bibr" rid="B23">Liu et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B36">Sun et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B51">Yasir et&#xa0;al., 2022</xref>). (<xref ref-type="bibr" rid="B13">Fan et&#xa0;al. 2019b</xref>) implemented a multi-level features extractor into the Faster R-CNN for polarimetric SAR ship identification. A dense attention pyramid network was created to identify SAR ships by densely connecting each feature map to the attention convolutional module (<xref ref-type="bibr" rid="B9">Cui et&#xa0;al., 2019</xref>). For pixel-by-pixel ship identification in polarimetric SAR photos, a fully convolutional network has been created (<xref ref-type="bibr" rid="B12">Fan et&#xa0;al., 2019a</xref>). The feature pyramid structure contained a split convolution block and an embedded spatial attention block (<xref ref-type="bibr" rid="B14">Gao et&#xa0;al., 2019</xref>). Against a complex background, the feature pyramid structure can identify ship items with accuracy. Wei et&#xa0;al. (<xref ref-type="bibr" rid="B44">Wei et&#xa0;al., 2020</xref>) created a high-resolution feature pyramid structure for ship recognition that combined high-to-low-resolution features. 
The challenge of ships of various sizes and crowded berthings has been addressed by the development of a multi-scale adaptive recalibration structure (<xref ref-type="bibr" rid="B5">Chen et&#xa0;al., 2019a</xref>). A one-stage SAR target identification approach was suggested by Hou et&#xa0;al. (<xref ref-type="bibr" rid="B16">Hou et&#xa0;al., 2019</xref>) to address the low confidence of candidates and false positives. (<xref ref-type="bibr" rid="B17">Kang et&#xa0;al. 2017</xref>) proposed a method integrating CFAR with faster R-CNN. The object proposals produced by the faster R-CNN used in this method for extracting small objects served as the protective window of the CFAR. Zou et&#xa0;al. (<xref ref-type="bibr" rid="B78">Zou et&#xa0;al., 2020</xref>) integrated YOLOv3 with a generative adversarial network with a multi-scale loss term to increase the accuracy of SAR ship recognition. In order to identify and recognize ships in complex-scene SAR images, Xiong et&#xa0;al. (<xref ref-type="bibr" rid="B47">Xiong et&#xa0;al., 2022</xref>) suggested a lightweight model that integrated several attention mechanisms into the YOLOv5-n lightweight model.</p>
<p>Results from using CNN methods to identify ships in SAR imagery are impressive. However, there are still two significant areas of work that need to be addressed. One of these involves methodically combining the most recent advancements in computer vision to connect optical and SAR images. The other seeks to broaden the use of ship identification to further applications, such as instance segmentation. The two SAR image components were combined as part of this study to enhance the images&#x2019; suitability for RS applications, which is another goal of the investigation.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Proposed improved methodology</title>
<sec id="s3_1">
<label>3.1</label>
<title>Overall structure of our model</title>
<p>In addition to classifying and locating the object of interest in an image, instance segmentation also labels each pixel that is a component of the particular object instance. It enhances the identification process by associating the bounding box and mask with the object. As a result, instance segmentation will help us identify ships more accurately and will also help us deal with crowded scenes and detect partially occluded ships. Semantic segmentation-based bottom-up techniques and identification-based top-down techniques have been the main focus of instance segmentation research for a very long time. The majority of CNN-based models and their derived models, including RCNN, have been used for computer vision tasks such as object identification, tracking, segmentation, and classification. Faster RCNN (<xref ref-type="bibr" rid="B6">Chen et&#xa0;al., 1993</xref>) is improved by a cutting-edge technique known as Mask RCNN (<xref ref-type="bibr" rid="B15">He et&#xa0;al., 2017</xref>), which also performs instance segmentation using region proposals. Additionally, it locates every instance of the target object down to the pixel level in an image.</p>
<p>YOLO is a single-stage object detector that can forecast a particular object in each area of the feature maps without the aid of the cascaded location classification stage. YOLO categorizes and locates the object using bounding boxes and a single convolutional neural network (CNN). It splits the image into an S&#xd7;S grid (S &#x2208; &#x2124;<sup>+</sup>) and makes a grid cell responsible for an object if the object&#x2019;s center point falls within that cell. As a one-stage detection method, YOLO can recognize objects in real time and is very fast (<xref ref-type="bibr" rid="B30">Redmon et&#xa0;al., 2016</xref>). The YOLOv7 algorithm, which is now the most sophisticated in the YOLO series, balances the conflict between the quantity of parameters, the amount of calculation, and the performance. It also outperforms earlier iterations of the YOLO series in terms of accuracy and speed. In this paper, we used the improved YOLOv7 for ship detection and instance segmentation, and <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref> illustrates the outline of the method recommended in the research.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The overall structure of the proposed ship detection and segmentation model. E-ELAN, MP-Conv, Cat-Conv, and SPPSPC are some improved modules.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g001.tif"/>
</fig>
<p>The 1024x1024 SAR images are concurrently supplied to the network feature extraction at the input end, as shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>. In order to supervise the training of the framework successfully, the corresponding ship target labels must also be supplied. The entire deep framework is divided into three sections: the backbone structure, which is primarily used to extract features from the input picture; the feature pyramid, which is used to scale the extracted features and strengthen the expression of the target feature; and the network prediction layer, which predicts the target at three scales. Finally, post-processing techniques like non-maximum suppression (NMS) are used to acquire the results of the identification output.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Improved backbone networks</title>
<p>Two new modules are added to the backbone structure in this research: the MP-Conv module and the E-ELAN module, both of which use the SiLU function as their activation function. The SiLU activation function used by the MP-Conv module is known to be more computationally efficient and effective than the traditional ReLU activation function. By incorporating the SiLU function, the MP-Conv module can better capture relevant features from SAR images, leading to improved object detection performance. Meanwhile, the MP-Conv module adopts a double-branch fusion approach to downsample the convolution blocks, which on the one hand improves the operational efficiency of target feature extraction and on the other hand fuses and enhances the target feature expression. The E-ELAN module is designed to boost the capability of the algorithm to retrieve spatial information from the SAR image. This is achieved by incorporating an attention mechanism that selectively weighs the feature maps based on their relevance to the final prediction. By selectively weighing the feature maps, the E-ELAN module can help the model focus on the most relevant information, leading to improved detection and segmentation performance. In addition, the E-ELAN module can stack more blocks by considering the shortest gradient path, so as to enhance the feature extraction capability of the network without significantly increasing the complexity of the model.</p>
<p>The E-ELAN module is an effective network structure, as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>, that enables the network to learn more features and has stronger robustness by managing the shortest and longest gradient routes. Specifically, the E-ELAN module has two branches. The first branch uses a 1x1 convolution to adjust the number of channels. The second branch, which is more complex, first passes through a 1x1 convolution module to alter the number of channels and then passes through four 3x3 convolution modules to extract features.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>The detailed structures of two improved modules in the backbone network. <bold>(A)</bold> The E-ELAN module. <bold>(B)</bold> The MP-Conv module. &#x201c;Conv&#x201d; means the ordinary convolution-2D layer, &#x201c;BN&#x201d; means the batch normalization layer, &#x201c;Max-Pooling&#x201d; means the max pooling-2D layer; &#x201c;k&#x201d; is the kernel size, and &#x201c;s&#x201d; is the sliding step.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g002.tif"/>
</fig>
<p>The reason for selecting the fourth B-Conv as the branch for channel concatenating in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> is that we conducted extensive experiments and found that this branch provides the best performance for ship detection. Specifically, we found that by selecting the fourth B-Conv branch, the network can effectively capture features at different scales and resolutions, which is critical for accurate ship instance segmentation detection in complex background SAR images.</p>
<p>Two branches of the MP-Conv (Max-Pooling Convolution) module, as seen in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>, are employed for downsampling. A Max-pool, or maximal pooling, is used on the first branch: the max-pooling operation performs the downsampling and is followed by a 1x1 convolution that changes the number of channels. The second branch initially performs a 1x1 convolution to change the number of channels before passing through a convolution block with a 3x3 convolution kernel and a stride of 2, which also performs downsampling. In the end, the two branches&#x2019; results are combined; the number of output channels equals the number of input channels, but the spatial resolution is decreased by a factor of 2.</p>
<p>In summary, the proposed model structure is designed to enhance the model&#x2019;s ability to extract relevant features from SAR images and to incorporate spatial information through the attention mechanism. These improvements contribute to improved object detection and segmentation performance, as demonstrated in our experiments.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Improved neck networks</title>
<p>
<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref> displays the detailed structures of two enhanced modules in the neck network. <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref> illustrates how similar the Cat-conv module is to the E-ELAN (Extended Efficient Layer Aggregation Network) module, with the exception that it chooses a different number of outputs for the second branch. Three outputs are chosen by the E-ELAN module for final addition, and five channels are chosen by the Cat-conv module for concatenation. The Cat-conv structure utilized in this article can assist the entire pyramid framework in aggregating multi-scale features, increasing the multi-scale representation of ship targets, which have remarkable multi-scale features in SAR images.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>The detailed structures of two improved modules in the neck network. <bold>(A)</bold> The Cat-Conv module. <bold>(B)</bold> The SPPSPC module. &#x201c;Conv&#x201d; means the ordinary convolution-2D layer, &#x201c;BN&#x201d; means the batch normalization layer, &#x201c;Max-Pooling&#x201d; means the max pooling-2D layer; &#x201c;k&#x201d; is the kernel size, and &#x201c;s&#x201d; is the sliding step.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g003.tif"/>
</fig>
<p>In order to increase the receptive field more efficiently and further promote the algorithm to adapt to different resolution images, we optimize the design of the SPPSPC (Spatial Pyramid Pooling with Spatial Pyramid Convolution) module to replace the original SPP module. As seen in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>, the first branch has four branches following the Max-pool operation. Through maximal pooling, it obtains various receptive fields. These four distinct branches signify the network&#x2019;s ability to process a variety of objects. That is to say, it has four receptive fields for each of its four separate scales of maximum pooling, which are utilized to differentiate between large and small targets. In this way, the SPPSPC module designed in this paper combines and optimizes the feature reorganization, which can effectively increase the accuracy of the algorithm while greatly reducing the amount of computation. The loss function used in our proposed network is a combination of three loss functions: the localization loss, the confidence loss, and the segmentation loss. The localization loss measures the difference between the predicted bounding box and the ground truth bounding box. The confidence loss measures the objectness score and the background score. Finally, the segmentation loss measures the pixel-wise difference between the predicted mask and the ground truth mask. The overall loss function is a weighted sum of these three loss functions, and it is optimized using the stochastic gradient descent (SGD) algorithm.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental result and discussions</title>
<sec id="s4_1">
<label>4.1</label>
<title>Dataset overview</title>
<sec id="s4_1_1">
<label>4.1.1</label>
<title>HRSID dataset</title>
<p>The High-Resolution SAR Images Dataset for Ship Detection and Instance Segmentation (HRSID) provided by Wei et&#xa0;al. (<xref ref-type="bibr" rid="B22">Lin et&#xa0;al., 2017b</xref>) is made up of images from 99 Sentinel-1B imageries, 36 TerraSAR-X, and 1 TanDEM-X imagery. The resolutions of the 800 x 800-pixel images, which contain 16951 ships and 5604 sliced SAR images, range from 1 to 15 meters.</p>
</sec>
</sec>
<sec id="s4_2">
<label>4.1.2</label>
<title>SSDD dataset</title>
<p>The first and most important stage in applying deep learning algorithms to recognize ships is the construction of a substantial and comprehensive dataset. As a result, the experiment makes use of the SSDD (<xref ref-type="bibr" rid="B19">Li et&#xa0;al., 2017</xref>) dataset, which contains 1160 SAR pictures from Sentinel-1, TerraSAR-X, and RadarSat-2 with resolutions ranging from 1m to 15m and polarizations in HV, HH, VH, and VV (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). Scenes of offshore ships and inshore ships are both present in the collection as background elements.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Detailed information about the SAR imagery used for dataset construction.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Dataset</th>
<th valign="middle" align="center">Image (num)</th>
<th valign="middle" align="center">Size (Pixel)</th>
<th valign="middle" align="center">Satellite</th>
<th valign="middle" align="center">Resolution (m)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">HRSID (<xref ref-type="bibr" rid="B22">Lin et&#xa0;al., 2017b</xref>)</td>
<td valign="middle" align="center">5604</td>
<td valign="middle" align="center">800 x 800</td>
<td valign="middle" align="center">Sentinel-1B/TerraSAR-X /TanDEM-X</td>
<td valign="middle" align="center">1-15</td>
</tr>
<tr>
<td valign="middle" align="center">SSDD (<xref ref-type="bibr" rid="B19">Li et&#xa0;al., 2017</xref>)</td>
<td valign="middle" align="center">1160</td>
<td valign="middle" align="center">800 x 800</td>
<td valign="middle" align="center">RadarSat-2/TerraSAR-X/Sentinel-1</td>
<td valign="middle" align="center">1-15</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The first two SAR image examples in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref> show offshore ships, whereas the last two in the row, respectively, show ships docking in ports and large ships and show the cluster-distributed tiny ships in the canal.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Sample images from the datasets used in the current paper. <bold>(A)</bold> Images from the HRSID dataset and <bold>(B)</bold> images from the SSDD dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g004.tif"/>
</fig>
</sec>
<sec id="s4_3">
<label>4.2</label>
<title>Implementation setting</title>
<p>The experiments are all run on an Intel Core i9-9900KF CPU and an NVIDIA Geforce GTX 2080Ti GPU utilizing CUDA 10.1, CUDNN 7.6.5, and PyTorch 1.7.0. In each experiment, the initial learning rate is set to 0.01, the final one-cycle learning rate is set to 0.001, the momentum is set to 0.937, the optimizer weight decay is set to 0.0005, and the ship detection confidence is set to 0.7. We use the Stochastic Gradient Descent (SGD) algorithm for learning optimization. The ship instance segmentation task in this research also requires labeling the object instance as supervision information and sending it to the suggested deep learning framework for learning optimization, unlike the general detection task. In order to more thoroughly assess the proposed model, we split the entire dataset into a training set and a test set in a 7:3 ratio. We then compared the detection results with the true value annotation to assess how well the algorithm performed.</p>
</sec>
<sec id="s4_4">
<label>4.3</label>
<title>Evaluation metrics</title>
<p>The traditional methods for quantitatively and thoroughly assessing the effectiveness of object detectors are the evaluation metrics precision (p), recall (r), intersection over union (IoU), and average precision (AP) (<xref ref-type="bibr" rid="B11">Everingham et&#xa0;al., 2010</xref>). The expert annotation of the object&#x2019;s geographic coordinates is referred to as the ground truth in supervised learning for object identification and instance segmentation. The percentage of overlap between the expected outcome and the actual result serves as a proxy for the correlation between two variables; a higher level of overlap denotes a stronger connection and a more precise prediction. Eq (1) states that the bounding box IoU is determined by the percent of overlap between the predicted bounding box and the ground truth bounding box.</p>
<p>The efficiency of various techniques is evaluated using a number of recognized indicators, such as AP, r, p, and IoU, and these indications are particularly specified in the following Eq (1&#x2013;5) since SAR photo object identification tasks are comparable:</p>
<disp-formula>
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x222a;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>Bbox</mml:mtext>
</mml:mrow>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In object identification tasks, AP is a frequently used indicator that summarizes the trade-off between precision and recall as the area under the recall-precision curve (see equation (5)). Another often-used metric is r, which compares the fraction of successfully recognized items to the total number of objects in the picture. It is determined as the ratio of true positives (items that have been accurately identified) to the sum of true positives and false negatives (objects that were present in the image but not detected).</p>
<p>p is an indicator that calculates the proportion of successfully detected items concerning the total number of detected objects in the picture. It is calculated by dividing the number of true positives by the total number of true positives and false positives. IoU (Intersection over Union) is an indicator that calculates the ratio of the intersection of two bounding boxes to the union of two bounding boxes to determine the similarity between two bounding boxes (Bbox p and Bbox g). These indicators (AP, r, p, IoU) are extensively employed in the domain of SAR picture object identification to evaluate and compare the efficacy of various methodologies.</p>
<p>The rate of overlap between the ground truth mask and the predicted mask, as shown in equation (2), determines the mask IoU in a manner similar to how segmentation precision is calculated.</p>
<disp-formula>
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x222a;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>Mask</mml:mtext>
</mml:mrow>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The IoU may also be used to assess segmentation tasks such as object recognition in SAR images. The IoU is determined using equation (2), which is comparable to the calculation for IoU of bounding boxes that has been previously described. The IoU mask is the ratio of the predicted mask (Mask p) and the ground truth mask (Mask g) intersection to the union of the two masks. IoU is also known as the Jaccard Index in the context of image segmentation, which is a standard statistic for evaluating the performance of image segmentation algorithms. A high IoU score implies that the predicted mask and the ground truth mask have a high degree of overlap, indicating that the model is accurate.</p>
<p>During classification, algorithms may incorrectly recognize the surroundings and the objects. True Positives (TP), True Negatives (TN), False Positives (FP) and False Negatives (FN) are the four categorization findings, where TP stands for the number of correctly classified positive samples, TN for the number of correctly classified negative samples, FN for the number of missed positive samples, and FP for the number of false alarms in the background. These criteria establish <italic>p</italic> and <italic>r</italic>, as shown by equations (3, 4).</p>
<disp-formula>
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In classification tasks, the four categorization findings are used to evaluate the algorithm&#x2019;s performance. Precision and recall, two often used indicators in classification tasks, are calculated using TN, FN, TP, and FP. Equation (3) is used to calculate precision: it is the ratio of correctly identified positive samples to the total number of samples predicted as positive. A high precision score suggests that the algorithm has a low number of false positives, indicating that the large majority of the samples it identifies as positive are truly positive.</p>
<p>The AP is established using recall and precision measurements. If the horizontal coordinate is the r value and the vertical coordinate is the precision value, as shown in equation (5), then the area under the recall-precision curve is the AP value in the Cartesian coordinate system:</p>
<disp-formula>
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:munderover>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:munderover>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
<mml:mi>d</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The mean AP (mAP) is defined as the arithmetic mean of the AP values over all categories in a dataset with multiple classes. The AP measure is extensively used to assess the effectiveness of object identification systems. It is the area under the recall-precision curve, which is a plot of precision <italic>vs.</italic> recall. According to equation (5), the AP value in the Cartesian coordinate system is the definite integral of the precision value with respect to the recall value, ranging from 0 to 1. A greater AP value suggests that the algorithm is doing well, as seen by a larger area under the recall-precision curve.</p>
<p>Mean Average Precision (mAP) is a statistic used to assess the effectiveness of multi-class object identification systems. It is the average of all the AP values in a dataset. It provides an overall measure of the algorithm&#x2019;s performance across all classes in the dataset. A greater mAP number implies that the method performs better across all classes in the dataset.</p>
</sec>
<sec id="s4_5">
<label>4.4</label>
<title>Visualization experiment of proposed algorithm</title>
<p>Due to various incident angles of the radar signal, environmental conditions, polarization techniques, etc., the preprocessing SAR images include clutter noise that interferes with the feature of ships and prohibits ship identification and instance segmentation using CNN. Therefore, while building a SAR dataset for ship identification and instance segmentation, ships should be totally and precisely labeled as opposed to creating an optical RS dataset for object recognition and instance segmentation (<xref ref-type="bibr" rid="B43">Waqas Zamir et&#xa0;al., 2019</xref>). In current research work, we have established an effective and reliable algorithm for building an HR-RS dataset for CNN-based ship identification and instance segmentation. Instance segmentation&#x2019;s impacts on low-resolution SAR pictures may be limited in order to escape missing annotation and incorrect annotation brought on by artificial structures that resemble ships (<xref ref-type="bibr" rid="B42">Wang et&#xa0;al., 2019</xref>), which are displayed as highlighted spots in low-resolution SAR images. High-resolution remote sensing pictures are utilized to create the dataset, and the images are sliced into 800 x 800 size segments for optimal function development, such as multi-scale training.</p>
<p>The results of ship identification instance segmentation for SAR images using the proposed model are shown in <xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5</bold>
</xref>, <xref ref-type="fig" rid="f6">
<bold>6</bold>
</xref>. The ground truth mask results are shown in the first row of the figure, and the predicted instance outcomes are presented in the second row. <xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5</bold>
</xref>, <xref ref-type="fig" rid="f6">
<bold>6</bold>
</xref> demonstrate how our model&#x2019;s output is suitable for our goal of segmenting instances in HR-RS images. As missed detections and false alarms decrease in our model, instance segmentation is carried out on the mask branch. Finally, these synthetic targets can be detected and segmented quite well, and the segmentation outcomes produced by our model are very close to reality. With the help of our model, the instance segmentation task in HR-RS images was completed successfully.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Outcomes of the proposed approach&#x2019;s instance segmentation in the HRSID dataset (the first row shows the ground truth and the second row shows the predicted instance outcomes).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g005.tif"/>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Results of the proposed model&#x2019;s instance segmentation in the SSDD detection dataset (the first row shows the ground truth and the second row shows the predicted instance outcomes).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g006.tif"/>
</fig>
</sec>
<sec id="s4_6">
<label>4.5</label>
<title>Ablation studies</title>
<p>We performed ablation experiments to assess the efficacy of the various components in our suggested ship instance segmentation detection model. <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> shows the findings of the ablation research. As the default model, we used the YOLOv7 model with an input size of 640x640 pixels. The baseline model had an AP of 57.8, with an AP50 of 83.7 and an AP75 of 69.5. In the first ablation trial, we added E-ELAN, an edge enhancement module, to the baseline model. With the inclusion of E-ELAN, the AP increased to 59.4, with an AP50 of 89.6 and an AP75 of 71.9. In the second ablation trial, we added MP-Conv, a max-pooling convolution module, to the baseline model. The inclusion of MP-Conv increased the AP to 60.7, with an AP50 of 83.9 and an AP75 of 69.8. Cat-Conv, a channel attention transfer convolution module, was added to the baseline model in the third ablation trial. Cat-Conv increased the AP to 62.3, with an AP50 of 83.1 and an AP75 of 68.3. In the fourth ablation trial, we added SPPSPC, a spatial pyramid pooling and convolution module, to the baseline model. SPPSPC increased the AP to 63.5, with an AP50 of 87.8 and an AP75 of 73.5. Finally, in the fifth and concluding ablation trial, we added all of the previously stated modules (E-ELAN, MP-Conv, Cat-Conv, and SPPSPC) to the baseline model. The finished model had the greatest AP of 69.7, as well as an AP50 of 94.9 and an AP75 of 86.5. We found that incorporating all four modules greatly enhanced the baseline model&#x2019;s performance, particularly in terms of accuracy and recall, showing the efficacy of our suggested model for real-time ship instance segmentation recognition in complicated backdrop SAR images.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>The ablation experiment study.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">Input size</th>
<th valign="top" align="center">E-ELAN</th>
<th valign="top" align="center">MP-Conv</th>
<th valign="top" align="center">Cat-Conv</th>
<th valign="top" align="center">SPPSPC</th>
<th valign="top" align="center">AP</th>
<th valign="top" align="center">AP50</th>
<th valign="top" align="center">AP75</th>
<th valign="top" align="center">APS</th>
<th valign="top" align="center">APM</th>
<th valign="top" align="center">APL</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="6" align="center">Yolov7</td>
<td valign="top" align="center">640x640</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">57.8</td>
<td valign="top" align="center">83.7</td>
<td valign="top" align="center">69.5</td>
<td valign="top" align="center">57.3</td>
<td valign="top" align="center">60.6</td>
<td valign="top" align="center">24.5</td>
</tr>
<tr>
<td valign="top" align="center">640x640</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">59.4</td>
<td valign="top" align="center">89.6</td>
<td valign="top" align="center">71.9</td>
<td valign="top" align="center">59.1</td>
<td valign="top" align="center">60.6</td>
<td valign="top" align="center">39.7</td>
</tr>
<tr>
<td valign="top" align="center">640x640</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">60.7</td>
<td valign="top" align="center">83.9</td>
<td valign="top" align="center">69.8</td>
<td valign="top" align="center">56.9</td>
<td valign="top" align="center">61.2</td>
<td valign="top" align="center">30.4</td>
</tr>
<tr>
<td valign="top" align="center">640x640</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">62.3</td>
<td valign="top" align="center">83.1</td>
<td valign="top" align="center">68.3</td>
<td valign="top" align="center">60.7</td>
<td valign="top" align="center">63.5</td>
<td valign="top" align="center">47.8</td>
</tr>
<tr>
<td valign="top" align="center">640x640</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">63.5</td>
<td valign="top" align="center">87.8</td>
<td valign="top" align="center">73.5</td>
<td valign="top" align="center">65.5</td>
<td valign="top" align="center">67.4</td>
<td valign="top" align="center">45.5</td>
</tr>
<tr>
<td valign="top" align="center">640x640</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">&#x2714;</td>
<td valign="top" align="center">69.7</td>
<td valign="top" align="center">94.9</td>
<td valign="top" align="center">86.5</td>
<td valign="top" align="center">73.4</td>
<td valign="top" align="center">76.8</td>
<td valign="top" align="center">58.6</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4_7">
<label>4.6</label>
<title>Comparison with other state-of-the-art techniques</title>
<p>
<xref ref-type="fig" rid="f7">
<bold>Figures&#xa0;7</bold>
</xref> and <xref ref-type="fig" rid="f8">
<bold>8</bold>
</xref> show the qualitative outcomes of our model and the compared algorithms on the HRSID and SSDD datasets, respectively, to further validate the efficiency of instance segmentation and ship identification. Row 1 displays the ground-truth mask, while rows 2 to 6 display the results of Faster R-CNN, Cascade R-CNN, Mask R-CNN, Cascade Mask R-CNN, and Hybrid Task Cascade, respectively. When compared to existing instance segmentation techniques, the results of our improved model can accurately recognize and separate artificial targets in a variety of scenes, as shown in row 7. The expected instance masks, in particular, precisely cover these contrived objectives. As a result of our model&#x2019;s nearly complete elimination of false alarms and missed detections, our mask branch consistently accomplishes superior instance segmentation. When contrasted with bounding box identification approaches like Faster R-CNN, Mask R-CNN, Cascade Mask R-CNN, Hybrid Task Cascade, and Cascade R-CNN, instance segmentation outcomes are more closely connected to the shape of the original targets. Additionally, separate instances within the same category can be distinguished using the instance segmentation. The ships in <xref ref-type="fig" rid="f7">
<bold>Figures&#xa0;7</bold>
</xref>, <xref ref-type="fig" rid="f8">
<bold>8</bold>
</xref> stand out because of their dissimilar colors, and in addition, the suggested model, when compared to other instance segmentation approaches, has no false alarms and no missed target detections while also producing better results for mask segmentation. The results from the HRSID and SSDD datasets show that our technique is appropriate for instance segmentation in HR-RS photos and outperforms existing instance segmentation strategies when it comes to mask segmentation.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Outcomes of CNN-based techniques for visual ship identification instance segmentation using the HRSID dataset. Outcomes from <bold>(A)</bold> illustrate the ground truth, <bold>(B)</bold> the Faster-R-CNN technique, <bold>(C)</bold> the Cascade R-CNN, <bold>(D)</bold> the Mask R-CNN, <bold>(E)</bold> the Cascade Mask R-CNN, <bold>(F)</bold> the Hybrid Task Cascade, and <bold>(G)</bold> the results from our proposed method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g007.tif"/>
</fig>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Outcomes of CNN-based techniques for visual ship identification instance segmentation using the SSDD. Results from <bold>(A)</bold> illustrate the ground truth, <bold>(B)</bold> the Faster-R-CNN technique, <bold>(C)</bold> the Cascade R-CNN, <bold>(D)</bold> the Mask R-CNN, <bold>(E)</bold> the Cascade Mask R-CNN, <bold>(F)</bold> the Hybrid Task Cascade, and <bold>(G)</bold> the results from our proposed method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1113669-g008.tif"/>
</fig>
<p>To quantitatively assess the achievement of instance segmentation, we compared the suggested approach with other cutting-edge approaches on the HRSID and SSDD in <xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref> and <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref>. Faster R-CNN, Mask R-CNN, Cascade R-CNN, Cascade Mask R-CNN, and Hybrid Task Cascade are some of these techniques. <xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref> and <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref> show that the suggested strategy achieves the maximum AP of 69.7%. Our model outperforms Faster R-CNN, Cascade R-CNN, Mask R-CNN, Cascade Mask R-CNN, and Hybrid Task Cascade by 6.3%, 3.2%, 4.5%, 0.8%, and 2.4%, respectively. In summary, the recommended method has superior instance segmentation effectiveness and more precise predicted instance masks on the HRSID dataset compared to other instance segmentation algorithms. The reduced parameter count and computational expense are due to the use of the SiLU activation function, which is more computationally efficient than the traditional ReLU activation function. Additionally, the E-ELAN module selectively weighs the feature maps, further reducing the computational expense without compromising performance. The AP50 score of our model is 94.9%, which is also 10.2% higher than Faster R-CNN, 9.3% higher than Cascade R-CNN, 7.4% higher than Mask R-CNN, 8.2% higher than Cascade Mask R-CNN, and 7.3% higher than Hybrid Task Cascade. Our model achieves an AP75 score of 86.5%, which is an improvement of 14.9% over Faster R-CNN, 9.3% over Cascade R-CNN, 12.5% over Mask R-CNN, 9.7% over Cascade Mask R-CNN, and 7.2% over Hybrid Task Cascade. Mask segmentation has proven to be more precise and superior to other state-of-the-art techniques, such as segmentation utilizing the HRSID dataset. The efficacy for small, medium, and large targets on the HRSID dataset has also improved, according to APS, APM, and APL.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Comparing to various cutting-edge methods on the HRSID dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Methods</th>
<th valign="top" colspan="2" align="left">Backbone</th>
<th valign="top" colspan="2" align="left">Time (ms)</th>
<th valign="top" align="left">Model (Size)</th>
<th valign="top" align="left">AP</th>
<th valign="top" align="left">AP50</th>
<th valign="top" align="center">AP75</th>
<th valign="top" align="left">APS</th>
<th valign="top" align="left">APM</th>
<th valign="top" align="left">APL</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Faster R-CNN (<xref ref-type="bibr" rid="B31">Ren et&#xa0;al., 2015</xref>)</td>
<td valign="top" colspan="2" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="top" colspan="2" align="center">52.6 64.2</td>
<td valign="top" align="left">330M 482M</td>
<td valign="top" align="left">64.9 63.4</td>
<td valign="top" align="left">84.6 84.7</td>
<td valign="top" align="left">71.5 71.6</td>
<td valign="top" align="left">65.1 65.7</td>
<td valign="top" align="left">66.2 67.3</td>
<td valign="top" align="left">17.8 25.3</td>
</tr>
<tr>
<td valign="top" align="left">Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2019</xref>)</td>
<td valign="top" colspan="2" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="top" colspan="2" align="center">73.9 85.5</td>
<td valign="top" align="left">552M 704M</td>
<td valign="top" align="left">67.8 66.5</td>
<td valign="top" align="left">85.8 85.6</td>
<td valign="top" align="left">77.6 77.3</td>
<td valign="top" align="left">68.6 68.2</td>
<td valign="top" align="left">68.8 69.7</td>
<td valign="top" align="left">29.9 28.8</td>
</tr>
<tr>
<td valign="top" align="left">Mask R-CNN (<xref ref-type="bibr" rid="B15">He et&#xa0;al., 2017</xref>)</td>
<td valign="top" colspan="2" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="top" colspan="2" align="center">53.7 62.9</td>
<td valign="top" align="left">351M 503M</td>
<td valign="top" align="left">66.8 65.2</td>
<td valign="top" align="left">87.3 87.5</td>
<td valign="top" align="left">74.9 74.0</td>
<td valign="top" align="left">67.9 67.3</td>
<td valign="top" align="left">67.8 69.3</td>
<td valign="top" align="left">18.4 24.3</td>
</tr>
<tr>
<td valign="top" align="left">Cascade Mask R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2019</xref>)</td>
<td valign="top" colspan="2" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="top" colspan="2" align="center">73.0 87.1</td>
<td valign="top" align="left">615M 768M</td>
<td valign="top" align="left">68.7 68.9</td>
<td valign="top" align="left">86.1 86.7</td>
<td valign="top" align="left">76.6 76.8</td>
<td valign="top" align="left">69.4 69.8</td>
<td valign="top" align="left">68.5 70.6</td>
<td valign="top" align="left">21.5 22.9</td>
</tr>
<tr>
<td valign="top" align="left">Hybrid Task Cascade (<xref ref-type="bibr" rid="B7">Chen et&#xa0;al., 2019b</xref>)</td>
<td valign="top" colspan="2" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="top" colspan="2" align="center">118.9 134.6</td>
<td valign="top" align="left">639M 791M</td>
<td valign="top" align="left">67.1 67.3</td>
<td valign="top" align="left">88.4 87.6</td>
<td valign="top" align="left">79.8 79.3</td>
<td valign="top" align="left">70.3 70.8</td>
<td valign="top" align="left">72.6 73.6</td>
<td valign="top" align="left">39.0 32.8</td>
</tr>
<tr>
<td valign="top" align="left">Our Model</td>
<td valign="top" align="left" colspan="2">ELAN-Net</td>
<td valign="top" align="center" colspan="2">87</td>
<td valign="top" align="left">403M</td>
<td valign="top" align="left">69.7</td>
<td valign="top" align="left">94.9</td>
<td valign="top" align="left">86.5</td>
<td valign="top" align="left">73.4</td>
<td valign="top" align="left">76.8</td>
<td valign="top" align="left">58.6</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Comparing to various cutting-edge methods on the SSDD dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Methods</th>
<th valign="top" align="left">Backbone</th>
<th valign="top" align="left">Time (ms)</th>
<th valign="top" align="left">Model (Size)</th>
<th valign="top" align="left">AP</th>
<th valign="top" align="left">AP50</th>
<th valign="top" align="left">AP75</th>
<th valign="top" align="left">APS</th>
<th valign="top" align="left">APM</th>
<th valign="top" align="left">APL</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Faster R-CNN (<xref ref-type="bibr" rid="B31">Ren et&#xa0;al., 2015</xref>)</td>
<td valign="middle" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="middle" align="left">55.5 66.1</td>
<td valign="middle" align="left">330M 482M</td>
<td valign="middle" align="left">57.5 58.6</td>
<td valign="middle" align="left">78.1 79.0</td>
<td valign="middle" align="left">64.2 65.5</td>
<td valign="middle" align="left">42.8 43.6</td>
<td valign="middle" align="left">57.8 58.1</td>
<td valign="middle" align="left">62.7 61.6</td>
</tr>
<tr>
<td valign="middle" align="left">Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2019</xref>)</td>
<td valign="middle" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="middle" align="left">61.9 70.2</td>
<td valign="middle" align="left">552M 704M</td>
<td valign="middle" align="left">60.7 61.1</td>
<td valign="middle" align="left">90.2 91.4</td>
<td valign="middle" align="left">67.8 66.7</td>
<td valign="middle" align="left">46.4 45.7</td>
<td valign="middle" align="left">61.7 61.4</td>
<td valign="middle" align="left">66.4 61.3</td>
</tr>
<tr>
<td valign="middle" align="left">Mask R-CNN (<xref ref-type="bibr" rid="B15">He et&#xa0;al., 2017</xref>)</td>
<td valign="middle" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="middle" align="left">63.0 72.3</td>
<td valign="middle" align="left">351M 503M</td>
<td valign="middle" align="left">55.3 56.5</td>
<td valign="middle" align="left">91.3 90.7</td>
<td valign="middle" align="left">64.8 65.8</td>
<td valign="middle" align="left">41.8 41.1</td>
<td valign="middle" align="left">55.7 54.4</td>
<td valign="middle" align="left">59.9 60.2</td>
</tr>
<tr>
<td valign="middle" align="left">Cascade Mask R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2019</xref>)</td>
<td valign="middle" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="middle" align="left">85.6 93.8</td>
<td valign="middle" align="left">615M 768M</td>
<td valign="middle" align="left">60.2 59.7</td>
<td valign="middle" align="left">88.5 87.2</td>
<td valign="middle" align="left">66.8 67.7</td>
<td valign="middle" align="left">47.5 46.2</td>
<td valign="middle" align="left">63.5 63.0</td>
<td valign="middle" align="left">66.4 65.7</td>
</tr>
<tr>
<td valign="middle" align="left">Hybrid Task Cascade (<xref ref-type="bibr" rid="B7">Chen et&#xa0;al., 2019b</xref>)</td>
<td valign="middle" align="left">ResNet-50+FPN ResNet-101+FPN</td>
<td valign="middle" align="left">153.2 168.5</td>
<td valign="middle" align="left">639M 791M</td>
<td valign="middle" align="left">68.7 67.8</td>
<td valign="middle" align="left">91.2 92.6</td>
<td valign="middle" align="left">75.5 74.9</td>
<td valign="middle" align="left">52.2 54.6</td>
<td valign="middle" align="left">68.9 67.8</td>
<td valign="middle" align="left">70.5 73.8</td>
</tr>
<tr>
<td valign="middle" align="left">Our Model</td>
<td valign="middle" align="left">ELAN-Net</td>
<td valign="middle" align="left">96</td>
<td valign="middle" align="left">403M</td>
<td valign="middle" align="left">70.3</td>
<td valign="middle" align="left">94.7</td>
<td valign="middle" align="left">76.5</td>
<td valign="middle" align="left">55.9</td>
<td valign="middle" align="left">70.2</td>
<td valign="middle" align="left">75.1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref> shows that our model achieves a 70.3% AP on the SSDD dataset, which represents an improvement of 11.7% compared to Faster R-CNN, 9.2% compared to Cascade R-CNN, 13.8% compared to Mask R-CNN, 10.3% compared to Cascade Mask R-CNN, and 2.5% compared to Hybrid Task Cascade. In summary, the recommended model has superior instance segmentation effectiveness and more precise predicted instance masks when compared to previous instance segmentation algorithms on the SSDD dataset. The AP50 score of our model is also 94.7%, which is an improvement of 15.7% over Faster R-CNN, 3.3% over Cascade R-CNN, 4% over Mask R-CNN, 7.7% over Cascade Mask R-CNN, and 2% over Hybrid Task Cascade. Our model obtains an AP75 of 76.5%, which is an improvement of 11% over Faster R-CNN, 9.8% over Cascade R-CNN, 10.8% over Mask R-CNN, 8.8% over Cascade Mask R-CNN, and 1.6% over Hybrid Task Cascade. It has been shown that mask segmentation with our model is more accurate than, and superior to, segmentation using other cutting-edge techniques on the SSDD dataset. According to APS, APM, and APL, the SSDD dataset&#x2019;s small, medium, and large target efficacy has also improved. We achieve the similar achievement as our model on the NWPU VHR-10 dataset under several AP indicators, and some AP indicators even outperform it.</p>
<p>
<xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref> and <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref> show how our model performs better with fewer parameters and less computational expense. The proposed model incorporates several improvements to the YOLOv7 backbone architecture, including the addition of an ELAN-Net backbone and FPN, the SiLU activation function, and the E-ELAN module. These improvements allow the model to more effectively extract and use relevant features from SAR images, resulting in improved detection and segmentation performance. Moreover, the proposed model achieves this improved performance while using fewer parameters and less computational expense compared to other modern models, as shown in <xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref> and <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref>. The reduced parameter count and computational expense are due to the use of the SiLU activation function, which is more computationally efficient than the traditional ReLU activation function. Additionally, the E-ELAN module selectively weighs the feature maps, further reducing the computational expense without compromising performance.</p>
<p>Furthermore, with comparable model sizes and levels of computational complexity, our models outperform the Mask Scoring R-CNN and Mask R-CNN. Comparing our models to Hybrid Task Cascade and Cascade Mask R-CNN, we find that our models outperform them while consuming less processing power and having a smaller model size. Our network is therefore better than other modern algorithms in terms of model size and processing complexity.</p>
<p>In order to assess the detectors&#x2019; capacities to locate the ship in complex situations and to test their capacity to deliver adequately observable results, some complex scenarios are added to the datasets. The findings demonstrate that complex situations, like those containing nearby ships and small ships scattered in a cluster, continue to provide a challenge to detectors. The generated mask may accurately show the distribution of ships with their concrete shape pixel-by-pixel with regard to the visual identification outcomes in instance segmentation, laying the groundwork for further instance segmentation investigations. As a result, when compared to other cutting-edge techniques, our model creates instance masks that are more precise and improves the performance of instance segmentation in HR-RS images.</p>
<p>CNN-based object detection in RS images has been shown to have problems. YOLOv7 was used as the fundamental detection network, whereas the ELAN-Net backbone network was designed to improve it. The results of our studies demonstrate that the enhanced algorithm we built considerably improves the identification efficiency of small-scale objects in RS images and increases the accuracy of multi-scale object segmentation. The HRSID and SSDD datasets were used for our investigation because there are no established, open remote sensing mask datasets available, and there might only be a few different varieties. We also need to conduct further research to improve the model inference speed. However, applying fuzzy preprocessing techniques to images is also necessary because the processed images are frequently affected by unknown factors (<xref ref-type="bibr" rid="B38">Versaci et&#xa0;al., 2015</xref>). Our next study will focus on solving the aforementioned issues, and in order to test our new models, we will first look for and create more RS mask datasets with a wider range of object classes. Additionally, we will use more accurate and representative datasets. The next phase of our research will involve creating a lightweight framework model that will speed up inference without sacrificing identification accuracy.</p>
<p>In summary, our proposed model achieves better performance with fewer parameters and less computational expense by incorporating several improvements to the YOLOv7 backbone architecture, and by using the SiLU activation function and the E-ELAN module to more effectively extract and use relevant features from SAR images.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title>
<p>The aerospace and remote sensing (RS) domains are heavily influenced by instance segmentation and object recognition tasks, which have a wide range of potential applications in various real-world scenarios. In recent times, the importance of ship identification in RS satellite images has increased. While most current algorithms identify ships using rectangular bounding boxes, they do not segment pixels. As a result, our research offers an enhanced YOLOv7 one-stage detection technique for ship segmentation and identification in RS imagery, capable of accurately recognizing and segmenting ships at the pixel level. We have redesigned the network structure to adapt to the task of ship target segmentation and added two feature optimization modules to the backbone network to increase the robustness of network feature extraction. In addition, we improved the network feature fusion structure and enhanced the prediction capability of multi-scale targets by optimizing the model&#x2019;s receptive field. Based on the experimental outcomes on the SSDD and HRSID datasets, our model demonstrates improved accuracy in predicting instance masks, promoting the success of instance segmentation in HR-RS imaging and encouraging further advancements in mask prediction accuracy. Our proposed method outperforms existing methods for segmenting ships in remote sensing images, and we plan to extend our research to the segmentation of objects in drone images. While our proposed approach has limitations in handling extremely small or crowded ship instances, we acknowledge this limitation and suggest further optimization of the network architecture and training strategies. Additionally, we have not yet explored the potential of other advanced techniques such as depthwise separable convolutional neural networks, balance learning, and attention mechanisms, which could be interesting directions for future research.
In summary, our proposed approach provides a more precise and effective solution for ship segmentation and identification in RS imagery, and our future work will focus on extending the application of our proposed method to other remote sensing scenarios.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>Conceptualization, MY and LZ. Methodology, MY and SL. Software, MY, SL, and LZ. Validation, WJ, SL, and LZ. Formal analysis, MH, QI, and AC. Investigation, ML. Resources, LZ. Data curation, MY, SM, QI, and QY. Writing-original draft preparation, MY. Writing-review and editing, JW, LZ, and SL. Visualization, SL and LZ. Supervision, JW and SL. Project administration, JW. Funding acquisition, LZ. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This work is supported by Global atmospheric aerosol dataset development (HX20220168).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bolya</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>Y. J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Yolact: real-time instance segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>. <fpage>9157</fpage>&#x2013;<lpage>9166</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Vasconcelos</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Cascade r-CNN: high quality object detection and instance segmentation</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>43</volume>, <fpage>1483</fpage>&#x2013;<lpage>1498</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.2019.2956516</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname> <given-names>Y.-L.</given-names>
</name>
<name>
<surname>Anagaw</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y. C.</given-names>
</name>
<name>
<surname>Hsiao</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>W.-H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ship detection based on YOLOv2 for SAR imagery</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>786</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11070786</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>S.-W.</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>X.-C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.-S.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>S.-P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Speckle-free SAR image ship detection</article-title>. <source>IEEE Trans. Image Process.</source> <volume>30</volume>, <fpage>5969</fpage>&#x2013;<lpage>5983</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIP.2021.3089936</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>He</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Pei</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>a). <article-title>MSARN: a deep neural network based on an adaptive recalibration mechanism for multiscale and arbitrary-oriented SAR ship detection</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>159262</fpage>&#x2013;<lpage>159283</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2951030</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Mulgrew</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Grant</surname> <given-names>P. M.</given-names>
</name>
</person-group> (<year>1993</year>). <article-title>A clustering technique for digital communications channel equalization using radial basis function networks</article-title>. <source>IEEE Trans. Neural Networks</source> <volume>4</volume>, <fpage>570</fpage>&#x2013;<lpage>590</lpage>. doi: <pub-id pub-id-type="doi">10.1109/72.238312</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Pang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>b). &#x201c;<article-title>Hybrid task cascade for instance segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. <fpage>4974</fpage>&#x2013;<lpage>4983</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Blendmask: top-down meets bottom-up for instance segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. <fpage>8573</fpage>&#x2013;<lpage>8581</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Dense attention pyramid networks for multi-scale ship detection in SAR images</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>57</volume>, <fpage>8983</fpage>&#x2013;<lpage>8997</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TGRS.2019.2923988</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dai</surname> <given-names>J.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Instance-aware semantic segmentation <italic>via</italic> multi-task network cascades</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>3150</fpage>&#x2013;<lpage>3158</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Everingham</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Van Gool</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>C. K.</given-names>
</name>
<name>
<surname>Winn</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zisserman</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>The pascal visual object classes (voc) challenge</article-title>. <source>Int. J. Comput. Vision</source> <volume>88</volume>, <fpage>303</fpage>&#x2013;<lpage>338</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11263-009-0275-4</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Lou</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>a). <article-title>Ship detection using a fully convolutional network with compact polarimetric SAR images</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>2171</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11182171</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Bai</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>b). <article-title>Ship detection using deep convolutional neural networks for PolSAR images</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>2862</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11232862</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Enhanced feature extraction for ship detection from multi-resolution and multi-scene synthetic aperture radar (SAR) images</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>2694</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11222694</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Gkioxari</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Mask r-cnn</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>. <fpage>2961</fpage>&#x2013;<lpage>2969</lpage>.</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hou</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Object detection in high-resolution panchromatic images using deep models and spatial template matching</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>58</volume>, <fpage>956</fpage>&#x2013;<lpage>970</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TGRS.2019.2942103</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Leng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A modified faster r-CNN based on CFAR algorithm for SAR ship detection</article-title>,&#x201d; in <conf-name>2017 International Workshop on Remote Sensing with Intelligent Processing (RSIP): IEEE</conf-name>. <fpage>1</fpage>&#x2013;<lpage>4</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kong</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Lightweight algorithm for multi-scale ship detection based on high-resolution SAR images</article-title>. <source>Int. J. Remote Sens.</source> <volume>44</volume>, <fpage>1390</fpage>&#x2013;<lpage>1415</lpage>. doi: <pub-id pub-id-type="doi">10.1080/01431161.2023.2182652</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Qu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Ship detection in SAR images based on an improved faster r-CNN</article-title>,&#x201d; in <conf-name>2017 SAR in Big Data Era: Models, Methods and Applications (BIGSARDATA): IEEE</conf-name>. <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Ship detection in SAR images based on feature enhancement swin transformer and adjacent feature fusion</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>3186</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14133186</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T.-Y.</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Hariharan</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Belongie</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>a). &#x201c;<article-title>Feature pyramid networks for object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>2117</fpage>&#x2013;<lpage>2125</lpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1612.03144</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T.-Y.</given-names>
</name>
<name>
<surname>Goyal</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>b). <article-title>Focal loss for dense object detection</article-title>. <source>Proc. IEEE Int. Conf. Comput. Vision</source>, <fpage>2980</fpage>&#x2013;<lpage>2988</lpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1708.02002</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>L.</given-names>
</name>
<etal/></person-group>. (<year>2022</year>). <article-title>Multi-scale ship detection algorithm based on a lightweight neural network for spaceborne SAR images</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>1149</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14051149</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Path aggregation network for instance segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>8759</fpage>&#x2013;<lpage>8768</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>a). <article-title>SCCGAN: style and characters inpainting based on CGAN</article-title>. <source>Mobile Networks Appl.</source> <volume>26</volume>, <fpage>3</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11036-020-01717-x</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xia</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>b). <article-title>Efficient image segmentation based on deep learning for mineral image classification</article-title>. <source>Advanced Powder Technol.</source> <volume>32</volume>, <fpage>3885</fpage>&#x2013;<lpage>3903</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.apt.2021.08.038</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>X. X.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Vehicle instance segmentation from aerial image and video using a multitask learning residual fully convolutional network</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>56</volume>, <fpage>6699</fpage>&#x2013;<lpage>6711</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TGRS.2018.2841808</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nie</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Wong</surname> <given-names>E. K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Attention Mask R-CNN for ship detection and segmentation from remote sensing images</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>9325</fpage>&#x2013;<lpage>9334</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2964540</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qian</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Object detection in remote sensing images based on improved bounding box regression and multi-level features fusion</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <fpage>143</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs12010143</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Divvala</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>You only look once: unified, real-time object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>779</fpage>&#x2013;<lpage>788</lpage>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Faster R-CNN: towards real-time object detection with region proposal networks</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>28</volume>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1506.01497</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>RBFA-net: a rotated balanced feature-aligned network for rotated SAR ship detection and classification</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>3345</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14143345</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname> <given-names>N.</given-names>
</name>
<name>
<surname>He</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Xing</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>SII-net: spatial information integration network for small target detection in SAR images</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>442</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14030442</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Su</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Object detection and instance segmentation in remote sensing imagery based on precise Mask R-CNN</article-title>,&#x201d; in <conf-name>IGARSS 2019-2019 IEEE International Geoscience and Remote Sensing Symposium: IEEE</conf-name>. <fpage>1454</fpage>&#x2013;<lpage>1457</lpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Leng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>a). <article-title>An anchor-free detection method for ship targets in high-resolution SAR images</article-title>. <source>IEEE J. Selected Topics Appl. Earth Observations Remote Sens.</source> <volume>14</volume>, <fpage>7799</fpage>&#x2013;<lpage>7816</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JSTARS.2021.3099483</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Leng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>An improved oriented ship detection method in high-resolution SAR image based on YOLOv5</article-title>,&#x201d; in <conf-name>2022 Photonics &amp; Electromagnetics Research Symposium (PIERS): IEEE</conf-name>. <fpage>647</fpage>&#x2013;<lpage>653</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Leng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Kuang</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>b). <article-title>BiFA-YOLO: a novel YOLO-based method for arbitrary-oriented ship detection in high-resolution SAR images</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>4209</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs13214209</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Versaci</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Calcagno</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Morabito</surname> <given-names>F. C.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Fuzzy geometrical approach based on unit hyper-cubes for image contrast enhancement</article-title>,&#x201d; in <conf-name>2015 IEEE international conference on signal and image processing applications (ICSIPA): IEEE</conf-name>. <fpage>488</fpage>&#x2013;<lpage>493</lpage>.</citation>
</ref>
<ref id="B39">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>a). &#x201c;<article-title>Solo: segmenting objects by locations</article-title>,&#x201d; in <conf-name>European Conference on Computer Vision</conf-name>. <fpage>649</fpage>&#x2013;<lpage>665</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Liao</surname> <given-names>H.-Y. M.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Y.-H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>P.-Y.</given-names>
</name>
<name>
<surname>Hsieh</surname> <given-names>J.-W.</given-names>
</name>
<name>
<surname>Yeh</surname> <given-names>I.-H.</given-names>
</name>
</person-group> (<year>2020</year>b). &#x201c;<article-title>CSPNet: a new backbone that can enhance learning capability of CNN</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops</conf-name>. <fpage>390</fpage>&#x2013;<lpage>391</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Simultaneous ship detection and orientation estimation in SAR images based on attention module and angle regression</article-title>. <source>Sensors</source> <volume>18</volume>, <fpage>2851</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s18092851</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Automatic ship detection based on RetinaNet using multi-resolution Gaofen-3 imagery</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>531</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11050531</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Waqas Zamir</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Arora</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gupta</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Khan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Shahbaz Khan</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). &#x201c;<article-title>Isaid: a large-scale dataset for instance segmentation in aerial images</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</conf-name>. <fpage>28</fpage>&#x2013;<lpage>37</lpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ming</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Precise and robust ship detection for high-resolution SAR imagery based on HR-SDNet</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <fpage>167</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs12010167</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sheng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Ke</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Hybrid motion model for multiple object tracking in mobile devices</article-title>. <source>IEEE Internet Things J</source>. doi: <pub-id pub-id-type="doi">10.1109/JIOT.2022.3219627</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xie</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). &#x201c;<article-title>Polarmask: single shot instance segmentation with polar representation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. <fpage>12193</fpage>&#x2013;<lpage>12202</lpage>.</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Leng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A lightweight model for ship detection and recognition in complex-scene SAR images</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>6053</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14236053</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>An improved swin transformer-based model for remote sensing object detection and instance segmentation</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>4779</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs13234779</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>a). <article-title>A group-wise feature enhancement-and-Fusion network with dual-polarization feature enrichment for SAR ship detection</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>5276</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14205276</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhan</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>b). <article-title>Shadow-Background-Noise 3D spatial decomposition using sparse low-rank Gaussian properties for video-SAR moving target shadow enhancement</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>19</volume>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.1109/LGRS.2022.3223514</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jianhua</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Mingming</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Hui</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhe</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shanwei</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Ship detection based on deep learning using SAR imagery: a systematic literature review</article-title>. <source>Soft Computing</source>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00500-022-07522-w</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jianhua</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Mingming</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Hui</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhe</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shanwei</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>a). <article-title>Ship detection based on deep learning using SAR imagery: a systematic literature review</article-title>. <source>Soft Computing</source> <volume>27</volume>, <fpage>63</fpage>&#x2013;<lpage>84</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00500-022-07522-w</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Shanwei</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sheng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Hossain</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Colak</surname> <given-names>A. T. I.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>b). <article-title>Multi-scale ship target detection using SAR images based on improved Yolov5</article-title>. <source>Front. Mar. Sci.</source> <volume>9</volume>, <fpage>1086140</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fmars.2022.1086140</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep feature interaction network for point cloud registration, with applications to optical measurement of blade profiles</article-title>. <source>IEEE Trans. Ind. Informatics</source>. doi: <pub-id pub-id-type="doi">10.1109/TII.2022.3220889</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A lightweight adaptive roi extraction network for precise aerial image instance segmentation</article-title>. <source>IEEE Trans. Instrumentation Measurement</source> <volume>70</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIM.2021.3121485</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>c). <article-title>Intelligent ship detection in remote sensing images based on multi-layer convolutional feature fusion</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <fpage>3316</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs12203316</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Bruzzone</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>a). <article-title>Multi-scale context aggregation for semantic segmentation of remote sensing images</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <fpage>701</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs12040701</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.-Y.</given-names>
</name>
</person-group> (<year>2021</year>a). <article-title>C2FDA: coarse-to-fine domain adaptation for traffic object detection</article-title>. <source>IEEE Trans. Intelligent Transportation Syst.</source> <volume>23</volume>, <fpage>12633</fpage>&#x2013;<lpage>12647</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TITS.2021.3115823</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2019</year>a). <article-title>R-CNN-based ship detection from high resolution remote sensing imagery</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>631</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11060631</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Synthetic aperture radar (SAR) meets deep learning</article-title>. <source>Remote Sens.</source> <volume>15</volume>, <fpage>303</fpage>.</citation>
</ref>
<ref id="B61">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>b). &#x201c;<article-title>Integrate traditional hand-crafted features into modern CNN-based models to further improve SAR ship classification accuracy</article-title>,&#x201d; in <conf-name>2021 7th Asia-Pacific Conference on Synthetic Aperture Radar (APSAR)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>High-speed ship detection in SAR images based on a grid convolutional neural network</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>1206</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11101206</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>a). <article-title>Injection of traditional hand-crafted features into modern CNN-based models for SAR ship classification: what, why, where, and how</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>2091</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs13112091</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>a). <article-title>A full-level context squeeze-and-excitation ROI extractor for SAR ship instance segmentation</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>19</volume>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.1109/LGRS.2022.3166387</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>b). <article-title>HTC+ for SAR ship instance segmentation</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>2395</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14102395</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>c). <article-title>A mask attention interaction and scale enhancement network for SAR ship instance segmentation</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>19</volume>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.1109/LGRS.2022.3189961</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>d). <article-title>A polarization fusion network with geometric feature embedding for SAR ship classification</article-title>. <source>Pattern Recognition</source> <volume>123</volume>, <fpage>108365</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.patcog.2021.108365</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ahmad</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>b). <article-title>Balance learning for ship detection from synthetic aperture radar remote sensing imagery</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>182</volume>, <fpage>190</fpage>&#x2013;<lpage>207</lpage>.</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>b). <article-title>Depthwise separable convolution neural network for high-speed SAR ship detection</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <fpage>2483</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs11212483</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>b). <article-title>HyperLi-net: a hyper-light deep learning network for high-accurate and high-speed ship detection from synthetic aperture radar imagery</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>167</volume>, <fpage>123</fpage>&#x2013;<lpage>153</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2020.05.016</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Hwang</surname> <given-names>J.-N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>GMNet: graded-feature multilabel-learning network for RGB-thermal urban scene semantic segmentation</article-title>. <source>IEEE Trans. Image Process.</source> <volume>30</volume>, <fpage>7790</fpage>&#x2013;<lpage>7802</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIP.2021.3109518</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Global and local-contrast guides content-aware fusion for RGB-D saliency prediction</article-title>. <source>IEEE Trans. Systems Man Cybernetics: Syst.</source> <volume>51</volume>, <fpage>3641</fpage>&#x2013;<lpage>3649</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2019.2957386</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>b). <article-title>Ore image classification based on improved CNN</article-title>. <source>Comput. Electrical Eng.</source> <volume>99</volume>, <fpage>107819</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compeleceng.2022.107819</pub-id>
</citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>a). <article-title>Study on pixel entanglement theory for imagery classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>.</citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Isolated Ni atoms induced edge stabilities and equilibrium shapes of CVD-prepared hexagonal boron nitride on the Ni (111) surface</article-title>. <source>New J. Chem.</source> <volume>46</volume>, <fpage>17496</fpage>&#x2013;<lpage>17504</lpage>. doi: <pub-id pub-id-type="doi">10.1039/D2NJ03735A</pub-id>
</citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zong</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Container ship cell guide accuracy check technology based on improved 3D point cloud instance segmentation</article-title>. <source>Brodogradnja: Teorija i praksa brodogradnje i pomorske tehnike</source> <volume>73</volume>, <fpage>23</fpage>&#x2013;<lpage>35</lpage>. doi: <pub-id pub-id-type="doi">10.21278/brod73102</pub-id>
</citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zong</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An improved 3D point cloud instance segmentation method for overhead catenary height detection</article-title>. <source>Comput. Electrical Eng.</source> <volume>98</volume>, <fpage>107685</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compeleceng.2022.107685</pub-id>
</citation>
</ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>MW-ACGAN: generating multiscale high-resolution SAR images for ship detection</article-title>. <source>Sensors</source> <volume>20</volume>, <fpage>6673</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s20226673</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>