<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2022.1086140</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Multi-scale ship target detection using SAR images based on improved Yolov5</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Yasir</surname>
<given-names>Muhammad</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2076588"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shanwei</surname>
<given-names>Liu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mingming</surname>
<given-names>Xu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hui</surname>
<given-names>Sheng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1984178"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hossain</surname>
<given-names>Md Sakaouth</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Colak</surname>
<given-names>Arife Tugsan Isiacik</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2079456"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Dawei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2025181"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jianhua</surname>
<given-names>Wan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dang</surname>
<given-names>Kinh Bac</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2078314"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Oceanography and Space Informatics, China University of Petroleum (East China)</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Geological Sciences, Jahangirnagar University</institution>, <addr-line>Dhaka</addr-line>, <country>Bangladesh</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>National University International Maritime College Oman</institution>, <addr-line>Sahar</addr-line>, <country>Oman</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Faculty of Geography, VNU University of Science, Vietnam National University</institution>, <addr-line>Hanoi</addr-line>, <country>Vietnam</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Hong Song, Zhejiang University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Islam Zada, International Islamic University, Pakistan; Mohammad Faisal, University of Malakand, Pakistan; Yi Ma, Ministry of Natural Resources, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Wan Jianhua, <email xlink:href="mailto:19850014@upc.edu.cn">19850014@upc.edu.cn</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Ocean Observation, a section of the journal Frontiers in Marine Science</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>01</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>9</volume>
<elocation-id>1086140</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>11</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Yasir, Shanwei, Mingming, Hui, Hossain, Colak, Wang, Jianhua and Dang</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Yasir, Shanwei, Mingming, Hui, Hossain, Colak, Wang, Jianhua and Dang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Synthetic aperture radar (SAR) imaging is used to identify ships, which is a vital task in the maritime industry for managing maritime fisheries, marine transit, and rescue operations. However, some problems, like complex background interferences, various size ship feature variations, and indistinct tiny ship characteristics, continue to be challenges that tend to defy accuracy improvements in SAR ship detection. This research study for multiscale SAR ships detection has developed an upgraded YOLOv5s technique to address these issues. Using the C3 and FPN + PAN structures and attention mechanism, the generic YOLOv5 model has been enhanced in the backbone and neck section to achieve high identification rates. The SAR ship detection datasets and AirSARship datasets, along with two SAR large scene images acquired from the Chinese GF-3 satellite, are utilized to determine the experimental results. This model&#x2019;s applicability is assessed using a variety of validation metrics, including accuracy, different training and test sets, and TF values, as well as comparisons with other cutting-edge classification models (ARPN, DAPN, Quad-FPN, HR-SDNet, Grid R-CNN, Cascade R-CNN, Multi-Stage YOLOv4-LITE, EfficientDet, Free-Anchor, Lite-Yolov5). The performance values demonstrate that the suggested model performed superior to the benchmark model used in this study, with higher identification rates. Additionally, these excellent identification rates demonstrate the recommended model&#x2019;s applicability for maritime surveillance.</p>
</abstract>
<kwd-group>
<kwd>synthetic aperture radar (SAR)</kwd>
<kwd>ship identification</kwd>
<kwd>artificial intelligence</kwd>
<kwd>deep learning (DL)</kwd>
<kwd>YOLOv5S</kwd>
<kwd>SAR ship detection dataset (SSDD)</kwd>
<kwd>AirSARship</kwd>
</kwd-group>
<counts>
<fig-count count="15"/>
<table-count count="8"/>
<equation-count count="7"/>
<ref-count count="65"/>
<page-count count="21"/>
<word-count count="8372"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Detection of ships is a crucial task in the maritime industry for controlling maritime fisheries, marine transit, and rescue operations. However, some issues, such as intricate backdrop interferences, numerous ship size fluctuations, and imprecise little ship features, still pose difficulties and frequently thwart advancements in SAR ship recognition accuracy. Accurate position and trajectory determination of the target ship is essential for managing maritime traffic, recovering from maritime accidents, and the economy (<xref ref-type="bibr" rid="B50">Xiao et&#xa0;al., 2020</xref>).</p>
<p>According to the kinds of remote sensing technologies used, the two main categories of ship detection study are, respectively, the SAR image-based and optical satellite image-based methodologies. One of the major challenges of ship identification in optical remote sensing images is finding suitable areas from complex backgrounds fast and correctly (<xref ref-type="bibr" rid="B43">Wang et&#xa0;al., 2021</xref>). High-resolution capabilities, independence from the weather, and flight altitude independence are all attributes of SAR images. SAR&#x2019;s self-illumination capability ensures that it always produces high-quality images under any circumstance (<xref ref-type="bibr" rid="B4">Chang et&#xa0;al., 2019</xref>). SAR has been extensively employed in ship identification (<xref ref-type="bibr" rid="B25">Ma et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B53">Xu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B23">Li et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B55">Yasir et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B52">Xiong et&#xa0;al., 2022</xref>), oil spill identification (<xref ref-type="bibr" rid="B56">Yekeen et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B45">Wang et&#xa0;al., 2022</xref>), change detection (<xref ref-type="bibr" rid="B10">Gao et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B6">Chen and Shi, 2020</xref>; <xref ref-type="bibr" rid="B59">Zhang et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B45">Wang et&#xa0;al., 2022</xref>), and other fields (<xref ref-type="bibr" rid="B28">Niedermeier et&#xa0;al., 2000</xref>; <xref ref-type="bibr" rid="B1">Baselice and Ferraioli, 2013</xref>). Because of its broad observation range, brief observation duration, great data timeliness, and high spatial resolution (<xref ref-type="bibr" rid="B29">Ouchi, 2013</xref>), SAR plays a significant role in ship identification. 
The amount and quality of SAR data have been steadily improving recently due to the quick development of space-borne SAR-imaging technologies. As a result, many researchers are studying how to identify ships in HR SAR images (<xref ref-type="bibr" rid="B44">Wang et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B16">Li et&#xa0;al., 2017b</xref>; <xref ref-type="bibr" rid="B36">Salembier et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B8">Du et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B17">Lin et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B39">Wang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B46">Wang et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B41">Wang et&#xa0;al., 2020c</xref>; <xref ref-type="bibr" rid="B42">Wang et&#xa0;al., 2020d</xref>; <xref ref-type="bibr" rid="B62">Zhang et&#xa0;al., 2020c</xref>; <xref ref-type="bibr" rid="B55">Yasir et&#xa0;al., 2022</xref>). However, due to the complicated environment and other difficult issues, such as sidelobes and target defocusing (<xref ref-type="bibr" rid="B5">Chen et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B13">Han et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B51">Xiong et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B57">Yuan et&#xa0;al., 2020</xref>), identifying ship targets in HR SAR images is still challenging.</p>
<p>Deep learning (DL) technologies have advanced quickly in recent years, enabling natural image identification. Convolutional neural networks (CNNs) were introduced into the target identification area by R-CNN (<xref ref-type="bibr" rid="B12">Girshick et&#xa0;al., 2014</xref>), and as a result, target identification has received new scientific research thoughts, and its use in SAR images has a wide range of potential applications. Currently, two-stage identification approaches represented by R-CNN, Fast R-CNN, and Faster R-CNN (<xref ref-type="bibr" rid="B11">Girshick, 2015</xref>; <xref ref-type="bibr" rid="B34">Ren et&#xa0;al., 2015</xref>) are the main convolutional neural network-based algorithms employed in ship identification in SAR images. The complexity of their network topologies, the sheer number of parameters, and the slow recognition speed, however, prevent them from being able to complete ship detection tasks in the required amount of time. The target identification problem is also seen as a regression analysis task involving target location and category information by the single-stage algorithms from the SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>) and YOLO (<xref ref-type="bibr" rid="B31">Redmon et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B32">Redmon and Farhadi, 2017</xref>; <xref ref-type="bibr" rid="B33">Redmon and Farhadi, 2018</xref>; <xref ref-type="bibr" rid="B30">Patel et&#xa0;al., 2022</xref>) series. They are more suited to ship identification applications that need virtually real-time identification since they output the identification results directly through a neural network model with high accuracy and speed (<xref ref-type="bibr" rid="B48">Willburger et&#xa0;al., 2020</xref>).</p>
<p>Although the aforementioned methods have strong detection performance, it is challenging to directly apply them to SAR ship identification. There are still a number of problems with the DL-based ship identification approaches in SAR images (<xref ref-type="bibr" rid="B15">Li et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B60">Zhang et&#xa0;al., 2021a</xref>). (i) Due to the unique imaging technique utilized by SAR, there is less contrast between the ocean and ship in the SAR images since there are more scattering noise and sea debris and less side flap. (ii) Different ships have various sizes and shapes which are reflected in SAR images as varying numbers of pixels, especially for tiny-scale ships. Smaller ships have less information about their whereabouts than large ships, and since they have fewer pixels, they are more susceptible to being deceived by the speckle noise in SAR images. While this is going on, the detection process becomes more complex, which lowers the accuracy of identification and recognition. (iii) SAR images cannot be directly supplied to the network for identification if the scene is large. It is anticipated that the network has now received the SAR image of the expansive landscape. The ship target will be resampled in this situation to a few or possibly only one pixel, which will significantly reduce the identification accuracy. The main goals of the current study are as follows:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; To achieve optimum identification of multi-size ship targets in SAR images using a modified YOLOv5 model.</p>
</list-item>
<list-item>
<p>&#x2022; To offer the backbone extraction network a well-designed structure, a set of CSP framework and attention mechanisms have been upgraded, and the output layer has been expanded to four feature layers.</p>
</list-item>
<list-item>
<p>&#x2022; To improve the overall performance throughout the recognition process, this improved version of the YOLOv5 model also produces effective results in a condensed amount of time with a relatively smaller database.</p>
</list-item>
<list-item>
<p>&#x2022; To use the SSDD and AirSARship datasets, two distinct and well-known datasets, in these simulations. The SSDD collection contains 1160 SAR images in total, collected by RadarSat-2, TerraSAR-X, and Sentinel-1, with resolutions ranging from 1m to 10m and polarizations in HH, HV, VV, and VH. Gaofen-3 has collected 31 single-polarized SAR images, which are included in the AirSARship dataset.</p>
</list-item>
<list-item>
<p>&#x2022; To assess the suggested model&#x2019;s applicability utilizing cutting-edge benchmark convolutional neural network-based techniques.</p>
</list-item>
<list-item>
<p>&#x2022; To employ several performance indicators for application evaluation reasons, including precision, accuracy, time consumption, and different training and test sets.</p>
</list-item>
<list-item>
<p>&#x2022; To demonstrate the model&#x2019;s superiority by comparing its performance results with those of the desired benchmark models (CNN-based SAR ship identification techniques).</p>
</list-item>
</list>
<p>The paper is organized as follows: Section 2 explains the proposed methodology, which serves as an organizational framework for the remainder of the research. The findings and analysis of the suggested research project are described in Section 3, which also demonstrates the model&#x2019;s usefulness by contrasting it with other cutting-edge models. Section 4 describes the ablation study, and the paper is concluded in Section 5.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Proposed methodology</title>
<p>The target of this current study is to develop a ship detection model that could potentially function when there are inadequate hardware resources. Because of its reputation for speed and accuracy, the lightweight version of YOLO has received attention. The open-source YOLO model was first presented by Joseph Redmon in 2016 (<xref ref-type="bibr" rid="B31">Redmon et&#xa0;al., 2016</xref>). It is suitable as a real-time system since it can identify things at extremely quick speeds. In this research work, an upgraded lightweight version of YOLOv5 is used. This upgraded model resulted in higher accuracy and efficient identification capabilities (<xref ref-type="bibr" rid="B3">Caputo et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B27">Nepal and Eslamiat, 2022</xref>). Two datasets that are available in the literature, the AirSARship (<xref ref-type="bibr" rid="B49">Xian et&#xa0;al., 2019</xref>) dataset and the SSDD dataset (<xref ref-type="bibr" rid="B19">Li et&#xa0;al., 2017a</xref>), have both been considered.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Data augmentation</title>
<p>In order to train the model for deep learning, a lot of data is typically required. However, in practice, certain data sets are challenging to collect, leading to a small quantity of data in this category that falls short of the required data volume for deep learning. Experts have thus suggested data augmentation approaches to successfully address this issue (<xref ref-type="bibr" rid="B26">Najafabadi et&#xa0;al., 2015</xref>). Data augmentation techniques such as random rotation and mosaic were used. Given training data, mosaic randomly crops four images and stitches them together to create one. It has the advantage of enriching the background of the image and enhancing the batch size discretely so that it can help to minimize the model&#x2019;s dependence on a large batch size when training (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Illustration of the random rotation and mosaic data augmentation techniques.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g001.tif"/>
</fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data annotation</title>
<p>The images were annotated using the LabelImg software, which generates a json annotation file and transforms it into a txt file. The type and number of the labeling target, the labeling image&#x2019;s standardized width and height, and the center point&#x2019;s coordinates are all information that can be found in the txt file. <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> displays the labelling outcomes.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>SAR image annotation procedure. The parameters of actual ships were obtained using DL algorithms for image annotation, where (x, y) represents the coordinates of the top-left corner of the rectangular box, w its width, and h its height.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g002.tif"/>
</fig>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>YOLOv5 network</title>
<p>YOLO is a regression-based technique and, despite being less accurate, is actually faster than region proposal-based methods like R-CNN (<xref ref-type="bibr" rid="B12">Girshick et&#xa0;al., 2014</xref>). The goal of YOLO is to achieve object identification by approaching it as a regression and classification issue. Identifying the bounding box coordinates for the objects in the images is the first step, and the second step is classifying the identified objects into a class. This is accomplished in a single step by first splitting the input images into a grid of cells, then determining the bounding box and relative confidence score for the object contained in each cell.</p>
<p>The YOLOv5 network is one of the recent research advancements in the YOLO series of algorithms. Despite sharing a network structure with the YOLOv4 network, it is smaller, has a faster running speed and convergence speed, and uses a lightweight algorithm. Additionally, it improves precision at the same time. As a result, the YOLOv5s algorithm is used in the current study work to detect ships in SAR images. Four components make up the YOLOv5 network structure: input, backbone, neck, and prediction. The Yolov5 framework architecture is displayed in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>. Networks can be categorized as YOLOv5l, YOLOv5m, YOLOv5x, and YOLOv5s. Their widths and depths may differ significantly, but their network structures are comparable. The network structure of YOLOv5s is the shortest, shallowest, runs the fastest, and has the least accuracy. As a result, the accuracy continues to rise, the speed of operation declines, and the other three network structures increasingly deepen and widen. Adaptive anchor box operation, mosaic data augmentation, image scaling, and CSP structure were used to process the input dataset, while focus and CSP framework were used to build the backbone. Focus increased network speed and cut down on floating-point operations (FLOPs) by clipping the input image. <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref> presents the Focus structure. The two CSP (<xref ref-type="bibr" rid="B40">Wang et&#xa0;al., 2020a</xref>) frameworks that were used by YOLOv5 were CSP1_X and CSP2_X; CSP1_X was utilized for down sampling in the backbone while CSP2_X was utilized in the neck. CSP can reduce operations while increasing the network&#x2019;s capacity for learning and guaranteeing accuracy. <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref> depicts the two CSPs&#x2019; structures; the neck used the SPP-net and FPN + PAN framework to improve the network&#x2019;s feature fusion effect, while the prediction employed the GIOU_Loss (<xref ref-type="bibr" rid="B35">Rezatofighi et&#xa0;al., 2019</xref>), which focuses not only on the overlap between the prediction box and the ground truth but also on the non-overlapping areas. <xref ref-type="bibr" rid="B58">Yu et&#xa0;al. (2016)</xref> found that GIOU maintains the benefits of IOU while solving its issues. IOU and GIOU are computed according to Equations (1) and (2), where A and B are the prediction box and the ground-truth box, and C is the smallest box enclosing both:</p>
<disp-formula>
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>U</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo> <mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow> <mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>U</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>U</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#x2216;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>&#xa0;<bold>(A)</bold> The framework of the YOLOv5 Model, and <bold>(B)</bold> CSP structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g003.tif"/>
</fig>
<p>Four separate networks were used, and YOLOv5 was continuously upgraded as well. Version 5.0 of YOLOv5s was used in this study project; in comparison to version 4.0, this version modified all functional activation in the framework to SiLU (<xref ref-type="bibr" rid="B9">Elfwing et&#xa0;al., 2018</xref>), eliminated the conv in the CSP, and designated it C3 as presented in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. Additionally, v5.0 has a smaller and faster network structure than v4.0.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The C3 structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g004.tif"/>
</fig>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>YOLOv5 network improvement</title>
<p>In this section of the study, the improvements made to the YOLOv5 classifier have been described in accordance with the guidelines of the proposed research challenge. The neck and backbone parts are enhanced to produce the greatest identification outcomes.</p>
<sec id="s2_4_1">
<label>2.4.1</label>
<title>Backbone improvement</title>
<p>It is frequently possible to combine features from various scales to obtain more meaningful object information. The high-level feature has lesser resolution and poor perception of object information, but the receptive field is bigger, which is suited for identifying huge objects. The low-level feature has higher resolution, a smaller receptive field, more texture information, and more noise. The complex background environment in the SSDD and AirSARship dataset results in some large ground objects having an inadequate detection effect. In the current research work, a set of C3 framework was used to construct the YOLOv5s backbone network. The original three sets of C3 were converted into four sets of C3 to further the network framework as a whole (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>). In turn, the model&#x2019;s detection accuracy may have improved as a result of the network&#x2019;s increased ability to communicate and learn about larger ground objects.</p>
</sec>
<sec id="s2_4_2">
<label>2.4.2</label>
<title>Attentional mechanism</title>
<p>The human visual attention process is referred to as the &#x201c;attention mechanism,&#x201d; which concentrates on local details and blocks out redundant details. To put it another way, the network is able to identify critical information among a plethora of data due to the attention mechanism. The network performance is enhanced in this way by the addition of a small amount of computing. <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref> presents the improved backbone structure.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>The improved backbone Network structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g005.tif"/>
</fig>
</sec>
<sec id="s2_4_3">
<label>2.4.3</label>
<title>Neck enhancement</title>
<p>The neck was constructed using the FPN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>) + PAN (<xref ref-type="bibr" rid="B22">Liu et&#xa0;al., 2018</xref>) framework. This framework incorporated a bottom-up feature pyramid network after the FPN, which improved location information and semantic expression on various scales. The C32_X structure was incorporated into the neck of the YOLOv5s to enhance the feature fusion impact of the network framework. Because of the development of a set of C3 structures in the current research work, an output layer was added to the network&#x2019;s neck to increase feature extraction. <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> presents the improved FPN + PAN structure.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Improved FPN + PAN structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g006.tif"/>
</fig>
</sec>
<sec id="s2_4_4">
<label>2.4.4</label>
<title>Extending receptive field area</title>
<p>Each pixel in the output feature map must respond to an area in the image that is large enough for it to get information about the large object, which makes the size of the receptive field a major issue in many vision applications. Consequently, a maximum pooling layer has been chosen to be added to the space pyramid in order to improve multiple receptive field fusion and increase the accuracy of identification of tiny targets. The updated architecture is shown in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> includes a graphic representation of the contribution of a maximum pooling layer. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> shows the spatial pyramid pooling module SPP and the combination module CBL, which combines convolutional layers, BN, and activation function layers. The addition of a 3*3 maximum pooling filter has increased the model&#x2019;s receptive field.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Pooling layer improvement structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g007.tif"/>
</fig>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results and discussion</title>
<p>This section has a detailed description of the SSDD, AirSARship datasets, experimental settings as well as evaluation metrics and assesses the performance of the technique. The testing set is then separated into two sets, one is offshore ships and the other one is inshore ships, and each group has been used to assess the efficacy of the various strategies. The identification outcomes of the current model and a few unique CNN-based models are shown on the two SAR large scene images.</p>
<sec id="s3_1">
<label>3.1</label>
<title>Dataset introduction</title>
<p>
<bold>SSDD dataset:</bold> The first and most important stage in ship detection using deep learning techniques is the construction of a sizable and representative dataset. Therefore, the experiments utilize the SSDD (<xref ref-type="bibr" rid="B19">Li et&#xa0;al., 2017a</xref>) dataset, which has 1160 SAR images with resolutions ranging from 1m to 10m with polarizations in HH, HV, VV, and VH from RadarSat-2, TerraSAR-X, and Sentinel-1. Each sample image has a dimension of roughly 800 x 800 pixels. The SSDD dataset is divided into three sets for the experiment with a ratio of 7:1:2: a training set, a validation set, and a testing set (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>).</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Complete details of the datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Dataset</th>
<th valign="top" align="center">Size (pixel)</th>
<th valign="top" align="center">Image (num)</th>
<th valign="top" align="center">Mode</th>
<th valign="top" align="center">Satellite</th>
<th valign="top" align="center">Resolution(m)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SSDD</td>
<td valign="top" align="center">390 &#xd7;205<break/>500 &#xd7;500</td>
<td valign="top" align="center">1160</td>
<td valign="top" align="left">SL</td>
<td valign="top" align="left">RadarSat-2<break/>TerraSAR-X<break/>Sentinel-1</td>
<td valign="top" align="center">1-15</td>
</tr>
<tr>
<td valign="top" align="left">AirSARship</td>
<td valign="top" align="center">3000 &#xd7; 3000</td>
<td valign="top" align="center">31</td>
<td valign="top" align="left">SL/UFS</td>
<td valign="top" align="left">GF-3</td>
<td valign="top" align="center">1/3</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<bold>AirSARship Dataset:</bold> In the present study, the experiments also use the AirSARShip-1.0 (<xref ref-type="bibr" rid="B49">Xian et&#xa0;al., 2019</xref>) dataset, a high-resolution SAR ship identification dataset, to assess the performance of the proposed model. Gaofen-3 provided 31 single-polarized SAR images for AirSARShip-1.0. Most images have a size of 3000 x 3000 pixels, while one has a size of 4140 x 4140 pixels; the resolutions range from 1 to 3 meters, and the polarization is HH. The large scene images have been split into 1000 x 1000 slices, and the dataset is divided into a training set, a validation set, and a testing set with a ratio of 7:1:2 (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>).</p>
<p>Two SAR large scene images from the Chinese GF-3 satellite, as shown in <xref ref-type="fig" rid="f13">
<bold>Figure&#xa0;13</bold>
</xref>, further illustrate the efficacy of the suggested strategy for identifying different size ships in SAR large scene images with complicated sceneries. These images contain inshore and offshore scenery as well as ship targets at various scales. In <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>, some image slices are presented and offshore and inshore scenes as well as multiscale ship targets are primarily shown in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8A</bold>
</xref>. The dataset clearly shows that both offshore and inshore scenarios are included, and that the sizes of the ships vary widely.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Inshore, offshore, and different-scale ship targets in SAR images: <bold>(A)</bold> ships from the first SAR large scene image presented in <xref ref-type="fig" rid="f13">
<bold>Figure 13A</bold>
</xref>, and <bold>(B)</bold> ships from the second SAR large scene image presented in <xref ref-type="fig" rid="f13">
<bold>Figure 13B</bold>
</xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g008.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Experimental environment</title>
<p>All experiments were carried out using PyTorch 1.7.0, CUDA 10.1, and CUDNN 7.6.5 on an NVIDIA Geforce GTX 2080Ti GPU and an Intel Core i9-9900KF CPU. The PC, which ran Windows 10 and was equipped with a deep learning environment for our research, is described in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>. Additionally, each model was trained for 100000 iterations using the Stochastic Gradient Descent (SGD) technique with two images per minibatch. The initial learning rate was set to 0.001, while the weight decay was set to 0.00004. In every trial, the detection IOU threshold was set to 0.7. <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> lists the experimental hardware and software configuration. During the experiments, the same platform was used for all compared techniques.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Environment Configuration.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Project</th>
<th valign="top" align="center">Model/Parameter</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">System</td>
<td valign="top" align="left">windows 10</td>
</tr>
<tr>
<td valign="top" align="left">RAM</td>
<td valign="top" align="left">32GB</td>
</tr>
<tr>
<td valign="top" align="left">GPU</td>
<td valign="top" align="left">NVIDIA GTX Geforce 2080 Ti</td>
</tr>
<tr>
<td valign="top" align="left">CPU</td>
<td valign="top" align="left">Intel i9-9900KF</td>
</tr>
<tr>
<td valign="top" align="left">Framework</td>
<td valign="top" align="left">CUDA10.1/cudnn7.6.5/torch 1.7.0</td>
</tr>
<tr>
<td valign="top" align="left">Code</td>
<td valign="top" align="left">python3.7</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Evaluation metrics</title>
<p>Since optical and SAR image object detection tasks are comparable, the effectiveness of the various approaches is assessed using several established indicators, namely average precision (AP), recall rate (r), precision rate (p), and F1 score (F1), which are defined in Equations (3)&#x2013;(6):</p>
<disp-formula>
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mi mathvariant="normal">Precision</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>TP</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FP</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi mathvariant="normal">Recall</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>TP</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The numbers of correctly recognized ships, false alarms, and missed ships are denoted by TP (true positives), FP (false positives), and FN (false negatives), respectively. The precision and recall are combined into the F1 score as follows:</p>
<disp-formula>
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mtext>F1-score</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="normal">Precision</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="normal">Recall</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">Precision</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="normal">Recall</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:munderover>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:munderover>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
<mml:mi>d</mml:mi>
<mml:mi>R</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The complete detection effectiveness of the various models is assessed using the AP and F1-score metrics, and a higher number indicates a superior detector performance.</p>
<p>The precision rate is the percentage of all predictions that correspond to correctly detected ground truth ships. The recall rate is the percentage of all ground truth ships that the network correctly detects. F1 is a comprehensive statistic that combines the precision rate and the recall rate to assess the effectiveness of the various frameworks. AP is the area under the precision-recall (PR) curve and also reflects the overall effectiveness of the various approaches. Additionally, frames per second (FPS), computed using Equation (7), is used to assess the detection speed of the various approaches. The higher the FPS, the faster the method.</p>
<disp-formula>
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mi>FPS</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mtext>per-img</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <italic>T</italic><sub>per-img</sub> is the inference time that a method requires to process one image.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Detection performance of inshore and offshore ships</title>
<p>In this section, the proposed approach and alternative CNN-based approaches, such as Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>), SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>), R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>), ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>), DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>), Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>), HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>), Grid R-CNN (<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>), Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>), YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>), EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>), Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>), Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>), and Yolov5-X, are assessed using the offshore and inshore ships of the testing sets. In addition to precision and recall, F1, AP, and FPS are also employed to investigate the applicability of the various methodologies. The identification performance of the suggested model and of the other CNN-based approaches on offshore and inshore ships from the SSDD and AirSARship datasets is presented in <xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref>, <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref>; <xref ref-type="fig" rid="f9">
<bold>Figures&#xa0;9</bold>
</xref>, <xref ref-type="fig" rid="f10">
<bold>10</bold>
</xref>. The proposed model provides the best accuracy for offshore scenes on SSDD (about 95.36% AP). The second-best result is 89.03% AP from the R2CNN approach, which is still 6.33% AP lower than the proposed model. The proposed model also delivers the best accuracy for inshore scenes on SSDD (about 92.27% AP). The second-best result is 83.53% AP, again from the R2CNN approach, which is still 8.74% AP lower than the proposed model. On AirSARship, the proposed model provides the best accuracy for offshore scenes (about 94.57% AP); the second-best result, 88.27% AP from the Quad-FPN approach, is still 6.30% AP lower than the proposed model. The proposed model also delivers the best accuracy for inshore scenes on AirSARship (about 91.11% AP); the second-best result, 84.94% AP from the Lite-Yolov5 approach, is still 6.17% AP lower than the proposed model.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>The identification outcomes of various state-of-the-art CNN based approaches on offshore and inshore ship scene for SSDD Dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Methods</th>
<th valign="top" colspan="4" align="center">Off-Shore</th>
<th valign="top" colspan="4" align="center">In-Shore</th>
<th valign="top" align="center">FPS</th>
</tr>
<tr>
<th valign="top" align="center"/>
<th valign="top" align="center">
<italic>P (%)</italic>
</th>
<th valign="top" align="center">
<italic>R (%)</italic>
</th>
<th valign="top" align="center">
<italic>AP (%)</italic>
</th>
<th valign="top" align="center">
<italic>F1</italic>
</th>
<th valign="top" align="center">
<italic>P (%)</italic>
</th>
<th valign="top" align="center">
<italic>R (%)</italic>
</th>
<th valign="top" align="center">
<italic>AP (%)</italic>
</th>
<th valign="top" align="center">
<italic>F1</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>)</td>
<td valign="top" align="center">88.60</td>
<td valign="top" align="center">91.81</td>
<td valign="top" align="center">88.60</td>
<td valign="top" align="center">0.8770</td>
<td valign="top" align="center">77.21</td>
<td valign="top" align="center">79.37</td>
<td valign="top" align="center">74.20</td>
<td valign="top" align="center">0.7377</td>
<td valign="top" align="center">16</td>
</tr>
<tr>
<td valign="top" align="left">SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>)</td>
<td valign="top" align="center">85.59</td>
<td valign="top" align="center">85.65</td>
<td valign="top" align="center">82.50</td>
<td valign="top" align="center">0.8170</td>
<td valign="top" align="center">74.67</td>
<td valign="top" align="center">71.45</td>
<td valign="top" align="center">78.25</td>
<td valign="top" align="center">0.7735</td>
<td valign="top" align="center">87</td>
</tr>
<tr>
<td valign="top" align="left">R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>)</td>
<td valign="top" align="center">91.14</td>
<td valign="top" align="center">90.07</td>
<td valign="top" align="center">
<underline>89.03</underline>
</td>
<td valign="top" align="center">0.8853</td>
<td valign="top" align="center">78.99</td>
<td valign="top" align="center">70.88</td>
<td valign="top" align="center">
<underline>83.53</underline>
</td>
<td valign="top" align="center">0.8289</td>
<td valign="top" align="center">48</td>
</tr>
<tr>
<td valign="top" align="left">ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>)</td>
<td valign="top" align="center">91.64</td>
<td valign="top" align="center">90.38</td>
<td valign="top" align="center">88.10</td>
<td valign="top" align="center">0.8797</td>
<td valign="top" align="center">79.63</td>
<td valign="top" align="center">77.70</td>
<td valign="top" align="center">77.70</td>
<td valign="top" align="center">0.7690</td>
<td valign="top" align="center">21.55</td>
</tr>
<tr>
<td valign="top" align="left">DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="center">91.89</td>
<td valign="top" align="center">89.89</td>
<td valign="top" align="center">83.37</td>
<td valign="top" align="center">0.8271</td>
<td valign="top" align="center">75.76</td>
<td valign="top" align="center">78.77</td>
<td valign="top" align="center">70.98</td>
<td valign="top" align="center">0.6950</td>
<td valign="top" align="center">20.81</td>
</tr>
<tr>
<td valign="top" align="left">Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>)</td>
<td valign="top" align="center">85.97</td>
<td valign="top" align="center">89.56</td>
<td valign="top" align="center">80.10</td>
<td valign="top" align="center">0.7989</td>
<td valign="top" align="center">52.90</td>
<td valign="top" align="center">77.78</td>
<td valign="top" align="center">76.59</td>
<td valign="top" align="center">0.7551</td>
<td valign="top" align="center">20.25</td>
</tr>
<tr>
<td valign="top" align="left">HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>)</td>
<td valign="top" align="center">86.47</td>
<td valign="top" align="center">88.28</td>
<td valign="top" align="center">84.03</td>
<td valign="top" align="center">0.8350</td>
<td valign="top" align="center">72.57</td>
<td valign="top" align="center">77.97</td>
<td valign="top" align="center">73.85</td>
<td valign="top" align="center">0.7225</td>
<td valign="top" align="center">15.16</td>
</tr>
<tr>
<td valign="top" align="left">Grid R-CNN (<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="center">89.10</td>
<td valign="top" align="center">91.18</td>
<td valign="top" align="center">79.73</td>
<td valign="top" align="center">0.7853</td>
<td valign="top" align="center">81.08</td>
<td valign="top" align="center">81.56</td>
<td valign="top" align="center">74.01</td>
<td valign="top" align="center">0.7332</td>
<td valign="top" align="center">7.55</td>
</tr>
<tr>
<td valign="top" align="left">Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>)</td>
<td valign="top" align="center">90.10</td>
<td valign="top" align="center">84.75</td>
<td valign="top" align="center">87.01</td>
<td valign="top" align="center">0.8623</td>
<td valign="top" align="center">69.78</td>
<td valign="top" align="center">79.03</td>
<td valign="top" align="center">69.89</td>
<td valign="top" align="center">0.6814</td>
<td valign="top" align="center">10.55</td>
</tr>
<tr>
<td valign="top" align="left">YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>)</td>
<td valign="top" align="center">89.59</td>
<td valign="top" align="center">87.03</td>
<td valign="top" align="center">79.17</td>
<td valign="top" align="center">0.7835</td>
<td valign="top" align="center">54.80</td>
<td valign="top" align="center">85.70</td>
<td valign="top" align="center">74.62</td>
<td valign="top" align="center">0.7323</td>
<td valign="top" align="center">42.5</td>
</tr>
<tr>
<td valign="top" align="left">EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>)</td>
<td valign="top" align="center">92.19</td>
<td valign="top" align="center">89.15</td>
<td valign="top" align="center">86.11</td>
<td valign="top" align="center">0.8517</td>
<td valign="top" align="center">79.01</td>
<td valign="top" align="center">77.64</td>
<td valign="top" align="center">73.63</td>
<td valign="top" align="center">0.7292</td>
<td valign="top" align="center">21.44</td>
</tr>
<tr>
<td valign="top" align="left">Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="center">90.31</td>
<td valign="top" align="center">87.35</td>
<td valign="top" align="center">87.67</td>
<td valign="top" align="center">0.8677</td>
<td valign="top" align="center">75.70</td>
<td valign="top" align="center">76.07</td>
<td valign="top" align="center">71.80</td>
<td valign="top" align="center">0.7055</td>
<td valign="top" align="center">13.76</td>
</tr>
<tr>
<td valign="top" align="left">Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>)</td>
<td valign="top" align="center">91.32</td>
<td valign="top" align="center">91.08</td>
<td valign="top" align="center">80.16</td>
<td valign="top" align="center">0.7901</td>
<td valign="top" align="center">75.78</td>
<td valign="top" align="center">80.62</td>
<td valign="top" align="center">81.92</td>
<td valign="top" align="center">0.8045</td>
<td valign="top" align="center">43.25</td>
</tr>
<tr>
<td valign="top" align="left">Yolov5-X</td>
<td valign="top" align="center">77.25</td>
<td valign="top" align="center">82.60</td>
<td valign="top" align="center">79.56</td>
<td valign="top" align="center">0.7862</td>
<td valign="top" align="center">70.89</td>
<td valign="top" align="center">81.61</td>
<td valign="top" align="center">76.49</td>
<td valign="top" align="center">0.7578</td>
<td valign="top" align="center">140</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Our model</bold>
</td>
<td valign="top" align="center">94.57</td>
<td valign="top" align="center">94.06</td>
<td valign="top" align="center">
<bold>95.36</bold>
</td>
<td valign="top" align="center">0.9413</td>
<td valign="top" align="center">89.44</td>
<td valign="top" align="center">90.77</td>
<td valign="top" align="center">
<bold>92.27</bold>
</td>
<td valign="top" align="center">0.9140</td>
<td valign="top" align="center">
<bold>157</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>The identification outcomes of various state-of-the-art CNN based approaches on offshore and inshore ship scene for AirSARship Dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Methods</th>
<th valign="top" colspan="4" align="center">Off-Shore</th>
<th valign="top" colspan="4" align="center">In-Shore</th>
<th valign="top" align="center">FPS</th>
</tr>
<tr>
<th valign="top" align="center"/>
<th valign="top" align="center">
<italic>P (%)</italic>
</th>
<th valign="top" align="center">
<italic>R (%)</italic>
</th>
<th valign="top" align="center">
<italic>AP (%)</italic>
</th>
<th valign="top" align="center">
<italic>F1</italic>
</th>
<th valign="top" align="center">
<italic>P (%)</italic>
</th>
<th valign="top" align="center">
<italic>R (%)</italic>
</th>
<th valign="top" align="center">
<italic>AP (%)</italic>
</th>
<th valign="top" align="center">
<italic>F1</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>)</td>
<td valign="top" align="center">83.72</td>
<td valign="top" align="center">89.89</td>
<td valign="top" align="center">84.12</td>
<td valign="top" align="center">0.8370</td>
<td valign="top" align="center">69.58</td>
<td valign="top" align="center">86.01</td>
<td valign="top" align="center">79.21</td>
<td valign="top" align="center">0.7877</td>
<td valign="top" align="center">16</td>
</tr>
<tr>
<td valign="top" align="left">SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>)</td>
<td valign="top" align="center">92.09</td>
<td valign="top" align="center">86.66</td>
<td valign="top" align="center">86.54</td>
<td valign="top" align="center">0.8550</td>
<td valign="top" align="center">64.76</td>
<td valign="top" align="center">88.08</td>
<td valign="top" align="center">81.41</td>
<td valign="top" align="center">0.8035</td>
<td valign="top" align="center">87</td>
</tr>
<tr>
<td valign="top" align="left">R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>)</td>
<td valign="top" align="center">84.08</td>
<td valign="top" align="center">94.07</td>
<td valign="top" align="center">85.11</td>
<td valign="top" align="center">0.8653</td>
<td valign="top" align="center">72.15</td>
<td valign="top" align="center">90.92</td>
<td valign="top" align="center">79.22</td>
<td valign="top" align="center">0.6989</td>
<td valign="top" align="center">48</td>
</tr>
<tr>
<td valign="top" align="left">ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>)</td>
<td valign="top" align="center">82.40</td>
<td valign="top" align="center">80.83</td>
<td valign="top" align="center">82.22</td>
<td valign="top" align="center">0.8161</td>
<td valign="top" align="center">71.74</td>
<td valign="top" align="center">69.13</td>
<td valign="top" align="center">68.10</td>
<td valign="top" align="center">0.6754</td>
<td valign="top" align="center">21.55</td>
</tr>
<tr>
<td valign="top" align="left">DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="center">90.18</td>
<td valign="top" align="center">87.98</td>
<td valign="top" align="center">85.83</td>
<td valign="top" align="center">0.8454</td>
<td valign="top" align="center">69.50</td>
<td valign="top" align="center">66.57</td>
<td valign="top" align="center">72.98</td>
<td valign="top" align="center">0.7141</td>
<td valign="top" align="center">20.81</td>
</tr>
<tr>
<td valign="top" align="left">Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>)</td>
<td valign="top" align="center">89.70</td>
<td valign="top" align="center">90.56</td>
<td valign="top" align="center">
<underline>88.27</underline>
</td>
<td valign="top" align="center">0.8789</td>
<td valign="top" align="center">62.10</td>
<td valign="top" align="center">78.15</td>
<td valign="top" align="center">80.77</td>
<td valign="top" align="center">0.7945</td>
<td valign="top" align="center">20.25</td>
</tr>
<tr>
<td valign="top" align="left">HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>)</td>
<td valign="top" align="center">83.40</td>
<td valign="top" align="center">89.82</td>
<td valign="top" align="center">79.27</td>
<td valign="top" align="center">0.7818</td>
<td valign="top" align="center">70.50</td>
<td valign="top" align="center">78.71</td>
<td valign="top" align="center">76.05</td>
<td valign="top" align="center">0.7506</td>
<td valign="top" align="center">15.16</td>
</tr>
<tr>
<td valign="top" align="left">Grid R-CNN (<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="center">89.89</td>
<td valign="top" align="center">89.91</td>
<td valign="top" align="center">81.03</td>
<td valign="top" align="center">0.8010</td>
<td valign="top" align="center">76.81</td>
<td valign="top" align="center">71.60</td>
<td valign="top" align="center">69.55</td>
<td valign="top" align="center">0.6854</td>
<td valign="top" align="center">7.55</td>
</tr>
<tr>
<td valign="top" align="left">Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>)</td>
<td valign="top" align="center">88.75</td>
<td valign="top" align="center">74.57</td>
<td valign="top" align="center">79.26</td>
<td valign="top" align="center">0.7854</td>
<td valign="top" align="center">70.87</td>
<td valign="top" align="center">81.03</td>
<td valign="top" align="center">70.91</td>
<td valign="top" align="center">0.6998</td>
<td valign="top" align="center">10.55</td>
</tr>
<tr>
<td valign="top" align="left">YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>)</td>
<td valign="top" align="center">87.01</td>
<td valign="top" align="center">89.55</td>
<td valign="top" align="center">79.07</td>
<td valign="top" align="center">0.7889</td>
<td valign="top" align="center">68.60</td>
<td valign="top" align="center">84.09</td>
<td valign="top" align="center">82.02</td>
<td valign="top" align="center">0.8189</td>
<td valign="top" align="center">42.5</td>
</tr>
<tr>
<td valign="top" align="left">EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>)</td>
<td valign="top" align="center">89.10</td>
<td valign="top" align="center">90.75</td>
<td valign="top" align="center">81.20</td>
<td valign="top" align="center">0.8081</td>
<td valign="top" align="center">87.29</td>
<td valign="top" align="center">76.91</td>
<td valign="top" align="center">63.35</td>
<td valign="top" align="center">0.6260</td>
<td valign="top" align="center">21.44</td>
</tr>
<tr>
<td valign="top" align="left">Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="center">90.38</td>
<td valign="top" align="center">77.35</td>
<td valign="top" align="center">84.60</td>
<td valign="top" align="center">0.8354</td>
<td valign="top" align="center">70.65</td>
<td valign="top" align="center">78.60</td>
<td valign="top" align="center">74.12</td>
<td valign="top" align="center">0.7379</td>
<td valign="top" align="center">13.76</td>
</tr>
<tr>
<td valign="top" align="left">Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>)</td>
<td valign="top" align="center">91.56</td>
<td valign="top" align="center">90.48</td>
<td valign="top" align="center">81.17</td>
<td valign="top" align="center">0.8054</td>
<td valign="top" align="center">78.90</td>
<td valign="top" align="center">74.82</td>
<td valign="top" align="center">
<underline>84.94</underline>
</td>
<td valign="top" align="center">0.8388</td>
<td valign="top" align="center">66.25</td>
</tr>
<tr>
<td valign="top" align="left">Yolov5-X</td>
<td valign="top" align="center">79.12</td>
<td valign="top" align="center">82.70</td>
<td valign="top" align="center">79.44</td>
<td valign="top" align="center">0.7822</td>
<td valign="top" align="center">76.35</td>
<td valign="top" align="center">79.69</td>
<td valign="top" align="center">73.46</td>
<td valign="top" align="center">0.7225</td>
<td valign="top" align="center">140</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Our model</bold>
</td>
<td valign="top" align="center">92.83</td>
<td valign="top" align="center">90.55</td>
<td valign="top" align="center">
<bold>94.57</bold>
</td>
<td valign="top" align="center">0.9313</td>
<td valign="top" align="center">90.14</td>
<td valign="top" align="center">89.08</td>
<td valign="top" align="center">
<bold>91.11</bold>
</td>
<td valign="top" align="center">0.9014</td>
<td valign="top" align="center">
<bold>157</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Performances of AP and FPS for various CNN-based techniques on offshore and inshore ships for SSDD dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g009.tif"/>
</fig>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Performances of AP and FPS for various CNN-based techniques on offshore and inshore ships for AirSARship dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g010.tif"/>
</fig>
<p>The suggested model and the other state-of-the-art CNN-based approaches, including Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>), SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>), R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>), ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>), DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>), Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>), HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>), Grid R-CNN (<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>), Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>), YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>), EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>), Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>), Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>), and Yolov5-X, all have detection accuracies that are higher for offshore scenes than for inshore scenes. This is reasonable considering that the latter have a more complicated background than the former. Perhaps as a result of their poor small-ship identification capabilities, the alternative approaches have lower precision values than the suggested model. In this work, the recall values of the proposed model are occasionally lower than those of the other methods. Therefore, a suitable score threshold can be investigated in the future to balance missed detections and false alarms. Additionally, the current model appears to be faster than the other approaches based on the FPS data in <xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref>, <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref>, and <xref ref-type="fig" rid="f9">
<bold>Figures&#xa0;9</bold>
</xref>, <xref ref-type="fig" rid="f10">
<bold>10</bold>
</xref>, potentially as a result of the separable depth-wise and point-wise convolutions utilized in the backbone network. In conclusion, the offshore scene has greater accuracy, AP, and F1 scores for both datasets than the inshore scenario. This might be due to the inshore scene&#x2019;s densely packed ships and increased backdrop interference from the land. Additionally, it demonstrates that it is more difficult to spot ships in the inshore scene than it is in the offshore environment.</p>
<p>The proposed model&#x2019;s performance in terms of detection results, compared to existing convolutional neural network-based approaches tested on offshore and inshore ships using the SSDD and AirSARship datasets, is shown in <xref ref-type="fig" rid="f11">
<bold>Figures&#xa0;11</bold>
</xref>, <xref ref-type="fig" rid="f12">
<bold>12</bold>
</xref>. The suggested model is capable of detecting different SAR ships with multiscale sizes under varied backgrounds. This demonstrates its great scale and scene adaption together with excellent detection performance. The currently proposed model can increase the detection confidence scores when compared to the second-best CNN-based ship detector R2CNN. For instance, the suggested model raises the confidence score in <xref ref-type="fig" rid="f11">
<bold>Figure&#xa0;11</bold>
</xref> first detection sample from 0.96 to 1.0. This can demonstrate the better trustworthiness of the newly proposed model. It is evident that the inshore scenario contains a sizable number of tightly packed ship targets. The other suggested solutions miss certain closely grouped inshore ships. The proposed model, however, is capable of accurately localizing and detecting these multiscale ships with high probabilities.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>The visual detection outcomes of CNN-based approaches for offshore and inshore ships on the SSDD dataset: <bold>(A)</bold> ground truth, <bold>(B)</bold> Faster-R-CNN, <bold>(C)</bold> SSD, <bold>(D)</bold> R2CNN, <bold>(E)</bold> ARPN, <bold>(F)</bold> DAPN, <bold>(G)</bold> Quad-FPN, <bold>(H)</bold> HR-SDNet, <bold>(I)</bold> Grid R-CNN, <bold>(J)</bold> Cascade R-CNN, <bold>(K)</bold> YOLOv4-LITE, <bold>(L)</bold> EfficientDet, <bold>(M)</bold> Free-Anchor, <bold>(N)</bold> Lite-Yolov5, <bold>(O)</bold> yolov5-X, and <bold>(P)</bold> our proposed method. Pink circles mark false ship detections and red circles mark missed ships.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g011.tif"/>
</fig>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>The visual detection outcomes of CNN-based approaches for offshore and inshore ships on the AirSARship dataset: <bold>(A)</bold> ground truth, <bold>(B)</bold> Faster-R-CNN, <bold>(C)</bold> SSD, <bold>(D)</bold> R2CNN, <bold>(E)</bold> ARPN, <bold>(F)</bold> DAPN, <bold>(G)</bold> Quad-FPN, <bold>(H)</bold> HR-SDNet, <bold>(I)</bold> Grid R-CNN, <bold>(J)</bold> Cascade R-CNN, <bold>(K)</bold> YOLOv4-LITE, <bold>(L)</bold> EfficientDet, <bold>(M)</bold> Free-Anchor, <bold>(N)</bold> Lite-Yolov5, <bold>(O)</bold> yolov5-X, and <bold>(P)</bold> our proposed method. Pink circles mark false ship detections and red circles mark missed ships.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g012.tif"/>
</fig>
<p>The other suggested CNN-based techniques in this research can precisely identify the port&#x2019;s heavily docked ships. However, it can be observed that the suggested method is more accurate and can detect these ships better when comparing the detection outcomes of several proposed CNN-based systems. The detection outcomes of the Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>), SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>), R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>), ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>), DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>), Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>), HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>), Grid R-CNN (<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>), Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>), YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>), EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>), Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>), Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>), and yolov5-X algorithms contain several false alarms for the offshore and inshore scene (<xref ref-type="fig" rid="f11">
<bold>Figures&#xa0;11</bold>
</xref>, <xref ref-type="fig" rid="f12">
<bold>12</bold>
</xref>). Additionally, there are a few missed ships in the detection findings, which could be a result of how closely docked the ship targets are, making it more challenging for the framework to discriminate between them. Similarly, it is observed by comparing the suggested model&#x2019;s detection results that they are more precise than those produced by existing CNN-based techniques. <xref ref-type="fig" rid="f11">
<bold>Figures&#xa0;11</bold>
</xref>, <xref ref-type="fig" rid="f12">
<bold>12</bold>
</xref> display the outcomes of various CNN-based object detection techniques in an offshore scenario created for SAR images. It is evident that the offshore landscape contains a substantial number of dense multi-scale ship targets (the first two columns of <xref ref-type="fig" rid="f11">
<bold>Figures&#xa0;11</bold>
</xref>, <xref ref-type="fig" rid="f12">
<bold>12</bold>
</xref>).</p>
<p>A pink circle denotes a false alarm in the identification outcomes of the CNN-based models compared in this study, other than the current model. This might be due to a small number of false-alarm sources that closely resemble ships, making it very challenging for the network to recognize them successfully. Because there are wakes of ships and surroundings, such as ship A in <xref ref-type="fig" rid="f12">
<bold>Figure&#xa0;12</bold>
</xref>, the Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>), SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>), R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>), ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>), DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>), Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>), HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>), Grid R-CNN (<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>), Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>), YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>), EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>), Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>), Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>), and yolov5-X algorithms can distinguish between wakes of ships and their surroundings. 
Additionally, several ships are overlooked by the Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>), SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>), R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>), ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>), DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>), Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>), HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>), Grid R-CNN(<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>), Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>), YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>), EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>), Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>), Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>), and yolov5-X algorithms, as seen by the red circles in <xref ref-type="fig" rid="f11">
<bold>Figures&#xa0;11</bold>
</xref>, <xref ref-type="fig" rid="f12">
<bold>12</bold>
</xref>. The inability to extract distinguishing characteristics of ships and interference may be to blame. However, the suggested model could identify these ships without any false alarms, which is also one of the driving forces for this paper and highlights the current proposed model&#x2019;s powerful and robust feature representation skills.</p>
<p>The suggested model is compared with other state-of-the-art CNN-based SAR ship identification techniques. As can be observed, two-stage or multi-stage approaches typically execute detection tasks more effectively than single-stage methods. However, compared to these two-stage or multi-stage detection methods, one-stage detection methods clearly have a faster inference efficiency. This might be a result of the two-stage detection network&#x2019;s sophisticated network architecture and increased computational load. Some one-stage detection techniques that are more effective at detecting ships have recently been proposed, including YOLOv4-LITE (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022</xref>), EfficientDet (<xref ref-type="bibr" rid="B37">Tan et&#xa0;al., 2020</xref>), Free-Anchor (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>), and Lite-Yolov5 (<xref ref-type="bibr" rid="B54">Xu et&#xa0;al., 2022</xref>). These, together with two-stage detectors like ARPN (<xref ref-type="bibr" rid="B65">Zhao et&#xa0;al., 2020</xref>), DAPN (<xref ref-type="bibr" rid="B7">Cui et&#xa0;al., 2019</xref>), Quad-FPN (<xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2021b</xref>), HR-SDNet (<xref ref-type="bibr" rid="B47">Wei et&#xa0;al., 2020</xref>), Grid R-CNN (<xref ref-type="bibr" rid="B24">Lu et&#xa0;al., 2019</xref>), and Cascade R-CNN (<xref ref-type="bibr" rid="B2">Cai and Vasconcelos, 2018</xref>), are used for the performance comparisons of the current model with other cutting-edge detectors.</p>
<p>The detection accuracies of inshore scenes for the proposed model and the other CNN-based techniques are also all lower than those of offshore scenes. Deformable convolution can lessen the interference of complicated backgrounds, particularly for inshore scenes; hence, the recently presented technique seems to be robust to background interference. The other state-of-the-art techniques are less precise and have lower recall values than the model now under study because of their poor small ship recognition capabilities. As a result, it will be possible to consider an acceptable score threshold in the future to balance missed detections and false alarms. Additionally, accuracy needs to be further improved, for example, when striking military targets with precision. It might be suggested in the future to choose between speed and accuracy. The suggested model has a higher detection effectiveness. This might be the case because other methods overlook smaller targets since they do not consider the underlying data in the prediction layer. There are some false alarms in the identification outcomes of previous techniques for the complicated inshore scenarios. Particularly, several land features in the inshore scene are wrongly identified as targets by Faster-RCNN, SSD, R2CNN, ARPN, DAPN, Quad-FPN, HR-SDNet, Grid R-CNN, Cascade R-CNN, YOLOv4-LITE, EfficientDet, Free-Anchor, and Lite-Yolov5. By doing several experiments using the SSDD and AirSARship datasets, we illustrate the effectiveness of our suggested model. The SSDD dataset ablation studies of the FPN+PAN and attention mechanism modules have shown that each of them can enhance ship detection performance, and the combination of both can further improve detection outcomes.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Detection performance on SAR large scene images</title>
<p>This section compares various CNN-based algorithms, such as Yolov5-X, Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>), SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>), and R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>), as well as current methods for object recognition in SAR images using large-scale scene SAR images. To validate the good migration ability of the suggested model, the actual ship identification in two more SAR large scene images has been carried out. <xref ref-type="fig" rid="f13">
<bold>Figure&#xa0;13</bold>
</xref> displays the areas covered by the two SAR large scene images acquired by the Chinese GF-3 satellite. These two SAR images were selected because they both lie along a globally important route, the Malacca Strait (<xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>). The VV polarization SAR images from <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref> are considered because ships often exhibit higher backscattering values in VV polarization (<xref ref-type="bibr" rid="B38">Torres et&#xa0;al., 2012</xref>). Due to the restricted GPU memory, they are divided into small sub-images of 800 &#xd7; 800 (<xref ref-type="bibr" rid="B64">Zhang et&#xa0;al., 2020a</xref>) and 1000 &#xd7; 1000 (<xref ref-type="bibr" rid="B49">Xian et&#xa0;al., 2019</xref>) before being used for training and testing. The SAR ship sub-images are ultimately fed into the suggested model in order to actually detect the ships. The detection outcomes of the sub-images are then added to the original SAR large scene image (<xref ref-type="fig" rid="f14">
<bold>Figure&#xa0;14</bold>
</xref>). The detection accuracy and speed of various approaches are assessed, respectively, using the AP and FPS (<xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>).</p>
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>The two SAR large scene images acquired from the Chinese GF-3 satellite that are utilized for ship detection in this research work. <bold>(A)</bold> AirSARship image with a resolution of 1/3&#xa0;m, and <bold>(B)</bold> GF-3 satellite image with a resolution of 1&#xa0;m.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g013.tif"/>
</fig>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Detailed descriptions of the two SAR large scene images.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Images</th>
<th valign="top" align="center">Mode</th>
<th valign="top" align="center">Satellite</th>
<th valign="top" align="center">Resolution (m)</th>
<th valign="top" align="center">Image Size</th>
<th valign="top" align="center">Format</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">
<bold>AirSARship</bold>
</td>
<td valign="top" align="left">SL/UFS</td>
<td valign="top" align="left">GF-3</td>
<td valign="top" align="center">1/3</td>
<td valign="top" align="center">3000 &#xd7; 3000</td>
<td valign="top" align="left">Tiff</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>GF3</bold>
</td>
<td valign="top" align="left">SL</td>
<td valign="top" align="left">GF-3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">15350 &#xd7; 13592</td>
<td valign="top" align="left">Tiff</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f14" position="float">
<label>Figure&#xa0;14</label>
<caption>
<p>Ship target identification framework for SAR large scene images.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g014.tif"/>
</fig>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Detection outcomes of various CNN-based approaches on two SAR large scene ship images.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Method</th>
<th valign="top" colspan="2" align="center">Image-1</th>
<th valign="top" colspan="2" align="center">Image-2</th>
</tr>
<tr>
<th valign="top" align="center"/>
<th valign="top" align="center">AP</th>
<th valign="top" align="center">Time (s)</th>
<th valign="top" align="center">AP</th>
<th valign="top" align="center">Time (s)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Faster-RCNN (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>)</td>
<td valign="top" align="left">0.838</td>
<td valign="top" align="left">37s</td>
<td valign="top" align="left">0.845</td>
<td valign="top" align="left">17s</td>
</tr>
<tr>
<td valign="top" align="left">SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>)</td>
<td valign="top" align="left">0.790</td>
<td valign="top" align="left">14s</td>
<td valign="top" align="left">0.785</td>
<td valign="top" align="left">25s</td>
</tr>
<tr>
<td valign="top" align="left">R2CNN (<xref ref-type="bibr" rid="B14">Jiang et&#xa0;al., 2017</xref>)</td>
<td valign="top" align="left">0.879</td>
<td valign="top" align="left">65s</td>
<td valign="top" align="left">0.830</td>
<td valign="top" align="left">39s</td>
</tr>
<tr>
<td valign="top" align="left">Yolov5-X</td>
<td valign="top" align="left">0.765</td>
<td valign="top" align="left">140s</td>
<td valign="top" align="left">0.749</td>
<td valign="top" align="left">140s</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Ours</bold>
</td>
<td valign="top" align="left">0.956</td>
<td valign="top" align="left">157s</td>
<td valign="top" align="left">0.948</td>
<td valign="top" align="left">156s</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The two SAR large scene images are used to depict the results of SAR ship detection using the current model and other CNN-based methods. The current model is able to successfully detect the majority of ships, demonstrating its strong migration application capabilities in ocean surveillance (<xref ref-type="fig" rid="f15">
<bold>Figures&#xa0;15A, B</bold>
</xref>). Features and a clear environment come first. In particular, just a few ships in the large scene were missed by all of these convolutional neural network-based approaches; these are indicated by pink circles in <xref ref-type="fig" rid="f15">
<bold>Figures&#xa0;15A, B</bold>
</xref>. The identification outcomes on the second SAR large scene image are displayed, and the identification results of various CNN-based approaches on the SAR large scene images are demonstrated on the left side of <xref ref-type="fig" rid="f15">
<bold>Figures&#xa0;15A, B</bold>
</xref>. <xref ref-type="fig" rid="f15">
<bold>Figures&#xa0;15A, B</bold>
</xref>&#x2019;s right side enlarges and displays two particular regions designated with brown and blue rectangles. The comparison of the detection outcomes of various model performances on SAR large scene images is presented in <xref ref-type="fig" rid="f15">
<bold>Figures&#xa0;15A, B</bold>
</xref>. Offshore ships make up the majority of this SAR large-scene image. Inshore scenes have substantial clutter, which could cause false alarms. The results of another CNN-based approach suggested in this study show that there are few false alarms and missed targets in the offshore scenes, and the false alarms are repressed in the inshore scenes as well. However, the proposed approach does not have false alarms or miss target detection, which is one of the motivating factors for this paper. <xref ref-type="fig" rid="f15">
<bold>Figures&#xa0;15A, B</bold>
</xref> illustrate the SAR ship identification performance of the suggested model on the two SAR large scene images acquired from the Chinese GF-3 satellite and employed in the current study. The suggested model can successfully detect the majority of ships, which demonstrates its good migration application capability in ocean surveillance, as can be deduced from <xref ref-type="fig" rid="f15">
<bold>Figures&#xa0;15A, B</bold>
</xref>. In conclusion, the identification outcomes on the two SAR large scene images at various resolutions show that the present system identifies multi-size ships with competitive outcomes and has a strong generalization capacity in comparison to the various CNN-based approaches created for object identification in SAR images. This demonstrates that the approach now under study can adapt to SAR images from various sources more effectively.</p>
<fig id="f15" position="float">
<label>Figure&#xa0;15</label>
<caption>
<p>The identification outcomes of various CNN-based approaches: <bold>(A)</bold> first SAR large scene image based on the SSDD dataset, <bold>(B)</bold> second SAR large scene image based on the AirSARship dataset. Green rectangles correspond to the ground-truth ships, while yellow, sky-blue, pink, and red rectangles denote the predicted ships, respectively. Red circles mark false ship detections and pink circles mark missed ships. The right side of each image displays two enlarged special areas that are marked by blue and brown rectangles, respectively.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-09-1086140-g015.tif"/>
</fig>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Ablation study</title>
<p>The ablation studies presented in this section are used to demonstrate the effectiveness of the suggested FPN+PAN and attention mechanism modules through their removal and installation, in order to better understand the behavior of the framework.</p>
<sec id="s4_1">
<label>4.1</label>
<title>Ablation study on the FPN+PAN module</title>
<p>
<xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref> presents the ablation study of YOLOv5 with removal and installation of the FPN+PAN module. In <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>, &#x201c;&#x2718;&#x201d; denotes YOLOv5 without the FPN+PAN module, while &#x201c;&#x2714;&#x201d; denotes YOLOv5 with the FPN+PAN module (i.e., our suggested model). Experiments were carried out in offshore and inshore scenes, respectively, to evaluate the identification performance of the suggested approach in each type of scene. In <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>, the identification effectiveness of the model for offshore ships is approximately similar, with little change in the identification indicators, as a result of the relatively simple background of offshore ships and the little interference; because the identification performance is already outstanding against the simple background, only limited enhancement can be obtained. In contrast to offshore ships, inshore ships have more complicated background clutter. Moreover, wharfs as well as other structures on the shore significantly undermine detections, and SAR ship detection effectiveness generally declines. However, the detection performance is significantly enhanced by improving the feature representation capacity and minimizing the aliasing effect of fusion features. In <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>, with the installation of the FPN+PAN module, the p, r, and AP of the model are enhanced by about 2.9%, 4.6%, and 5.5%, respectively, for offshore ship detection, while for inshore ship detection the p, r, and AP of the model are enhanced by about 6.08%, 15%, and 8.07%, respectively.</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Ablation study of our proposed model with removal and installation of the FPN+PAN module on the SSDD dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">FPN+PAN</th>
<th valign="top" colspan="3" align="center">OFF-shore</th>
<th valign="top" colspan="3" align="center">In-shore</th>
</tr>
<tr>
<th valign="top" align="center"/>
<th valign="top" align="center">p (%)</th>
<th valign="top" align="center">r (%)</th>
<th valign="top" align="center">AP(%)</th>
<th valign="top" align="center">p (%)</th>
<th valign="top" align="center">r (%)</th>
<th valign="top" align="center">AP(%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">&#x2718;</td>
<td valign="top" align="center">91.67</td>
<td valign="top" align="center">89.43</td>
<td valign="top" align="center">89.78</td>
<td valign="top" align="center">83.36</td>
<td valign="top" align="center">75.56</td>
<td valign="top" align="center">84.20</td>
</tr>
<tr>
<td valign="top" align="left">&#x2714;</td>
<td valign="top" align="center">94.57</td>
<td valign="top" align="center">94.06</td>
<td valign="top" align="center">
<bold>95.36</bold>
</td>
<td valign="top" align="center">89.44</td>
<td valign="top" align="center">90.77</td>
<td valign="top" align="center">
<bold>92.27</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Ablation study on the attention mechanism module</title>
<p>
<xref ref-type="table" rid="T8">
<bold>Table&#xa0;8</bold>
</xref> presents the ablation study of YOLOv5 with removal and installation of the attention mechanism module. In <xref ref-type="table" rid="T8">
<bold>Table&#xa0;8</bold>
</xref>, &#x201c;&#x2718;&#x201d; denotes YOLOv5 without the attention mechanism module, while &#x201c;&#x2714;&#x201d; denotes YOLOv5 with the attention mechanism module (i.e., our suggested model). With the installation of the attention mechanism module in our model, the detection performance is significantly enhanced by improving the feature representation capacity and minimizing the aliasing effect of fusion features. In <xref ref-type="table" rid="T8">
<bold>Table&#xa0;8</bold>
</xref>, with the installation of the attention mechanism module, the p, r, and AP of the model are enhanced by about 7.22%, 1.36%, and 5.31%, respectively, for offshore ship detection, while for inshore ship detection the p, r, and AP of the model are enhanced by about 1.03%, 10.97%, and 10.67%, respectively.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Ablation study of our proposed model with removal and installation of the attention mechanism module on the SSDD dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Attention Mechanism</th>
<th valign="top" colspan="3" align="center">OFF-shore</th>
<th valign="top" colspan="3" align="center">In-shore</th>
</tr>
<tr>
<th valign="top" align="center"/>
<th valign="top" align="center">p (%)</th>
<th valign="top" align="center">r (%)</th>
<th valign="top" align="center">AP(%)</th>
<th valign="top" align="center">p (%)</th>
<th valign="top" align="center">r (%)</th>
<th valign="top" align="center">AP(%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">
<italic>&#x2718;</italic>
</td>
<td valign="top" align="center">87.35</td>
<td valign="top" align="center">92.70</td>
<td valign="top" align="center">90.05</td>
<td valign="top" align="center">88.41</td>
<td valign="top" align="center">79.80</td>
<td valign="top" align="center">81.60</td>
</tr>
<tr>
<td valign="top" align="left">
<italic>&#x2714;</italic>
</td>
<td valign="top" align="center">94.57</td>
<td valign="top" align="center">94.06</td>
<td valign="top" align="center">
<bold>95.36</bold>
</td>
<td valign="top" align="center">89.44</td>
<td valign="top" align="center">90.77</td>
<td valign="top" align="center">
<bold>92.27</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions and future work</title>
<p>In this modern technological era, advanced machine learning and artificial intelligence-based models have revolutionized diverse research domains across the full spectrum. Due to their automatic feature extraction and strong identification skills, they can be used in a variety of study fields. An improved version of the unique one-stage YOLOv5 for SAR ship identification has been proposed in this study, drawing inspiration from the capabilities of these models in other research domains. The generic YOLOv5 model has been improved to address the major issues with the SAR ship detection process. These issues include complexity (complex background interferences, various size ship feature differences, and indistinct tiny ship characteristics), high-cost effectiveness, poor identification and recognition rates, and implementation complexities. The changes to the generic YOLOv5 model in the neck region and backbone section employing C3 and PAN structure have been designed to address these major issues. The SSDD and AirSARship open SAR ship datasets, as well as two SAR large scene images acquired from the Chinese GF-3 satellite, are utilized to obtain the experimental results. Based on the test results, it has been determined that the enhancement to the generic YOLOv5 model not only enhanced identification capabilities but also demonstrated that this model is not data-hungry (i.e., it provides optimum results even with a small dataset). The applicability of this model is assessed using a variety of validation metrics, including accuracy, different training and test sets, and TF values, as well as comparisons with other cutting-edge classification models (ARPN, DAPN, Quad-FPN, HR-SDNet, Grid R-CNN, Cascade R-CNN, Multi-Stage YOLOv4-LITE, EfficientDet, Free-Anchor, Lite-Yolov5). Based on the performance values, it has been determined that the examined model exceeded the benchmark models targeted in this research work by producing high identification rates. 
Additionally, these high identification rates show how useful the suggested approach is for maritime surveillance. Recommended and forthcoming future work includes the following:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; To enhance the effectiveness of our model detection in the future, we will consider the challenges in SAR data, such as the azimuth ambiguity, sidelobes, and the sea condition.</p>
</list-item>
<list-item>
<p>&#x2022; In the future, we will investigate optimizing the detection speed of our model.</p>
</list-item>
<list-item>
<p>&#x2022; We might suggest merging contemporary deep CNN abstract features with conventional concrete ones to further improve detection accuracy.</p>
</list-item>
<list-item>
<p>&#x2022; In order to further boost the identification speed and accuracy, we will focus on merging the backscattering characteristics of ships in SAR images with convolutional network architecture and offering a robust constraint, such as a mask.</p>
</list-item>
<list-item>
<p>&#x2022; Future research on instance segmentation and ship detection will be taken into consideration.</p>
</list-item>
</list>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>Conceptualization, MY and LS. Methodology, MY and SH. Software, MY, SH and LS. Validation, WJ, LS and XM. Formal analysis, AC and DW. Investigation, XM. Resources, WJ. Data curation, MY, KD and DW. Writing&#x2014;original draft preparation, MY. Writing&#x2014;review and editing, MY, XM and LS. Visualization, LS and KD. Supervision, WJ and LS. Project administration, LS. Funding acquisition, WJ and LS. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This research was funded by the National Key Research and Development Program of China (2017YFC1405600).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baselice</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Ferraioli</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Unsupervised coastal line extraction from SAR images</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>10</volume>, <fpage>1350</fpage>&#x2013;<lpage>1354</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2013.2241013</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Vasconcelos</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Cascade r-cnn: Delving into high quality object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>6154</fpage>&#x2013;<lpage>6162</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1712.00726</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Caputo</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Castellano</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Greco</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Mencar</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Petti</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Vessio</surname> <given-names>G</given-names>
</name>
</person-group>. &#x201c;<article-title>Human detection in drone images using YOLO for search-and-Rescue operations</article-title>,&#x201d; in <source>International conference of the Italian association for artificial intelligence</source> (<publisher-name>Springer</publisher-name>), <fpage>326</fpage>&#x2013;<lpage>337</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-031-08421-8_22</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname> <given-names>Y.-L.</given-names>
</name>
<name>
<surname>Anagaw</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y. C.</given-names>
</name>
<name>
<surname>Hsiao</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>W.-H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ship detection based on YOLOv2 for SAR imagery</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <elocation-id>786</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs11070786</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.-P.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>SAR image despeckling based on combination of fractional-order total variation and nonlocal low rank regularization</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>58</volume>, <fpage>2056</fpage>&#x2013;<lpage>2070</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2019.2952662</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A spatial-temporal attention-based method and a new dataset for remote sensing image change detection</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <elocation-id>1662</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs12101662</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Dense attention pyramid networks for multi-scale ship detection in SAR images</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>57</volume>, <fpage>8983</fpage>&#x2013;<lpage>8997</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2019.2923988</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Target discrimination based on weakly supervised learning for high-resolution SAR images in complex scenes</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>58</volume>, <fpage>461</fpage>&#x2013;<lpage>472</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2019.2937175</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Elfwing</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Uchibe</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Doya</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Sigmoid-weighted linear units for neural network function approximation in reinforcement learning</article-title>. <source>Neural Networks</source> <volume>107</volume>, <fpage>3</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1702.03118</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Change detection from synthetic aperture radar images based on channel weighting-based deep cascade network</article-title>. <source>IEEE J. Selected Topics Appl. Earth Observations Remote Sens.</source> <volume>12</volume>, <fpage>4517</fpage>&#x2013;<lpage>4529</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2019.2953128</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Fast r-cnn</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>. <fpage>1440</fpage>&#x2013;<lpage>1448</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCV.2015.169</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Donahue</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Darrell</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Malik</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Rich feature hierarchies for accurate object detection and semantic segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>580</fpage>&#x2013;<lpage>587</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1311.2524</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.-P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Refocusing of moving targets based on low-bit quantized SAR data <italic>via</italic> parametric quantized iterative hard thresholding</article-title>. <source>IEEE Trans. Aerospace Electronic Syst.</source> <volume>56</volume>, <fpage>2198</fpage>&#x2013;<lpage>2211</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TAES.2019.2944707</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>R2CNN: Rotational region CNN for orientation robust scene text detection</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1706.09579</pub-id>. arXiv preprint arXiv:1706.09579.</citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Overview of deep convolutional neural network approaches for satellite remote sensing ship monitoring technology</article-title>,&#x201d; in <source>IOP conference series: Materials science and engineering</source> (<publisher-name>IOP Publishing</publisher-name>), <fpage>012071</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1757-899X/730/1/012071</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ran</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>b). <article-title>An improved superpixel-level CFAR detection method for ship targets in high-resolution SAR images</article-title>. <source>IEEE J. Selected Topics Appl. Earth Observations Remote Sens.</source> <volume>11</volume>, <fpage>184</fpage>&#x2013;<lpage>194</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2017.2764506</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ship detection with superpixel-level Fisher vector in high-resolution SAR images</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>17</volume>, <fpage>247</fpage>&#x2013;<lpage>251</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2019.2920668</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T.-Y.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Hariharan</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Belongie</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Feature pyramid networks for object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>2117</fpage>&#x2013;<lpage>2125</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1612.03144</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Qu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>a). &#x201c;<article-title>Ship detection in SAR images based on an improved faster r-CNN</article-title>,&#x201d; in <source>2017 SAR in big data era: Models, methods and applications (BIGSARDATA)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/BIGSARDATA.2017.8124934</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Anguelov</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Erhan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Szegedy</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Reed</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>C.-Y.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). &#x201c;<article-title>Ssd: Single shot multibox detector</article-title>,&#x201d; in <source>European Conference on computer vision</source> (<publisher-name>Springer</publisher-name>), <fpage>21</fpage>&#x2013;<lpage>37</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1512.02325</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Multi-scale ship detection algorithm based on a lightweight neural network for spaceborne SAR images</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <elocation-id>1149</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs14051149</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Path aggregation network for instance segmentation</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>8759</fpage>&#x2013;<lpage>8768</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1803.01534</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep learning for SAR ship detection: Past, present and future</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <elocation-id>2712</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs14112712</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Yue</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Grid r-cnn</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>7363</fpage>&#x2013;<lpage>7372</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1811.12030</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Ship classification and detection based on CNN using GF-3 SAR images</article-title>. <source>Remote Sens.</source> <volume>10</volume>, <elocation-id>2043</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs10122043</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Najafabadi</surname> <given-names>M. M.</given-names>
</name>
<name>
<surname>Villanustre</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Khoshgoftaar</surname> <given-names>T. M.</given-names>
</name>
<name>
<surname>Seliya</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Wald</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Muharemagic</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning applications and challenges in big data analytics</article-title>. <source>J. Big Data</source> <volume>2</volume>, <fpage>1</fpage>&#x2013;<lpage>21</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s40537-014-0007-7</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nepal</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Eslamiat</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Comparing YOLOv3, YOLOv4 and YOLOv5 for autonomous landing spot detection in faulty UAVs</article-title>. <source>Sensors</source> <volume>22</volume>, <elocation-id>464</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s22020464</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Niedermeier</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Romaneessen</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Lehner</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Detection of coastlines in SAR images using wavelet methods</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>38</volume>, <fpage>2270</fpage>&#x2013;<lpage>2281</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/36.868884</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ouchi</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Recent trend and advance of synthetic aperture radar with selected topics</article-title>. <source>Remote Sens.</source> <volume>5</volume>, <fpage>716</fpage>&#x2013;<lpage>807</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs5020716</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Patel</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Bhatt</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Mazzeo</surname> <given-names>P. L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep learning-based automatic detection of ships: an experimental study using satellite images</article-title>. <source>J. Imaging</source> <volume>8</volume>, <elocation-id>182</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/jimaging8070182</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Divvala</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>You only look once: Unified, real-time object detection</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>779</fpage>&#x2013;<lpage>788</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1506.02640</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>YOLO9000: better, faster, stronger</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, <fpage>7263</fpage>&#x2013;<lpage>7271</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1612.08242</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Yolov3: An incremental improvement</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1804.02767</pub-id>. arXiv preprint arXiv:1804.02767.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Faster r-cnn: Towards real-time object detection with region proposal networks</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>28</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1506.01497</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rezatofighi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Tsoi</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Gwak</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Sadeghian</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Reid</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Savarese</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Generalized intersection over union: A metric and a loss for bounding box regression</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>658</fpage>&#x2013;<lpage>666</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1902.09630</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Salembier</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Liesegang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>L&#xf3;pez-Mart&#xed;nez</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Ship detection in SAR images based on maxtree representation and graph signal processing</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>57</volume>, <fpage>2709</fpage>&#x2013;<lpage>2724</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2018.2876603</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Pang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>Q. V.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Efficientdet: Scalable and efficient object detection</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>10781</fpage>&#x2013;<lpage>10790</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1911.09070</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Torres</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Snoeij</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Geudtner</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bibby</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Davidson</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Attema</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>GMES sentinel-1 mission</article-title>. <source>Remote Sens. Environ.</source> <volume>120</volume>, <fpage>9</fpage>&#x2013;<lpage>24</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2011.05.028</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Fast and automatic ship detection for SAR imagery based on multiscale contrast measure</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>16</volume>, <fpage>1834</fpage>&#x2013;<lpage>1838</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2019.2913873</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Liao</surname> <given-names>H.-Y. M.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Y.-H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>P.-Y.</given-names>
</name>
<name>
<surname>Hsieh</surname> <given-names>J.-W.</given-names>
</name>
<name>
<surname>Yeh</surname> <given-names>I.-H.</given-names>
</name>
</person-group> (<year>2020</year>a). &#x201c;<article-title>CSPNet: A new backbone that can enhance learning capability of CNN</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops</source>, <fpage>390</fpage>&#x2013;<lpage>391</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1911.11929</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.-P.</given-names>
</name>
</person-group> (<year>2020</year>c). &#x201c;<article-title>Contrast of contextual Fisher vectors for ship detection in SAR images</article-title>,&#x201d; in <source>2020 IEEE international radar conference (RADAR)</source> (<publisher-name>IEEE</publisher-name>), <fpage>198</fpage>&#x2013;<lpage>202</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/RADAR42522.2020.9114850</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.-P.</given-names>
</name>
<name>
<surname>He</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>d). <article-title>Ship detection in SAR images <italic>via</italic> local contrast of Fisher vectors</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>58</volume>, <fpage>6467</fpage>&#x2013;<lpage>6479</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2020.2976880</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Ship detection in multispectral remote sensing images <italic>via</italic> saliency analysis</article-title>. <source>Appl. Ocean Res.</source> <volume>106</volume>, <elocation-id>102448</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.apor.2020.102448</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>New hierarchical saliency filtering for fast ship detection in high-resolution SAR images</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>55</volume>, <fpage>351</fpage>&#x2013;<lpage>362</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2016.2606481</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>BO-DRNet: An improved deep learning model for oil spill detection by polarimetric features from SAR images</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <elocation-id>264</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs14020264</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Pei</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>b). &#x201c;<article-title>Context semantic perception based on superpixel segmentation for inshore ship detection in SAR image</article-title>,&#x201d; in <source>2020 IEEE radar conference (RadarConf20)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/RadarConf2043947.2020.9266627</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ming</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Precise and robust ship detection for high-resolution SAR imagery based on HR-SDNet</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <elocation-id>167</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs12010167</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Willburger</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Schwenk</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Brauchle</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>AMARO&#x2014;an on-board ship detection and real-time information system</article-title>. <source>Sensors</source> <volume>20</volume>, <elocation-id>1324</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s20051324</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xian</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhirui</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yuanrui</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wenhui</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Yue</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Kun</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>AIR-SARShip-1.0: High-resolution SAR ship detection dataset</article-title>. <source>J. Radars</source> <volume>8</volume>, <fpage>852</fpage>&#x2013;<lpage>862</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.12000/JR19097</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Niu</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Improved region convolutional neural network for ship detection in multiresolution synthetic aperture radar images</article-title>. <source>Concurrency Computation: Pract. Exp.</source> <volume>32</volume>, <elocation-id>e5820</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cpe.5820</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>SAR target detection in complex scene based on 2-d singularity power spectrum analysis</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>57</volume>, <fpage>9993</fpage>&#x2013;<lpage>10003</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2019.2930797</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Leng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A lightweight model for ship detection and recognition in complex-scene SAR images</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <elocation-id>6053</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs14236053</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>On-board real-time ship detection in HISEA-1 SAR images based on CFAR and lightweight deep learning</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <elocation-id>1995</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs13101995</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Lite-YOLOv5: A lightweight deep learning detector for on-board ship detection in large-scene Sentinel-1 SAR images</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <elocation-id>1018</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs14041018</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yasir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jianhua</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Mingming</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Hui</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhe</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shanwei</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Ship detection based on deep learning using SAR imagery: a systematic literature review</article-title>. <source>Soft Computing</source>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00500-022-07522-w</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yekeen</surname> <given-names>S. T.</given-names>
</name>
<name>
<surname>Balogun</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Yusof</surname> <given-names>K. B. W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A novel deep learning instance segmentation model for automated marine oil spill detection</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>167</volume>, <fpage>190</fpage>&#x2013;<lpage>200</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isprsjprs.2020.07.011</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A novel SAR sidelobe suppression method based on CNN</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>18</volume>, <fpage>132</fpage>&#x2013;<lpage>136</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2020.2968336</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>UnitBox: An advanced object detection network</article-title>,&#x201d; in <source>Proceedings of the 24th ACM international conference on multimedia</source>, <fpage>516</fpage>&#x2013;<lpage>520</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/2964284.2967274</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Atkinson</surname> <given-names>P. M.</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Jian</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>b). <article-title>Two-phase object-based deep learning for multi-temporal SAR image change detection</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <elocation-id>548</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs12030548</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>a). &#x201c;<article-title>Overview of research on marine target recognition</article-title>,&#x201d; in <source>2nd international conference on computer vision, image, and deep learning</source> (<publisher-name>SPIE</publisher-name>), <fpage>273</fpage>&#x2013;<lpage>282</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1117/12.2604530</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>Q.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>FreeAnchor: Learning to match anchors for visual object detection</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>32</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1909.02466</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>c). &#x201c;<article-title>Ship detection based on superpixel-level hybrid non-local MRF for SAR imagery</article-title>,&#x201d; in <source>2020 5th Asia-pacific conference on intelligent robot systems (ACIRS)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACIRS49895.2020.9162609</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ke</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>b). <article-title>Quad-FPN: A novel quad feature pyramid network for SAR ship detection</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <elocation-id>2771</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs13142771</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ke</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhan</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>a). <article-title>LS-SSDD-v1.0: A deep learning dataset dedicated to small ship detection from large-scale Sentinel-1 SAR images</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <elocation-id>2997</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs12182997</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Kuang</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Attention receptive pyramid network for ship detection in SAR images</article-title>. <source>IEEE J. Selected Topics Appl. Earth Observations Remote Sens.</source> <volume>13</volume>, <fpage>2738</fpage>&#x2013;<lpage>2756</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2020.2997081</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>