<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2025.1476616</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Automatic detection, identification and counting of deep-water snappers on underwater baited video using deep learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Baletaud</surname>
<given-names>Florian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2799983"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Villon</surname>
<given-names>S&#xe9;bastien</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gilbert</surname>
<given-names>Antoine</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>C&#xf4;me</surname>
<given-names>Jean-Marie</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fiat</surname>
<given-names>Sylvie</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/887002"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Iovan</surname>
<given-names>Corina</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Vigliola</surname>
<given-names>Laurent</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2890105"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>ENTROPIE, Institut de Recherche pour le D&#xe9;veloppement (IRD), UR, UNC, IFREMER, CNRS, Centre IRD de Noum&#xe9;a</institution>, <addr-line>Noumea</addr-line>, <country>New Caledonia</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Soproner, Groupe GINGER</institution>, <addr-line>Noumea</addr-line>, <country>New Caledonia</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Burgeap, Groupe GINGER</institution>, <addr-line>Lyon</addr-line>, <country>France</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Maria Grazia Pennino, Spanish Institute of Oceanography (IEO), Spain</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Andr&#xe9;s Fuster-Guill&#xf3;, University of Alicante, Spain</p>
<p>Suja Cherukullapurath Mana, PES University, India</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Florian Baletaud, <email xlink:href="mailto:florianbaletaud@hotmail.com">florianbaletaud@hotmail.com</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>06</day>
<month>02</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1476616</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>08</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>01</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Baletaud, Villon, Gilbert, C&#xf4;me, Fiat, Iovan and Vigliola</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Baletaud, Villon, Gilbert, C&#xf4;me, Fiat, Iovan and Vigliola</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Deep-sea demersal fisheries in the Pacific have strong commercial, cultural, and recreational value, especially snappers (Lutjanidae) which make the bulk of catches. Yet, managing these fisheries is challenging due to the scarcity of data. Stereo-Baited Remote Underwater Video Stations (BRUVS) can provide valuable quantitative information on fish stocks, but manually processing large amounts of videos is time-consuming and sometimes unrealistic. To address this issue, we used a Region-based Convolutional Neural Network (Faster R-CNN), a deep learning architecture to automatically detect, identify and count deep-water snappers in BRUVS. Videos were collected in New Caledonia (South Pacific) at depths ranging from 47 to 552 m. Using a dataset of 12,100 annotations from 11 deep-water snapper species observed in 6,364 images, we obtained good model performance for the 6 species with sufficient annotations (F-measures &gt;0.7, up to 0.87). The correlation between automatic and manual estimates of fish MaxN abundance in videos was high (0.72 &#x2013; 0.9), but the Faster R-CNN showed an underestimation bias at higher abundances. A semi-automatic protocol where our model supported manual observers in processing BRUVS footage improved performance with a correlation of 0.96 with manual counts and a perfect match (R=1) for some key species. This model can already assist manual observers to semi-automatically process BRUVS footage and will certainly improve when more training data becomes available to decrease the rate of false negatives. This study further shows that the use of artificial intelligence in marine science is progressive but warranted for the future.</p>
</abstract>
<kwd-group>
<kwd>deep-water snapper fisheries</kwd>
<kwd>artificial intelligence</kwd>
<kwd>semi-automatic</kwd>
<kwd>BRUVS</kwd>
<kwd>faster R-CNN</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="3"/>
<equation-count count="3"/>
<ref-count count="75"/>
<page-count count="12"/>
<word-count count="5876"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Marine Fisheries, Aquaculture and Living Resources</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>In order to assess fisheries stock for a target species, it is necessary to estimate its abundance and biomass spatially and across time, but also along the species length structure (<xref ref-type="bibr" rid="B22">Gulland, 1983</xref>). Such information may be insufficient or biased when acquired from landings of data-poor fisheries, thus calling for independent methods to complement traditional fisheries stock assessments (<xref ref-type="bibr" rid="B44">Moore et&#xa0;al., 2013</xref>). The emergence of video-assisted methods like BRUVS (Baited Remote Underwater Video Stations) (<xref ref-type="bibr" rid="B69">Whitmarsh et&#xa0;al., 2017</xref>) using low-cost small action cameras may provide such valuable complementary information (<xref ref-type="bibr" rid="B44">Moore et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B34">Letessier et&#xa0;al., 2015</xref>). However, video-based assessments require a considerable processing time to manually count fish on images, limiting their broad-scale applications (<xref ref-type="bibr" rid="B59">Sheaves et&#xa0;al., 2020</xref>). Modern automated video analyses using deep learning algorithms are becoming more accurate (<xref ref-type="bibr" rid="B65">Villon et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B43">Marrable et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B5">Bhalla et&#xa0;al., 2024</xref>) and may reduce these costly video-processing constraints (<xref ref-type="bibr" rid="B62">Tseng and Kuo, 2020</xref>; <xref ref-type="bibr" rid="B14">Connolly et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B38">Lopez-Marcano et&#xa0;al., 2021</xref>). Yet the lack of labelled, species-rich, datasets for fish classification and identification keeps its automation binding. 
Furthermore, the performance of deep learning algorithms on deep video surveys, including darkness, artificial lighting or generally variable image conditions and backgrounds, is still poorly known (<xref ref-type="bibr" rid="B51">Saleh et al., 2024</xref>; <xref ref-type="bibr" rid="B26">Jian et&#xa0;al., 2024</xref>).</p>
<p>In the Pacific, deep-water demersal fisheries are of high significance not only for local consumption but also for their commercial, cultural, and recreational value (<xref ref-type="bibr" rid="B15">Dalzell and Preston, 1992</xref>). Their commercial development began in the 1970s to alleviate fishing pressure on coral reefs but has generally collapsed in the 1990s (<xref ref-type="bibr" rid="B70">Williams et&#xa0;al., 2012</xref>). Over time, these fisheries have transitioned primarily to subsistence but continued to hold commercial significance in more developed and isolated regions like New Caledonia and Hawaii (<xref ref-type="bibr" rid="B45">Newman et&#xa0;al., 2016</xref>). While deep demersal fisheries include around 200 species in the western Pacific Ocean, the landed species are mainly composed of snappers, a group in the <italic>Lutjanidae</italic> family associated to the genera <italic>Etelis, Pristipomoides, Aphareus</italic>, and <italic>Aprion</italic>. Deep-water snappers are characterized by relatively slow metabolic rates and long lifespans, making them highly vulnerable to overfishing (<xref ref-type="bibr" rid="B45">Newman et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B67">Wakefield et&#xa0;al., 2020</xref>). Usually found at depths starting at 100 m to 500 m and more, these fish aggregate in structured topographies like steep slopes, seamounts, or any topographic anomaly such as sand banks or pinnacles (<xref ref-type="bibr" rid="B20">Gomez et&#xa0;al., 2015</xref>). Yet, deep-water snapper fisheries lack core management measures based on stock assessments which remain challenging to perform in such hardly accessible marine habitats (<xref ref-type="bibr" rid="B45">Newman et&#xa0;al., 2016</xref>).</p>
<p>Baited Remote Underwater Video Stations are among the most used, standardized video techniques to study underwater fish ecology (<xref ref-type="bibr" rid="B69">Whitmarsh et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B30">Langlois et&#xa0;al., 2020</xref>). BRUVS can assess spatial and temporal variation in fish assemblages through visual identification and quantifying species abundance (<xref ref-type="bibr" rid="B34">Letessier et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B68">Wellington et&#xa0;al., 2018</xref>). They are a low-cost method able to generate large amounts of data (<xref ref-type="bibr" rid="B10">Cappo et&#xa0;al., 2007</xref>; <xref ref-type="bibr" rid="B46">Osgood et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B39">MacNeil et&#xa0;al., 2020</xref>). BRUVS can be deployed in a variety of habitats, including coral reefs, but also soft sediments, freshwater, the deep sea, or the pelagic environment (<xref ref-type="bibr" rid="B16">Ellender et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B19">Gladstone et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B75">Zintzen et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B24">Henderson et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B54">Schmid et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B35">Letessier et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B49">Reis-Filho et&#xa0;al., 2019</xref>). Their use in environmental monitoring is increasing with more studies focusing on industrial settings like underwater pipelines (<xref ref-type="bibr" rid="B8">Bond et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B56">Schramm et&#xa0;al., 2020</xref>, <xref ref-type="bibr" rid="B57">2021</xref>) or windfarms (<xref ref-type="bibr" rid="B21">Griffin et&#xa0;al., 2016</xref>). 
BRUVS are also emerging as independent and complementary methods for fisheries stock assessments (<xref ref-type="bibr" rid="B11">Cappo et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B3">Ault et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B7">Boldt et&#xa0;al., 2018</xref>). Clearly, BRUVS show great potential for monitoring deep-sea fisheries.</p>
<p>When manually processing BRUVS footage by identifying, counting, and measuring fish, the fastest and commonly used metric is the MaxN (<xref ref-type="bibr" rid="B69">Whitmarsh et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B30">Langlois et&#xa0;al., 2020</xref>). MaxN corresponds to the maximum number of individuals per species that can be counted in a single image per video. While conservative, this measure prevents counting the same individuals twice. It has been shown that getting accurate fish abundance measures on each image from a video station, or within short video periods, and averaging these measures along the whole video may be more representative, but would multiply processing costs (<xref ref-type="bibr" rid="B55">Schobernd et&#xa0;al., 2014</xref>). This cost could effectively be reduced using deep learning algorithms.</p>
  <p>Deep learning and specifically Convolutional Neural Networks (CNNs) are artificial intelligence algorithms that generate classification by autonomously identifying features in images (<xref ref-type="bibr" rid="B31">LeCun et&#xa0;al., 2015</xref>). The rapid progress in the automatic processing of underwater images has already long permeated in ecology with the accurate detection of several marine species (<xref ref-type="bibr" rid="B13">Christin et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B42">Mannocci et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B52">Saleh et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B71">Xu et&#xa0;al., 2023</xref>). The ability to detect and identify fish on images in their natural environment has also been explored, but studies have mainly targeted coral reef fish, which can be highly differentiated due to their diversity of shapes and colors (<xref ref-type="bibr" rid="B41">Mandal et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B65">Villon et&#xa0;al., 2018</xref>, <xref ref-type="bibr" rid="B64">2022</xref>; <xref ref-type="bibr" rid="B51">Saleh et al., 2024</xref>). The available public images follow the same trend but are diversifying on shallow habitats, with images from fish at deeper strata still lacking (<xref ref-type="bibr" rid="B51">Saleh et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B5">Bhalla et&#xa0;al., 2024</xref>). To our knowledge, few studies have used deep-water images with their own singular constraints like variable light levels (<xref ref-type="bibr" rid="B52">Saleh et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B26">Jian et&#xa0;al., 2024</xref> but see <xref ref-type="bibr" rid="B37">Liu et&#xa0;al., 2023</xref>), and none for the deep-water snappers. 
Given the diversity of habitats and conditions in which fish can be detected, incorporating more diverse species and backgrounds is crucial for improving general fish detection and identification techniques (<xref ref-type="bibr" rid="B52">Saleh et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B5">Bhalla et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B26">Jian et&#xa0;al., 2024</xref>).</p>
<p>The state-of-the-art of object detection and classification features three primary algorithms: Single Shot Detection (SSD), Faster Region-based Convolutional Neural Network (Faster R-CNN), and You Only Look Once (YOLO) (<xref ref-type="bibr" rid="B5">Bhalla et&#xa0;al., 2024</xref>). While YOLO and SSD have demonstrated notable speed advantages over Faster R-CNN, the latter has shown superior accuracy in object detection and classification (<xref ref-type="bibr" rid="B29">Kim et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B9">Bose and Kumar, 2020</xref>; <xref ref-type="bibr" rid="B27">Kaarmukilan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B32">Lee and Kim, 2020</xref>; <xref ref-type="bibr" rid="B33">Lee et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B40">Mahendrakar et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B53">Sarma et&#xa0;al., 2024</xref>). This difference is due to the NAS (Neural Architecture Search) automatically searching and building the most efficient architecture (<xref ref-type="bibr" rid="B17">Elsken et&#xa0;al., 2018</xref>). Furthermore, while some recent versions of YOLO do outperform older Faster R-CNN implementations, one of YOLO&#x2019;s weaknesses is its inability to address important variation of object sizes like Faster R-CNN can do (<xref ref-type="bibr" rid="B2">Ammar et&#xa0;al., 2019</xref>). Such variation is commonplace in underwater videos, where individuals can appear either close or very far from the camera. One of the main advantages of YOLO is its speed in real-time detection operations where faster R-CNN will take more processing time. BRUVS are usually deployed and retrieved over a short period of time, leading to an inevitable separated processing time from deployment. For this reason, Faster R-CNN seems to represent the best option for this context as being the most precise although a little bit slower (<xref ref-type="bibr" rid="B53">Sarma et&#xa0;al., 2024</xref>).</p>
<p>Here, we chose the Faster R-CNN architecture and assessed its ability to automatically detect deep-water snapper species in BRUVS images from deep slopes and seamounts of a South Pacific island: New Caledonia. We then discuss constraints and solutions about how this algorithm may help accelerate video processing for fisheries stock assessments considering a fully automatic and semi-automatic approach. To our knowledge, this study is the first to train an artificial intelligence algorithm for the detection, identification and counting of deep-water snapper in the wild on baited videos.</p>
<p>The main contributions of this article are as follows:</p>
<list list-type="order">
<list-item>
<p>To address the problem of high processing costs associated with manual data extraction on images by experts on BRUVS footage of commercial species, we propose the use of artificial intelligence, specifically the Faster R-CNN deep learning algorithm, to automate the detection, identification and counting of deep-water snappers (Lutjanidae family) observed in New Caledonia.</p>
</list-item>
<list-item>
<p>To address the choice of deep learning algorithm for non-specialists, we propose the use of the Faster R-CNN architecture. It has proven to be effective in processing varying objects (species) with higher accuracy compared to other model architectures.</p>
</list-item>
<list-item>
<p>To address the problem of too small training dataset, we propose a semi-automatic method which combines manual and automatic processes to improve the accuracy of fish abundance estimates. This semi-automatic process achieved results much closer to manual count while reducing the number of images checked by the expert to the amount of detections by the algorithm.</p>
</list-item>
</list>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Video dataset origin</title>
<p>New Caledonia is a sanctuary and hotspot for marine biodiversity (<xref ref-type="bibr" rid="B48">Payri et&#xa0;al., 2019</xref>). Anthropogenic pressure is low, with around 271,400 inhabitants over 16,372 km&#xb2; (isee.nc) disproportionately localized around its capital, Noumea. The 400 km long main island is surrounded by a 1,600 km long coral reef barrier and wilderness atolls, reefs, and small islands scattered across the 1,450,000 km&#xb2; of the New-Caledonian Exclusive Economic Zone (EEZ). Mainly composed of deep sea, 40% of the EEZ surface is a potential habitat for deep-sea snappers (<xref ref-type="bibr" rid="B20">Gomez et&#xa0;al., 2015</xref>). A total of 15 sites were sampled with BRUVS, including 11 seamount summits and 4 deep island slopes, during four oceanographic campaigns conducted aboard the RV ALIS in 2019 and 2020. Sample depths varied between 47 and 552 m (<xref ref-type="bibr" rid="B4">Baletaud et&#xa0;al., 2023</xref>).</p>
<p>On each seamount or deep slope, five to ten video samples were collected for a total of 121 deep water BRUVS deployments using GoPro Hero 4. Cameras were set with a medium field of view in 1920x1080 at 30 frames per second and at 1200 lumens, 120-degree angle led light (Groupbinc). BRUVS were baited with 1kg of crushed sardines in a perforated PVC canister and provided 2 hours of usable seafloor footage. Then, videos were manually processed, and MaxN (maximum abundance per species in a single frame, <xref ref-type="bibr" rid="B30">Langlois et&#xa0;al., 2020</xref>) was estimated for each species using the EventMeasure (Seagis) software (version 5.42). Eleven species of deep-water snappers were observed throughout this 121 BRUVS dataset. Snappers were observed at variable abundances on 98 BRUVS and were absent in the remaining 23 video stations. We then extracted a total of 410 video clips of 15 seconds centered around each MaxN observation. Overlapping sequences between different species&#x2019; MaxN on each video clip were filtered to avoid duplicated annotations of identical images. These video clips were then sliced to two or five frames per second for manual annotation. The annotation procedure was identical to a previous study (<xref ref-type="bibr" rid="B65">Villon et&#xa0;al., 2018</xref>). Briefly, for each image, the coordinates of the box enclosing each observed snapper were registered using Computer Vision Annotation Tool (CVAT) (<xref ref-type="bibr" rid="B58">Sekachev et&#xa0;al., 2020</xref>). This procedure yielded 12,100 individual deep-water snapper annotations identified at the species level on 6,364 images extracted from the video sequences. The image dataset was then split into a training and a testing dataset. Splitting considered individual BRUVS to avoid images of the same species and BRUVS in the training and testing dataset, and thus minimize false negatives (<xref ref-type="bibr" rid="B66">Villon et&#xa0;al., 2020</xref>). 
The training dataset included 80% of annotations (5,031 images, 9,782 annotations), and the remaining 20% were used in the testing dataset (1,333 images, 2,318 annotations). Species-wise annotations were highly unbalanced as some species occurred more often than others (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). <italic>Randallichthys filamentosus</italic> was represented by only three annotations, resulting in no image in the testing dataset. Therefore, the species was only kept in the model training to add diversity to its training data.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Annotation summary for the training and testing datasets per species used with the R-CNN algorithm.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Species</th>
<th valign="middle" colspan="2" align="center">Annotations</th>
</tr>
<tr>
<th valign="middle" align="center"/>
<th valign="middle" align="center">Train</th>
<th valign="middle" align="center">Test</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides filamentosus</italic>
</td>
<td valign="middle" align="center">5,729</td>
<td valign="middle" align="center">1,303</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides flavipinnis</italic>
</td>
<td valign="middle" align="center">1,724</td>
<td valign="middle" align="center">395</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Aphareus rutilans</italic>
</td>
<td valign="middle" align="center">847</td>
<td valign="middle" align="center">239</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Etelis coruscans</italic>
</td>
<td valign="middle" align="center">508</td>
<td valign="middle" align="center">117</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides argyrogrammicus</italic>
</td>
<td valign="middle" align="center">489</td>
<td valign="middle" align="center">114</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Aprion virescens</italic>
</td>
<td valign="middle" align="center">186</td>
<td valign="middle" align="center">74</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Etelis carbunculus</italic>
</td>
<td valign="middle" align="center">89</td>
<td valign="middle" align="center">13</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides multidens</italic>
</td>
<td valign="middle" align="center">73</td>
<td valign="middle" align="center">13</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Parapristipomoides squamimaxillaris</italic>
</td>
<td valign="middle" align="center">68</td>
<td valign="middle" align="center">38</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides zonatus</italic>
</td>
<td valign="middle" align="center">66</td>
<td valign="middle" align="center">12</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Randallichthys filamentosus</italic>
</td>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">0</td>
</tr>
<tr>
<td valign="middle" align="center">All species</td>
<td valign="middle" align="center">9,782</td>
<td valign="middle" align="center">2,318</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Deep learning model and evaluation metrics</title>
<p>CNNs are specific algorithms designed for object detection and image classification. By initially extracting pixel sets that represent potential features, CNNs apply filters and weights to generate a localized sum of pixels throughout the image. Training these algorithms involves supplying raw images along with manually annotated features, enabling the recognition of specified objects. The output generated by the CNN is the list of identified objects and their respective probability scores.</p>
<p>We used the Faster Region-Based Convolutional Neural Network (Faster R-CNN) dedicated to object detection (<xref ref-type="bibr" rid="B50">Ren et&#xa0;al., 2017</xref>). Faster R-CNN has proven to be the best type of architecture to process objects within a large range of sizes, and to provide higher accuracies than other models (<xref ref-type="bibr" rid="B2">Ammar et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B9">Bose and Kumar, 2020</xref>; <xref ref-type="bibr" rid="B27">Kaarmukilan et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B33">Lee et&#xa0;al., 2021</xref>). For these reasons, the architecture is particularly suited to applications in the field of marine biodiversity and is indeed commonly used for fish detection and classification (<xref ref-type="bibr" rid="B6">Blowers et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B12">Chen et&#xa0;al., 2023</xref>). The model was used with a hybrid inception module coupled to a NAS ResNet configuration (Inception-ResNet V2) with images processed in 1024x1024 format. The architecture was pre-trained on the COCO (common objects in context) dataset (<xref ref-type="bibr" rid="B36">Lin et&#xa0;al., 2014</xref>), and is built as follows: 1) a feature extractor relying on inception (<xref ref-type="bibr" rid="B61">Szegedy et&#xa0;al., 2015</xref>) and residual connections (<xref ref-type="bibr" rid="B23">He et&#xa0;al., 2016</xref>) to embed the image, 2) a region proposal network composed of convolutional layers predicting the likelihood of object presence (<xref ref-type="bibr" rid="B74">Zhong et&#xa0;al., 2020</xref>), 3) a region of interest pooling layer deleting redundant bounding boxes, 4) fully connected layers refining the features of each object and 5) a classification layer with a softmax function which outputs classification scores for each region proposal. 
Such a two-stage architecture is particularly efficient at processing images with objects of different sizes, fitting the context of fish detection and classification. All further details and the model architecture can be found in TensorFlow 2&#x2019;s GitHub model directory. The training and testing data of our BRUVS images annotated with the deep-water snapper species were converted into the TensorFlow file format and supplied to the architecture. Model training and testing were carried out through the open-source TensorFlow API in Python 3. The hardware used comprised four parallelized NVIDIA Quadro RTX 8000 cards with 196 GB of CPU memory and 42 GB of GPU memory and operated on an Ubuntu operating system. The model was run for 200,000 iterations with a batch size of 8.</p>
<p>The test dataset provided the number of true positives (a detection of the correct species where it has been manually annotated), false negatives (no detection in images with manually annotated species), and false positives (detection in an image where no individual was present, or the incorrect species detected). From these parameters, the common assessment metrics used in deep learning were computed: recall (1), precision (2), and F-measure (3) (<xref ref-type="bibr" rid="B73">Zhang and Zhang, 2009</xref>). Each metric&#x2019;s value ranges from 0 to 1, with values closer to 1 indicating better performance.</p>
<p>The recall reveals the algorithm&#x2019;s essential ability to accurately detect and identify the desired features. It represents instances where detections should have taken place in the test dataset but were missed. It is calculated by dividing the number of true positives by the sum of true positives and false negatives:</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Precision indicates the algorithm&#x2019;s detection error rate, calculated by dividing the number of true positives by the combined sum of true positives and false positives:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The F-measure is a general indicator of the model&#x2019;s quality and is equal to the harmonic mean of recall and precision:</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>2</mml:mn>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>These evaluation metrics were calculated for each of the eleven species seen across all frames of the test dataset.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Automatic and semi-automatic fish counting on video</title>
<p>In order to evaluate the ability of the algorithm to estimate MaxN, the number of automatic detections per frame (MaxN<sub>Auto</sub>) in the test dataset was compared to the number of manual annotations (MaxN<sub>Man</sub>). First, the Pearson correlation coefficient was used for its simplicity in quantifying the strength and direction of the linear relationship between MaxN<sub>Auto</sub> and MaxN<sub>Man</sub>. A high correlation (close to 1) will indicate a strong positive linear relationship between both indices. Then, using a standard linear regression, the intercept of the linear relationship between MaxN<sub>Auto</sub> and MaxN<sub>Man</sub> was tested against zero. The slope was also tested against 1 to evaluate whether the algorithm underestimated or overestimated the number of detections, and hence fish abundance.</p>
<p>Next, we proposed a semi-automatic approach that combines the trained algorithm with manual intervention on images containing detections. This method aimed at evaluating the potential of deep learning-assisted video processing. All images where the Faster R-CNN detected deep-water snappers were reviewed and manually corrected by an expert biologist. This process eliminated false positives, leaving only errors due to false negatives. Using this protocol, we recalculated the model metrics based on the corrected misclassifications. With no more false positives, the precision metrics consistently reached 1. The semi-automatic MaxN (MaxN<sub>Semi</sub>) was then compared to the MaxN<sub>Auto</sub> using the Pearson correlation and linear regression against MaxN<sub>Man</sub>.
</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<p>Faster R-CNN training lasted for four days in order to execute 200,000 iterations on the multi-GPU computer. Out of the 1,333 testing images comprising 2,318 annotated fish, the trained Faster R-CNN automatically detected 2,351 fish, out of which 1,786 were true positives (76%) and 565 were false positives.</p>
<p>The F-measure of automatic detections ranged between 0.15 to 0.87, indicating considerable variation in the evaluation measures per species. Largest values were obtained for <italic>Etelis coruscans</italic> (F-measure: 0.87, recall: 0.91, precision: 0.84), closely followed by <italic>Pristipomoides filamentosus</italic> (F-measure: 0.79, recall: 0.86, precision: 0.73, <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). <italic>Pristipomoides multidens</italic> was not detected on any of the 13 testing observations, hence values of 0 for the recall and precision. <italic>Pristipomoides zonatus</italic> was hardly detected in the equally low testing observations (recall of 0.08 on 12 annotations). However, the model never classified another deep-water snapper as this species (precision of 1.0). These two latter species, along with <italic>Etelis carbunculus</italic> and <italic>Parapristipomoides squamimaxillaris</italic>, were those with 89 or fewer annotations to train the model. Species with comparatively higher annotation numbers (from 186 for <italic>Aprion virescens</italic> up to 5,729 for <italic>P. filamentosus</italic>) showed F-measures of at least 0.71 (<italic>Pristipomoides flavipinnis</italic>). A sample of the testing dataset is illustrated in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Evaluation metrics (recall, precision and F-measure) generated from the testing dataset for 10 deep-water snapper species on the trained Faster R-CNN (automatic) and the corrected detections from the Faster R-CNN (semi-automatic).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center"/>
<th valign="top" colspan="2" align="center">Annotations</th>
<th valign="middle" colspan="3" align="center">Automatic</th>
<th valign="top" colspan="3" align="center">Semi-automatic</th>
</tr>
<tr>
<th valign="middle" align="center">Species</th>
<th valign="top" align="center">Train</th>
<th valign="top" align="center">Test</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">F-measure</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">F-measure</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">
<italic>Etelis coruscans</italic>
</td>
<td valign="middle" align="center">508</td>
<td valign="middle" align="center">117</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.87</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides filamentosus</italic>
</td>
<td valign="middle" align="center">5,729</td>
<td valign="middle" align="center">1,303</td>
<td valign="middle" align="center">0.86</td>
<td valign="middle" align="center">0.73</td>
<td valign="middle" align="center">0.79</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.97</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Aprion virescens</italic>
</td>
<td valign="middle" align="center">186</td>
<td valign="middle" align="center">74</td>
<td valign="middle" align="center">0.76</td>
<td valign="middle" align="center">0.76</td>
<td valign="middle" align="center">0.76</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.86</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides argyrogrammicus</italic>
</td>
<td valign="middle" align="center">489</td>
<td valign="middle" align="center">114</td>
<td valign="middle" align="center">0.70</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.81</td>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.86</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Aphareus rutilans</italic>
</td>
<td valign="middle" align="center">847</td>
<td valign="middle" align="center">239</td>
<td valign="middle" align="center">0.66</td>
<td valign="middle" align="center">0.82</td>
<td valign="middle" align="center">0.73</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.89</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides flavipinnis</italic>
</td>
<td valign="middle" align="center">1,724</td>
<td valign="middle" align="center">395</td>
<td valign="middle" align="center">0.65</td>
<td valign="middle" align="center">0.78</td>
<td valign="middle" align="center">0.71</td>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.86</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Etelis carbunculus</italic>
</td>
<td valign="middle" align="center">89</td>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">0.15</td>
<td valign="middle" align="center">0.22</td>
<td valign="middle" align="center">0.18</td>
<td valign="top" align="center">0.15</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.27</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Parapristipomoides squamimaxillaris</italic>
</td>
<td valign="middle" align="center">66</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">0.11</td>
<td valign="middle" align="center">1.00</td>
<td valign="middle" align="center">0.19</td>
<td valign="top" align="center">0.11</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.19</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides zonatus</italic>
</td>
<td valign="middle" align="center">68</td>
<td valign="middle" align="center">38</td>
<td valign="middle" align="center">0.08</td>
<td valign="middle" align="center">1.00</td>
<td valign="middle" align="center">0.15</td>
<td valign="top" align="center">0.08</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.15</td>
</tr>
<tr>
<td valign="middle" align="center">
<italic>Pristipomoides multidens</italic>
</td>
<td valign="middle" align="center">73</td>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">NA</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Species are ordered by the Automatic Recall.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Examples of correct <bold>(A&#x2013;D)</bold> and incorrect <bold>(E&#x2013;H)</bold> detections on the test dataset. <bold>(A)</bold> Six correct detections of <italic>Pristipomoides filamentosus</italic>, <bold>(B)</bold> correct detection of <italic>Aphareus rutilans</italic> and <italic>P. filamentosus</italic> while correctly leaving undetected two emperors <italic>Lethrinus miniatus</italic> and a grouper <italic>Epinephelus maculatus</italic>. <bold>(C)</bold> Correct detection of <italic>Etelis coruscans</italic>. <bold>(D)</bold> Correct detection of three <italic>A. rutilans</italic> and a single <italic>Pristipomoides flavipinnis</italic>. <bold>(E)</bold> Correct detection of the single <italic>P. filamentosus</italic> with incorrect detection of an emperor (<italic>Gymnocranius euanus</italic>) as <italic>P. filamentosus</italic> and a surgeonfish (<italic>Naso hexacanthus</italic>) as <italic>Aprion virescens</italic>. <bold>(F)</bold> Incorrect classification of <italic>A. rutilans</italic> and an emperor (<italic>L. miniatus</italic>) as <italic>P. filamentosus</italic>. <bold>(G)</bold> Incorrect classification of <italic>A. rutilans</italic> and a grouper (<italic>Epinephelus chlorostigma</italic>) as <italic>P. filamentosus</italic>, <bold>(H)</bold> incorrect classification of two <italic>P. flavipinnis</italic> as <italic>P. filamentosus</italic> and a grouper (<italic>Variola louti</italic>) as <italic>Etelis carbunculus</italic>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1476616-g001.tif"/>
</fig>
<p>The semi-automatic approach, in which the expert corrected classification errors, showed an F-measure ranging from 0.15 to 1 (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). A drastic increase in performance metrics was observed for species with higher numbers of annotations (&#x2265;186), with semi-automatic F-measures ranging from 0.86 for <italic>A. virescens</italic> to 1 for <italic>E. coruscans</italic>, which showed no more false negatives in the testing dataset. <italic>P. filamentosus</italic>, with the highest number of images tested (1,303), returned an F-value of 0.97 compared to 0.79 without correction. The largest increase in F value was for <italic>A. rutilans</italic> and <italic>P. flavipinnis</italic>, from 0.73 and 0.71 to 0.89 and 0.86, respectively.</p>
<p>For the analysis of fish abundance on whole BRUVS (MaxN), we focused on species with F-measures greater than or equal to 0.71 (i.e., species with more than 100 training annotations, <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>) as models with lower F-measures provided poor abundance estimates. High correlation coefficients were observed between manually and automatically estimated fish abundance (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>; <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>). Pearson Correlation coefficient ranged between 0.72 and 0.90 among species, with the highest values observed for <italic>Etelis coruscans</italic> and an overall value of 0.85 when combining data from all species.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Comparison of manual (MaxN<sub>Man</sub>), automatic (MaxN<sub>Auto</sub>) and semi-automatic (MaxN<sub>Semi</sub>) fish abundance on baited remote underwater video stations (BRUVS) using the R-CNN trained on the deep-water snapper species. Only species with more than 100 annotations were considered relevant for this analysis. Point size is proportional to the number of detections against annotated fish. Automatic and semi-automatic linear fits are also shown with a dotted reference line of slope 1 and intercept 0.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1476616-g002.tif"/>
</fig>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>R squared (R&#xb2;), Pearson correlation coefficient (correlation), test of intercept against zero (intercept), test of slope against zero (slope) and test of slope against one (slope = 1 p-value) for automatic (MaxN<sub>Auto</sub>) and semi-automatic (MaxN<sub>semi</sub>) counts.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Species</th>
<th valign="middle" align="center">Method</th>
<th valign="middle" align="center">R&#xb2;</th>
<th valign="middle" align="center">Correlation</th>
<th valign="middle" align="center">Intercept</th>
<th valign="middle" align="center">Slope</th>
<th valign="middle" align="center">Slope = 1<break/>p-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="2" align="center">All species</td>
<td valign="middle" align="center">Automatic</td>
<td valign="middle" align="center">0.73</td>
<td valign="middle" align="center">0.85<sup>***</sup>
</td>
<td valign="middle" align="center">0.05<sup>***</sup>
</td>
<td valign="middle" align="center">0.88<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" align="center">Semi-auto</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.96<sup>***</sup>
</td>
<td valign="middle" align="center">-0.02<sup>***</sup>
</td>
<td valign="middle" align="center">0.94<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<italic>Etelis coruscans</italic>
</td>
<td valign="middle" align="center">Automatic</td>
<td valign="middle" align="center">0.81</td>
<td valign="middle" align="center">0.90<sup>***</sup>
</td>
<td valign="middle" align="center">0.02<sup>***</sup>
</td>
<td valign="middle" align="center">0.87<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" align="center">Semi-auto</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0<sup>NS</sup>
</td>
<td valign="middle" align="center">1.00<sup>***</sup>
</td>
<td valign="middle" align="center">NS</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<italic>Pristipomoides filamentosus</italic>
</td>
<td valign="middle" align="center">Automatic</td>
<td valign="middle" align="center">0.70</td>
<td valign="middle" align="center">0.84<sup>***</sup>
</td>
<td valign="middle" align="center">0.31<sup>***</sup>
</td>
<td valign="middle" align="center">0.86<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" align="center">Semi-auto</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.98<sup>***</sup>
</td>
<td valign="middle" align="center">-0.05<sup>***</sup>
</td>
<td valign="middle" align="center">0.99<sup>***</sup>
</td>
<td valign="middle" align="center">NS</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<italic>Aprion virescens</italic>
</td>
<td valign="middle" align="center">Automatic</td>
<td valign="middle" align="center">0.56</td>
<td valign="middle" align="center">0.75<sup>***</sup>
</td>
<td valign="middle" align="center">0.01<sup>*</sup>
</td>
<td valign="middle" align="center">0.80<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" align="center">Semi-auto</td>
<td valign="middle" align="center">0.75</td>
<td valign="middle" align="center">0.86<sup>***</sup>
</td>
<td valign="middle" align="center">0NS</td>
<td valign="middle" align="center">0.76<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<italic>Pristipomoides argyrogrammicus</italic>
</td>
<td valign="middle" align="center">Automatic</td>
<td valign="middle" align="center">0.71</td>
<td valign="middle" align="center">0.84<sup>***</sup>
</td>
<td valign="middle" align="center">0.0005<sup>NS</sup>
</td>
<td valign="middle" align="center">0.73<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" align="center">Semi-auto</td>
<td valign="middle" align="center">0.79</td>
<td valign="middle" align="center">0.89<sup>***</sup>
</td>
<td valign="middle" align="center">-0.004<sup>NS</sup>
</td>
<td valign="middle" align="center">0.80<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<italic>Aphareus rutilans</italic>
</td>
<td valign="middle" align="center">Automatic</td>
<td valign="middle" align="center">0.64</td>
<td valign="middle" align="center">0.80<sup>***</sup>
</td>
<td valign="middle" align="center">0.03<sup>***</sup>
</td>
<td valign="middle" align="center">0.66<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" align="center">Semi-auto</td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.91<sup>***</sup>
</td>
<td valign="middle" align="center">-0.01<sup>NS</sup>
</td>
<td valign="middle" align="center">0.85<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">
<italic>Pristipomoides flavipinnis</italic>
</td>
<td valign="middle" align="center">Automatic</td>
<td valign="middle" align="center">0.52</td>
<td valign="middle" align="center">0.72<sup>***</sup>
</td>
<td valign="middle" align="center">0.05<sup>***</sup>
</td>
<td valign="middle" align="center">0.65<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
<tr>
<td valign="middle" align="center">Semi-auto</td>
<td valign="middle" align="center">0.76</td>
<td valign="middle" align="center">0.87<sup>***</sup>
</td>
<td valign="middle" align="center">-0.02<sup>*</sup>
</td>
<td valign="middle" align="center">0.81<sup>***</sup>
</td>
<td valign="middle" align="center">***</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Significant p-values are represented as follows: &#x201c;***&#x201d;:&lt;0.001, &#x201c;*&#x201d;:&lt;0.05, &#x201c;NS&#x201d;: Non-significant. P-values for <italic>E. coruscans</italic>&#x2019; semi-automatic coefficients are not shown as the fit was identical to that of the manual counts.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The slope coefficients for each individual species and for all species combined were significantly different from zero (<italic>p</italic> &lt; 0.001). However, while automatic fish abundances appeared comparable to manual abundances for up to three to four individuals in the same frame, the Faster R-CNN model tended to underestimate higher abundance, with slope coefficients significantly smaller than 1 for each species and for all species combined. Slope coefficients nonetheless ranged between 0.65 and 0.88, with the highest value found when considering all species together (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). Except for <italic>P. argyrogrammicus</italic>, all intercepts were significantly different from zero, but with marginal deviation (range: 0.01 to 0.05 except for <italic>P. filamentosus</italic>: 0.31).</p>
<p>The semi-automatic protocol yielded fish abundance estimates much closer to manual counts, with a Pearson correlation coefficient of 0.96 for all species combined (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>; <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>). Correlations ranged from 0.86 to 1 depending on the species. <italic>E. coruscans</italic> showed a perfect fit (slope = 1, intercept = 0) with semi-automatic MaxN identical to manual MaxN. The slope between MaxN<sub>Man</sub> and MaxN<sub>Semi</sub> was not significantly different from one for <italic>P. filamentosus</italic>, revealing extremely good semi-automatic model performance.</p>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>The use of the Faster R-CNN algorithm to automatically detect, identify and count deep-water snappers proved successful and highly promising considering the challenge this group of fish presents and the variable background habitat. The algorithm effectively differentiated between species that were very similar and hard to distinguish, even for an experienced taxonomist. While the detection and identification will probably need post-verification until enough annotations are gathered to achieve automatic F-measures above 0.9 for all species, the abundance estimations were still consistent with manual counts. This procedure can already be employed for automatic deep-sea snapper monitoring, or semi-automatic monitoring, where observers would save substantial processing time by simply verifying and adjusting detections rather than processing entire BRUVS videos.</p>
<p>It is crucial for fisheries stock management to be able to work on the species level. This deep-water snappers&#x2019; dataset represents a fine addition to the collection with varying habitat constraints such as presence or absence of natural light and hard and soft substrates. Especially, this species group is challenging due to the similar appearance of its members. Deep-water snappers are mostly &#x201c;greyish&#x201d;, &#x201c;fish-looking&#x201d; species, posing a challenge in identification, particularly for <italic>P. filamentosus</italic> and <italic>P. flavipinnis</italic> which share almost identical characteristics (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>). <italic>E. coruscans</italic> stands out with its reddish color and long elongated tail tips, allowing the algorithm to distinguish it from other species, leading to the highest recall, precision, and F-measure metrics. Furthermore, the semi-automatic treatment of <italic>E. coruscans</italic> yielded individual detections and abundance values that matched the manual estimates perfectly. This is highly encouraging, considering <italic>E. coruscans</italic> is a highly targeted species of this fishery (<xref ref-type="bibr" rid="B45">Newman et&#xa0;al., 2016</xref>). <italic>P. filamentosus</italic> had the highest number of annotations and images, which likely explains its high identification success rate. The bigger the training database per feature, the better the identification for the Faster R-CNN algorithm, which typically requires at least 1,300 training images per feature to achieve over 95% certainty in fish identification (<xref ref-type="bibr" rid="B65">Villon et&#xa0;al., 2018</xref>). In our study, only two out of the 11 species studied (<italic>P. filamentosus</italic> and <italic>P. flavipinnis</italic>) met this training size requirement.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Correct automatic detection of closely related and similar-looking deep-water species <italic>Pristipomoides filamentosus</italic> (yellow) and <italic>P. flavipinnis</italic> (purple). The expert would look for the accentuated yellow eye color and recognize the slight vertical band pattern presented only by <italic>P. flavipinnis</italic>. Other frames before and after this image would have been required by the expert to confirm the identification.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1476616-g003.tif"/>
</fig>
<p>While the human observer may browse through the video sequence to observe color, behavior, movements, and other clues to identify species and count individuals, the algorithm is restricted to each single image to decide. That the algorithm was able to effectively differentiate between snapper species with so little information at hand is therefore very encouraging. However, errors have still been observed with many false positives caused by rarer species (e.g., <italic>A. rutilans</italic>) being confused with the most common ones (<italic>P. filamentosus</italic>, <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>). This type of confusion was easily corrected by the intervention of an expert during the semi-automatic counting protocol as the fish still got detected. The expert fully corrected each false positive, and the precision became equal to one. Additionally, semi-automatic recall also increased compared to its value with the automatic protocol. This is because some fish were not detected in frames where other individuals were detected. Since the expert corrected the entire frames, undetected individuals were also annotated, thereby reducing the number of false negatives and increasing recall. For example, the recall of <italic>A. rutilans</italic> increased from 0.66 to 0.80, indicating that this species was present in many frames with other detected species. However, the recall of <italic>A. virescens</italic> remained the same, indicating that no further detections of this species occurred on frames where other snappers were detected by the algorithm. The confusion problem between species could be partly due to the disparity in available images between similar species, with those with fewer images being misclassified more often than those with more images. While a semi-automatic protocol can partly address the issue, an alternative solution might involve adding temporal information through motion analysis or a tracking algorithm that would isolate the background or follow the same individuals, thereby adding detection and identification information from previous frames to subsequent ones (<xref ref-type="bibr" rid="B60">Shin, 2016</xref>; <xref ref-type="bibr" rid="B25">Jalal et&#xa0;al., 2020</xref>). The other major constraint highlighted in this study is the underestimation bias at higher abundances. We observed that frames involving many fish can become easily saturated (notably <italic>P. filamentosus</italic>, cf. <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref>) with few individuals blocking the camera&#x2019;s field of view. This bias in the algorithm seems rather inevitable due to its dependence on the technical video sampling system with a single sensor and angle of view. The MaxN abundance index based on the maximum number of fish present in the same frame is known to be sensitive to the phenomenon of image saturation (<xref ref-type="bibr" rid="B39">MacNeil et&#xa0;al., 2020</xref>). It is also reported in another study working on a different species of snapper in a different configuration (daylight reef) (<xref ref-type="bibr" rid="B14">Connolly et&#xa0;al., 2021</xref>). Our semi-automatic protocol could correct this bias for the two species that presented the highest MaxN, <italic>E. coruscans</italic> and <italic>P. filamentosus</italic>, yielding F-measure &gt; 0.96 after correction by an expert taxonomist. The tracking of individuals across successive frames might also permit a better differentiation of individuals saturating images, hence reducing or removing the bias in MaxN at high abundance, as the expert usually also does.</p>
<p>We are confident that our trained Faster R-CNN algorithm is already operational for fisheries assessment using our semi-automatic detection procedure. The whole process using BRUVS to assess fish abundance is nondestructive, independent from fisheries data and may today become cost-effective with the support of artificial intelligence. Our model, as it is, can provide a matrix of detections per species for each frame of the video stations. The frames with the greatest number of detections per species can then be identified and used as references to define video intervals of a few seconds including the MaxN of the different species. These short video sequences could then be processed by biologists using programs like EventMeasure, reducing hours of video processing to minutes. Furthermore, the manual processing of the short video sequences would be limited to simply correcting algorithmic detections, which would further speed up the process. Additionally, new annotations should be used to retrain the algorithm and further improve its performance. If stereo cameras are used, then fish size could be measured in addition to abundance. Although size-measurements are performed manually so far using programs like EventMeasure (<xref ref-type="bibr" rid="B34">Letessier et&#xa0;al., 2015</xref>), algorithms exist to automatically measure object dimensions on videos like with instance segmentation (<xref ref-type="bibr" rid="B47">Othman et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B18">Garcia-d&#x2019;Urso et&#xa0;al., 2022</xref>). Their ongoing development represents the next stage and their application on BRUVS and fisheries management is warranted.</p>
<p>Some caveats can still be discussed for further improvement. The uneven distribution of training images among species calls for an increased sampling to complete the dataset and improve identification accuracy (<xref ref-type="bibr" rid="B65">Villon et&#xa0;al., 2018</xref>). Our current algorithm may still drastically reduce annotation times for rarer species as they are detected but mostly confused with more frequently occurring species. However, rarity is a key characteristic of biodiversity, and a large number of annotations can remain difficult to gather for the rarest species (<xref ref-type="bibr" rid="B64">Villon et&#xa0;al., 2022</xref>). In this case, methods like the few-shot deep learning algorithm could be coupled with the Faster R-CNN to compensate for the lack of annotations (<xref ref-type="bibr" rid="B63">Villon et&#xa0;al., 2021</xref>). A coupling with other BRUVS datasets from other regions may also improve the algorithm performances but may then face issues related to changes in environmental conditions across regions (<xref ref-type="bibr" rid="B28">Kalogeiton et&#xa0;al., 2016</xref>). However, while our study relied on a dataset restricted to New Caledonia, the sampling occurred across the spatially immense EEZ and across depth ranging from shallow photic seamounts (50-60 meters deep) to deep aphotic seamounts and continental deep slopes (150-500 meters deep), exploring diverse environmental backgrounds and light intensities (<xref ref-type="bibr" rid="B4">Baletaud et&#xa0;al., 2023</xref>).</p>
<p>While this case study involved a particularly constraining group of species (look-alike deep-water snappers), in variable background conditions of light and habitats, it further shows that the Faster R-CNN is a worthy algorithm architecture that may be used in many use-case scenarios involving fish species detection. The methodology is applicable to any visually identifiable fish species provided sufficient training images for the model, which is the main constraint for any deep learning development (<xref ref-type="bibr" rid="B1">Ahmad et&#xa0;al., 2023</xref>). New CNN architectures are released more and more frequently, improving classification speed and accuracy, and their review using this new dataset will prove interesting although not in the scope of this study. The potential for deep learning to improve the day-to-day work of marine scientists in monitoring fisheries seems certified for the future (<xref ref-type="bibr" rid="B72">Zhang et&#xa0;al., 2021</xref>). The transition is progressive, and a semi-automatic approach may be yet closer to being adopted by operational monitoring organizations or consultancy firms using this work.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to <email xlink:href="mailto:laurent.vigliola@ird.fr">laurent.vigliola@ird.fr</email>.</p>
</sec>
<sec id="s6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>Ethical approval was not required for this study involving animals captured by video in accordance with the local legislation and institutional requirements because the data analyzed was from a previous study.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>FB: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. SV: Conceptualization, Data curation, Formal analysis, Methodology, Software, Validation, Visualization, Writing &#x2013; review &amp; editing. AG: Conceptualization, Funding acquisition, Project administration, Resources, Supervision, Writing &#x2013; review &amp; editing. J-MC: Funding acquisition, Project administration, Resources, Supervision, Writing &#x2013; review &amp; editing. SF: Data curation, Investigation, Resources, Writing &#x2013; review &amp; editing. CI: Conceptualization, Funding acquisition, Methodology, Project administration, Resources, Writing &#x2013; review &amp; editing. LV: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Validation, Visualization, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The study was funded by grant ANR &#x201c;SEAMOUNTS&#x201d; #ANR-18-CE02-0016, the French Oceanographic Fleet, and IRD core funding. FB was supported by grant ANRT CIFRE #2019/0105.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We would like to thank the many undergrad students and technical staffs for their help in the annotation process at the lab. Data was collected under permits 2019-733/GNC, 2020-503/GNC and 2020-1077/GNC delivered by the Government of New-Caledonia, 898-2019/ARR/DENV, 3066-2019/ARR/DENV, 844-2020/ARR/DDDT and 1955-2020/ARR/DDDT delivered by the Southern Province of New-Caledonia, and 609011/2019/DEPART/JJC, 609011-18/2019/DEPART/JJC and 609011-39/2020/DEPART/JJC delivered by the Northern Province of New-Caledonia.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>FB, AG and J-MC were employed by Groupe GINGER.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmad</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Junaid Ali</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ahmed Khan</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Ahmad Khan</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ur Rehman</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Muhammad Ali Shahid</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Large scale fish images classification and localization using transfer learning and localization aware CNN architecture</article-title>. <source>Comput. Syst. Sci. Eng.</source> <volume>45</volume>, <fpage>2125</fpage>&#x2013;<lpage>2140</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.32604/csse.2023.031008</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ammar</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Koubaa</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ahmed</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Saad</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Benjdira</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Aerial images processing for car detection using convolutional neural networks: comparison between faster R-CNN and YoloV3</article-title>. <source>Electronics (Basel)</source> <volume>10</volume>, <fpage>820</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/electronics10070820</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ault</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>S. G.</given-names>
</name>
<name>
<surname>Richards</surname> <given-names>B. L.</given-names>
</name>
<name>
<surname>Yau</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Langseth</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>O&#x2019;Malley</surname> <given-names>J. M.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Towards fishery-independent biomass estimation for Hawaiian Islands deepwater snappers</article-title>. <source>Fish Res.</source> <volume>208</volume>, <fpage>321</fpage>&#x2013;<lpage>328</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fishres.2018.08.012</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baletaud</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Lecellier</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Gilbert</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Mathon</surname> <given-names>L.</given-names>
</name>
<name>
<surname>C&#xf4;me</surname> <given-names>J.-M.</given-names>
</name>
<name>
<surname>Dejean</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Comparing seamounts and coral reefs with eDNA and BRUVS reveals oases and refuges on shallow seamounts</article-title>. <source>Biol. (Basel)</source> <volume>12</volume>, <elocation-id>1446</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/biology12111446</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhalla</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kushwaha</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Analysis of recent techniques in marine object detection: a review</article-title>. <source>Multimed Tools Appl</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-024-19782-9</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blowers</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Evans</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Mcnally</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Automated identification of fish and other aquatic life in underwater video</article-title>. <source>Scottish Mar. Freshw. Sci.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>62</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.7489/12333-1</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boldt</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Rooper</surname> <given-names>C. N.</given-names>
</name>
<name>
<surname>Towler</surname> <given-names>R. H.</given-names>
</name>
<name>
<surname>Gauthier</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Development of stereo camera methodologies to improve pelagic fish biomass estimates and inform ecosystem management in marine waters</article-title>. <source>Fish Res.</source> <volume>198</volume>, <fpage>66</fpage>&#x2013;<lpage>77</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fishres.2017.10.013</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bond</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Partridge</surname> <given-names>J. C.</given-names>
</name>
<name>
<surname>Taylor</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>Cooper</surname> <given-names>T. F.</given-names>
</name>
<name>
<surname>McLean</surname> <given-names>D. L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The influence of depth and a subsea pipeline on fish assemblages and commercially fished species</article-title>. <source>PloS One</source> <volume>13</volume>, <elocation-id>e0207703</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0207703</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bose</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>V. S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Efficient inception V2 based deep convolutional neural network for real-time hand action recognition</article-title>. <source>IET Image Process</source> <volume>14</volume>, <fpage>688</fpage>&#x2013;<lpage>696</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1049/iet-ipr.2019.0985</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cappo</surname> <given-names>M.</given-names>
</name>
<name>
<surname>De&#x2019;ath</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Speare</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Inter-reef vertebrate communities of the Great Barrier Reef Marine Park determined by baited remote underwater video stations</article-title>. <source>Mar. Ecol. Prog. Ser.</source> <volume>350</volume>, <fpage>209</fpage>&#x2013;<lpage>221</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3354/meps07189</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cappo</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Speare</surname> <given-names>P.</given-names>
</name>
<name>
<surname>De&#x2019;ath</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Comparison of baited remote underwater video stations (BRUVS) and prawn (shrimp) trawls for assessments of fish biodiversity in inter-reefal areas of the Great Barrier Reef Marine Park</article-title>. <source>J. Exp. Mar. Biol. Ecol.</source> <volume>302</volume>, <fpage>123</fpage>&#x2013;<lpage>152</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jembe.2003.10.006</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>M.-H.</given-names>
</name>
<name>
<surname>Lai</surname> <given-names>T.-H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.-C.</given-names>
</name>
<name>
<surname>Chou</surname> <given-names>T.-Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A robust fish species classification framework: FRCNN-VGG16-SPPNet</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.21203/rs.3.rs-2825927/v1</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Christin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Hervet</surname> <given-names>&#xc9;.</given-names>
</name>
<name>
<surname>Lecomte</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Applications for deep learning in ecology</article-title>. <source>Methods Ecol. Evol.</source> <volume>10</volume>, <fpage>1632</fpage>&#x2013;<lpage>1644</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/2041-210X.13256</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Connolly</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Fairclough</surname> <given-names>D. V.</given-names>
</name>
<name>
<surname>Jinks</surname> <given-names>E. L.</given-names>
</name>
<name>
<surname>Ditria</surname> <given-names>E. M.</given-names>
</name>
<name>
<surname>Jackson</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Lopez-Marcano</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Improved accuracy for automated counting of a fish in baited underwater videos for stock assessment</article-title>. <source>Front. Mar. Sci.</source> <volume>8</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2021.658135</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dalzell</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Preston</surname> <given-names>G. L.</given-names>
</name>
</person-group> (<year>1992</year>). <source>Deep reef slope fishery resources of the South Pacific</source> (<publisher-loc>Noumea, New Caledonia</publisher-loc>: <publisher-name>South Pacific Commission</publisher-name>).</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ellender</surname> <given-names>B. R.</given-names>
</name>
<name>
<surname>Becker</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Weyl</surname> <given-names>O. L. F.</given-names>
</name>
<name>
<surname>Swartz</surname> <given-names>E. R.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Underwater video analysis as a non-destructive alternative to electrofishing for sampling imperiled headwater stream fishes</article-title>. <source>Aquat Conserv.</source> <volume>22</volume>, <fpage>58</fpage>&#x2013;<lpage>65</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/aqc.1236</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Elsken</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Metzen</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Hutter</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Neural architecture search: A survey</article-title>. <source>J. Mach. Learn. Res.</source> <volume>20</volume>, <fpage>1</fpage>&#x2013;<lpage>21</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1808.05377</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garcia-d&#x2019;Urso</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Galan-Cuenca</surname> <given-names>A.</given-names>
</name>
<name>
<surname>P&#xe9;rez-S&#xe1;nchez</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Climent-P&#xe9;rez</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Fuster-Guillo</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Azorin-Lopez</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>The DeepFish computer vision dataset for fish instance segmentation, classification, and size estimation</article-title>. <source>Sci. Data</source> <volume>9</volume>, <fpage>287</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-022-01416-0</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gladstone</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Lindfield</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Coleman</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kelaher</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Optimization of baited remote underwater video sampling designs for estuarine fish assemblages</article-title>. <source>J. Exp. Mar. Biol. Ecol.</source> <volume>429</volume>, <fpage>28</fpage>&#x2013;<lpage>35</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jembe.2012.06.013</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gomez</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Nicol</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Mellin</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Loeun</surname> <given-names>K. L.</given-names>
</name>
<name>
<surname>Bradshaw</surname> <given-names>C. J. A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Species distribution models of tropical deep-sea snappers</article-title>. <source>PloS One</source> <volume>10</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0127395</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Griffin</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Robinson</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>West</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gloyne-Phillips</surname> <given-names>I. T.</given-names>
</name>
<name>
<surname>Unsworth</surname> <given-names>R. K. F.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Assessing fish and motile fauna around offshore windfarms using stereo baited video</article-title>. <source>PloS One</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0149701</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Gulland</surname> <given-names>J. A.</given-names>
</name>
</person-group> (<year>1983</year>). <source>Fish stock assessment: a manual of basic methods</source> (<publisher-loc>Chichester</publisher-loc>: <publisher-name>Wiley</publisher-name>).</citation>
</ref>
<ref id="B23">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>, (<publisher-loc>Las Vegas</publisher-loc>: <publisher-name>CVPR</publisher-name>), <fpage>770</fpage>&#x2013;<lpage>778</lpage>. Available at: <uri xlink:href="http://image-net.org/challenges/LSVRC/2015/">http://image-net.org/challenges/LSVRC/2015/</uri>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Henderson</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Olds</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gilby</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Maxwell</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Connolly</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Marine reserves and seascape context shape fish assemblages in seagrass ecosystems</article-title>. <source>Mar. Ecol. Prog. Ser.</source> <volume>566</volume>, <fpage>135</fpage>&#x2013;<lpage>144</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3354/meps12048</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jalal</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Salman</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Mian</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Shortis</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Shafait</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Fish detection and species classification in underwater environments using deep learning with temporal information</article-title>. <source>Ecol. Inform</source> <volume>57</volume>, <fpage>101088</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoinf.2020.101088</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jian</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Underwater object detection and datasets: a survey</article-title>. <source>Intelligent Mar. Technol. Syst.</source> <volume>2</volume>, <fpage>9</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s44295-024-00023-6</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kaarmukilan</surname> <given-names>S. P.</given-names>
</name>
<name>
<surname>Poddar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>A. K.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>FPGA based Deep Learning Models for Object Detection and Recognition Comparison of Object Detection: Comparison of object detection models using FPGA</article-title>,&#x201d; in <source>2020 Fourth International Conference on Computing Methodologies and Communication (ICCMC)</source> (<publisher-loc>Erode, India</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>471</fpage>&#x2013;<lpage>474</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCMC48092.2020.ICCMC-00088</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kalogeiton</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Ferrari</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Schmid</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Analyzing domain shift factors between videos and images for object detection</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>38</volume>, <fpage>2327</fpage>&#x2013;<lpage>2334</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2016.2551239</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kim</surname> <given-names>C. E.</given-names>
</name>
<name>
<surname>Dar Oghaz</surname> <given-names>M. M.</given-names>
</name>
<name>
<surname>Fajtl</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Argyriou</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Remagnino</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>A comparison of embedded deep learning methods for person detection</article-title>,&#x201d; in <source>VISIGRAPP 2019 - Proceedings of the 14th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications</source>, vol. <volume>5</volume>. , <fpage>459</fpage>&#x2013;<lpage>465</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5220/0007386304590465</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langlois</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Goetze</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bond</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Monk</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Abesamis</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Asher</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>A field and video annotation guide for baited remote underwater stereo-video surveys of demersal fish assemblages</article-title>. <source>Methods Ecol. Evol.</source> <volume>11</volume>, <fpage>1401</fpage>&#x2013;<lpage>1409</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/2041-210X.13470</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>&#x2013;<lpage>444</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>Y.-H.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Comparison of CNN and YOLO for object detection</article-title>. <source>J. Semiconductor Display Technol.</source> <volume>19</volume>, <fpage>85</fpage>&#x2013;<lpage>92</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Dasari</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Weston</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). &#x201c;<article-title>Benchmarking video object detection systems on embedded devices under resource contention</article-title>,&#x201d; in <source>Proceedings of the 5th International Workshop on Embedded and Mobile Deep Learning</source> (<publisher-name>ACM</publisher-name>, <publisher-loc>New York, NY, USA</publisher-loc>), <fpage>19</fpage>&#x2013;<lpage>24</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3469116.3470010</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Letessier</surname> <given-names>T. B.</given-names>
</name>
<name>
<surname>Juhel</surname> <given-names>J. B.</given-names>
</name>
<name>
<surname>Vigliola</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Meeuwig</surname> <given-names>J. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Low-cost small action cameras in stereo generates accurate underwater measurements of fish</article-title>. <source>J. Exp. Mar. Biol. Ecol.</source> <volume>466</volume>, <fpage>120</fpage>&#x2013;<lpage>126</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jembe.2015.02.013</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Letessier</surname> <given-names>T. B.</given-names>
</name>
<name>
<surname>Mouillot</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bouchet</surname> <given-names>P. J.</given-names>
</name>
<name>
<surname>Vigliola</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Fernandes</surname> <given-names>M. C.</given-names>
</name>
<name>
<surname>Thompson</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Remote reefs and seamounts are the last refuges for marine predators across the Indo-Pacific</article-title>. <source>PloS Biol.</source> <volume>17</volume>, <elocation-id>e3000366</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pbio.3000366</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T.-Y.</given-names>
</name>
<name>
<surname>Maire</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Belongie</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Hays</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Perona</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Ramanan</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). &#x201c;<article-title>Microsoft COCO: Common Objects in Context</article-title>,&#x201d; in <source>Computer Vision &#x2013; ECCV 2014</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>740</fpage>&#x2013;<lpage>755</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-319-10602-1_48</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A deep learning approach for object detection of rockfish in challenging underwater environments</article-title>. <source>Front. Mar. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2023.1242041</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lopez-Marcano</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Brown</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Sievers</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Connolly</surname> <given-names>R. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The slow rise of technology: Computer vision techniques in fish population connectivity</article-title>. <source>Aquat Conserv.</source> <volume>31</volume>, <fpage>210</fpage>&#x2013;<lpage>217</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/aqc.3432</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>MacNeil</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Chapman</surname> <given-names>D. D.</given-names>
</name>
<name>
<surname>Heupel</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Simpfendorfer</surname> <given-names>C. A.</given-names>
</name>
<name>
<surname>Heithaus</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Meekan</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Global status and conservation potential of reef sharks</article-title>. <source>Nature</source> <volume>583</volume>, <fpage>801</fpage>&#x2013;<lpage>806</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41586-020-2519-y</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mahendrakar</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ekblad</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Fischer</surname> <given-names>N.</given-names>
</name>
<name>
<surname>White</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wilde</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kish</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). &#x201c;<article-title>Performance study of YOLOv5 and faster R-CNN for autonomous navigation around non-cooperative targets</article-title>,&#x201d; in <source>2022 IEEE Aerospace Conference (AERO)</source> (<publisher-loc>Big Sky, MT, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/AERO53065.2022.9843537</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mandal</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Connolly</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Schlacher</surname> <given-names>T. A.</given-names>
</name>
<name>
<surname>Stantic</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Assessing fish abundance from underwater video using deep neural networks</article-title>,&#x201d; in <source>2018 International Joint Conference on Neural Networks (IJCNN)</source> (<publisher-loc>Rio de Janeiro, Brazil</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/IJCNN.2018.8489482</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mannocci</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Villon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chaumont</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Guellati</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Mouquet</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Iovan</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Leveraging social media and deep learning to detect rare megafauna in video surveys</article-title>. <source>Conserv. Biol.</source> <volume>36</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/cobi.13798</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marrable</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Barker</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Tippaya</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wyatt</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bainbridge</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Stowar</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Accelerating species recognition and labelling of fish from underwater video with machine-assisted deep learning</article-title>. <source>Front. Mar. Sci.</source> <volume>9</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2022.944582</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moore</surname> <given-names>C. H.</given-names>
</name>
<name>
<surname>Drazen</surname> <given-names>J. C.</given-names>
</name>
<name>
<surname>Kelley</surname> <given-names>C. D.</given-names>
</name>
<name>
<surname>Misa</surname> <given-names>W. F. X. E.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Deepwater marine protected areas of the main Hawaiian Islands: Establishing baselines for commercially valuable bottom fish populations</article-title>. <source>Mar. Ecol. Prog. Ser.</source> <volume>476</volume>, <fpage>167</fpage>&#x2013;<lpage>183</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3354/meps10132</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Newman</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Wakefield</surname> <given-names>C. B.</given-names>
</name>
<name>
<surname>Nicol</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Taylor</surname> <given-names>B. M.</given-names>
</name>
<name>
<surname>O&#x2019;Malley</surname> <given-names>J. M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Review of the life history characteristics, ecology and fisheries for deep-water tropical demersal fish in the Indo-Pacific region</article-title>. <source>Rev. Fish Biol. Fish</source> <volume>26</volume>, <fpage>537</fpage>&#x2013;<lpage>562</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11160-016-9442-1</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Osgood</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>McCord</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Baum</surname> <given-names>J. K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Using baited remote underwater videos (BRUVs) to characterize chondrichthyan communities in a global biodiversity hotspot</article-title>. <source>PloS One</source> <volume>14</volume>, <elocation-id>e0225859</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0225859</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Othman</surname> <given-names>N. A.</given-names>
</name>
<name>
<surname>Salur</surname> <given-names>M. U.</given-names>
</name>
<name>
<surname>Karakose</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Aydin</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>An embedded real-time object detection and measurement of its size</article-title>,&#x201d; in <source>2018 International Conference on Artificial Intelligence and Data Processing (IDAP)</source> (<publisher-loc>Malatya, Turkey</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>4</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/IDAP.2018.8620812</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Payri</surname> <given-names>C. E.</given-names>
</name>
<name>
<surname>Allain</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Aucan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>David</surname> <given-names>C.</given-names>
</name>
<name>
<surname>David</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Dutheil</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). &#x201c;<article-title>New Caledonia</article-title>,&#x201d; in <source>World Seas: An Environmental Evaluation</source> (<publisher-name>Elsevier</publisher-name>), <fpage>593</fpage>&#x2013;<lpage>618</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reis-Filho</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Schmid</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Harvey</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Giarrizzo</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Coastal fish assemblages reflect marine habitat connectivity and ontogenetic shifts in an estuary-bay-continental shelf gradient</article-title>. <source>Mar. Environ. Res.</source> <volume>148</volume>, <fpage>57</fpage>&#x2013;<lpage>66</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.marenvres.2019.05.004</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Faster R-CNN: Towards real-time object detection with region proposal networks</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>39</volume>, <fpage>1137</fpage>&#x2013;<lpage>1149</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2016.2577031</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Saleh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sheaves</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jerry</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Azghadi</surname> <given-names>M. R.</given-names>
</name>
</person-group> (<year>2024</year>). <source>Applications of Deep Learning in Fish Habitat Monitoring: A Tutorial and Survey</source>. Available online at: <uri xlink:href="http://arxiv.org/abs/2206.05394">http://arxiv.org/abs/2206.05394</uri>.</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saleh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sheaves</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Rahimi Azghadi</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Computer vision and deep learning for fish classification in underwater habitats: A survey</article-title>. <source>Fish Fisheries</source> <volume>23</volume>, <fpage>977</fpage>&#x2013;<lpage>999</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/faf.12666</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sarma</surname> <given-names>K. S. R. K.</given-names>
</name>
<name>
<surname>Sasikala</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Surendra</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Erukala</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Aruna</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A comparative study on faster R-CNN, YOLO and SSD object detection algorithms on HIDS system</article-title>, in <source>AIP Conference Proceedings</source>, <fpage>060044</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1063/5.0195857</pub-id>.</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schmid</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Reis-Filho</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Harvey</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Giarrizzo</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Baited remote underwater video as a promising nondestructive tool to assess fish assemblages in clearwater Amazonian rivers: testing the effect of bait and habitat type</article-title>. <source>Hydrobiologia</source> <volume>784</volume>, <fpage>93</fpage>&#x2013;<lpage>109</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10750-016-2860-1</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schobernd</surname> <given-names>Z. H.</given-names>
</name>
<name>
<surname>Bacheler</surname> <given-names>N. M.</given-names>
</name>
<name>
<surname>Conn</surname> <given-names>P. B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Examining the utility of alternative video monitoring metrics for indexing reef fish abundance</article-title>. <source>Can. J. Fisheries Aquat. Sci.</source> <volume>71</volume>, <fpage>464</fpage>&#x2013;<lpage>471</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1139/cjfas-2013-0086</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schramm</surname> <given-names>K. D.</given-names>
</name>
<name>
<surname>Marnane</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Elsdon</surname> <given-names>T. S.</given-names>
</name>
<name>
<surname>Jones</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Saunders</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Goetze</surname> <given-names>J. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>A comparison of stereo-BRUVs and stereo-ROV techniques for sampling shallow water fish communities on and off pipelines</article-title>. <source>Mar. Environ. Res.</source> <volume>162</volume>, <elocation-id>105198</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.marenvres.2020.105198</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schramm</surname> <given-names>K. D.</given-names>
</name>
<name>
<surname>Marnane</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Elsdon</surname> <given-names>T. S.</given-names>
</name>
<name>
<surname>Jones</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Saunders</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Newman</surname> <given-names>S. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Fish associations with shallow water subsea pipelines compared to surrounding reef and soft sediment habitats</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-021-85396-y</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="other">
<person-group person-group-type="author">
<name>
<surname>Sekachev</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Manovich</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Zhiltsov</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhavoronkov</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kalinin</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Hoff</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <source>Computer Vision Annotation Tool (CVAT)</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.5281/zenodo.4009388</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sheaves</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bradley</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Herrera</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Mattone</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lennard</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Sheaves</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Optimizing video sampling for juvenile fish surveys: Using deep learning and evaluation of assumptions to produce critical fisheries parameters</article-title>. <source>Fish Fisheries</source> <volume>21</volume>, <fpage>1259</fpage>&#x2013;<lpage>1276</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/faf.12501</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shin</surname> <given-names>K. J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Robot fish tracking control using an optical flow object-detecting algorithm</article-title>. <source>IEIE Trans. Smart Process. Computing</source> <volume>5</volume>, <fpage>375</fpage>&#x2013;<lpage>382</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5573/IEIESPC.2016.5.6.375</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Szegedy</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sermanet</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Reed</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Anguelov</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). &#x201c;<article-title>Going deeper with convolutions</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Boston</publisher-loc>: <publisher-name>CVPR</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>9</lpage>.</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tseng</surname> <given-names>C.-H.</given-names>
</name>
<name>
<surname>Kuo</surname> <given-names>Y.-F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Detecting and counting harvested fish and identifying fish types in electronic monitoring system videos using deep convolutional neural networks</article-title>. <source>ICES J. Mar. Sci.</source> <volume>77</volume>, <fpage>1367</fpage>&#x2013;<lpage>1378</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/icesjms/fsaa076</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Villon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Iovan</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Mangeas</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Claverie</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Mouillot</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Vill&#xe9;ger</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Automatic underwater fish species classification with limited data using few-shot learning</article-title>. <source>Ecol. Inform</source> <volume>63</volume>, <elocation-id>101320</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoinf.2021.101320</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Villon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Iovan</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Mangeas</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Vigliola</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Confronting deep-learning and biodiversity challenges for automatic video-monitoring of marine ecosystems</article-title>. <source>Sensors</source> <volume>22</volume>, <fpage>497</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s22020497</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Villon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Mouillot</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Chaumont</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Darling</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Subsol</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Claverie</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>A Deep learning method for accurate and fast identification of coral reef fishes in underwater images</article-title>. <source>Ecol. Inform</source> <volume>48</volume>, <fpage>238</fpage>&#x2013;<lpage>244</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoinf.2018.09.007</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Villon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Mouillot</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Chaumont</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Subsol</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Claverie</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Vill&#xe9;ger</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A new method to control error rates in automated species identification with deep learning algorithms</article-title>. <source>Sci. Rep.</source> <volume>10</volume>, <fpage>10972</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-020-67573-7</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wakefield</surname> <given-names>C. B.</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Fisher</surname> <given-names>E. A.</given-names>
</name>
<name>
<surname>Hall</surname> <given-names>N. G.</given-names>
</name>
<name>
<surname>Hesp</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Halafihi</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Variations in life history characteristics of the deep-water giant ruby snapper (Etelis sp.) between the Indian and Pacific Oceans and application of a data-poor assessment</article-title>. <source>Fish Res.</source> <volume>230</volume>, <elocation-id>105651</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fishres.2020.105651</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wellington</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Harvey</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Wakefield</surname> <given-names>C. B.</given-names>
</name>
<name>
<surname>Langlois</surname> <given-names>T. J.</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>A.</given-names>
</name>
<name>
<surname>White</surname> <given-names>W. T.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Peak in biomass driven by larger-bodied meso-predators in demersal fish communities between shelf and slope habitats at the head of a submarine canyon in the south-eastern Indian Ocean</article-title>. <source>Cont Shelf Res.</source> <volume>167</volume>, <fpage>55</fpage>&#x2013;<lpage>64</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.csr.2018.08.005</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Whitmarsh</surname> <given-names>S. K.</given-names>
</name>
<name>
<surname>Fairweather</surname> <given-names>P. G.</given-names>
</name>
<name>
<surname>Huveneers</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>What is Big BRUVver up to? Methods and uses of baited underwater video</article-title>. <source>Rev. Fish Biol. Fish</source> <volume>27</volume>, <fpage>53</fpage>&#x2013;<lpage>73</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11160-016-9450-1</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Williams</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Nicol</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Bentley</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Starr</surname> <given-names>P. J.</given-names>
</name>
<name>
<surname>Newman</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>McCoy</surname> <given-names>M. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>International workshop on developing strategies for monitoring data-limited deepwater demersal line fisheries in the Pacific Ocean</article-title>. <source>Rev. Fish Biol. Fish</source> <volume>22</volume>, <fpage>527</fpage>&#x2013;<lpage>531</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11160-011-9234-6</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Mei</surname> <given-names>H.</given-names>
</name>
<name>
<surname>He</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Liotta</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A systematic review and analysis of deep learning-based underwater object detection</article-title>. <source>Neurocomputing</source> <volume>527</volume>, <fpage>204</fpage>&#x2013;<lpage>232</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neucom.2023.01.056</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Survey on deep learning-based marine object detection</article-title>. <source>J. Adv. Transp.</source> <volume>2021</volume>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/5808206</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>F-measure</article-title>,&#x201d; in <source>Encyclopedia of Database Systems</source> (<publisher-name>Springer US</publisher-name>, <publisher-loc>Boston, MA</publisher-loc>), <fpage>1147</fpage>&#x2013;<lpage>1147</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-0-387-39940-9_483</pub-id>
</citation>
</ref>
<ref id="B74">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhong</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Anchor box optimization for object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision</conf-name>, (<publisher-loc>Snowmass, CO, USA</publisher-loc>: <publisher-name>WACV</publisher-name>), <fpage>1286</fpage>&#x2013;<lpage>1294</lpage>.</citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zintzen</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Anderson</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Roberts</surname> <given-names>C. D.</given-names>
</name>
<name>
<surname>Harvey</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Stewart</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Struthers</surname> <given-names>C. D.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Diversity and composition of demersal fishes along a depth gradient assessed by baited remote underwater stereo-video</article-title>. <source>PloS One</source> <volume>7</volume>, <elocation-id>e48522</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0048522</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>