<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2017.00128</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Estimates of Water-Column Nutrient Concentrations and Carbonate System Parameters in the Global Ocean: A Novel Approach Based on Neural Networks</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Sauz&#x000E8;de</surname> <given-names>Rapha&#x000EB;lle</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/344015/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Bittig</surname> <given-names>Henry C.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/391875/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Claustre</surname> <given-names>Herv&#x000E9;</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/206420/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Pasqueron de Fommervault</surname> <given-names>Orens</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/391783/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Gattuso</surname> <given-names>Jean-Pierre</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/227907/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Legendre</surname> <given-names>Louis</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/406762/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Johnson</surname> <given-names>Kenneth S.</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/218873/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Laboratoire d&#x00027;Oc&#x000E9;anographie de Villefranche, Observatoire Oc&#x000E9;anologique de Villefranche, Centre National de la Recherche Scientifique-INSU, Sorbonne Universit&#x000E9;s, UPMC University Paris 06</institution> <country>Villefranche-Sur-Mer, France</country></aff>
<aff id="aff2"><sup>2</sup><institution>Ecosystemes Insulaires Oc&#x000E9;aniens (UMR-241), IRD, Ifremer, UPF and ILM</institution> <country>Papeete, French Polynesia</country></aff>
<aff id="aff3"><sup>3</sup><institution>Departamento de Oceanograf&#x000ED;a F&#x000ED;sica, Centro de Investigaci&#x000F3;n Cient&#x000ED;fica y de Educaci&#x000F3;n Superior de Ensenada</institution> <country>Ensenada, Mexico</country></aff>
<aff id="aff4"><sup>4</sup><institution>Institute for Sustainable Development and International Relations, Sciences Po</institution> <country>Paris, France</country></aff>
<aff id="aff5"><sup>5</sup><institution>Monterey Bay Aquarium Research Institute</institution> <country>Moss Landing, CA, USA</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Astrid Bracher, Alfred-Wegener-Institute Helmholtz Center for Polar and Marine Research, Germany</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Brian Ward, NUI Galway, Ireland; Kemal Can Bizsel, Institute of Marine Sciences and Technology, Turkey</p></fn>
<fn fn-type="corresp" id="fn001"><p>&#x0002A;Correspondence: Rapha&#x000EB;lle Sauz&#x000E8;de <email>raphaelle.sauzede&#x00040;ird.fr</email></p></fn>
<fn fn-type="other" id="fn002"><p>This article was submitted to Ocean Observation, a section of the journal Frontiers in Marine Science</p></fn></author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>05</month>
<year>2017</year>
</pub-date>
<pub-date pub-type="collection">
<year>2017</year>
</pub-date>
<volume>4</volume>
<elocation-id>128</elocation-id>
<history>
<date date-type="received">
<day>12</day>
<month>11</month>
<year>2016</year>
</date>
<date date-type="accepted">
<day>18</day>
<month>04</month>
<year>2017</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2017 Sauz&#x000E8;de, Bittig, Claustre, Pasqueron de Fommervault, Gattuso, Legendre and Johnson.</copyright-statement>
<copyright-year>2017</copyright-year>
<copyright-holder>Sauz&#x000E8;de, Bittig, Claustre, Pasqueron de Fommervault, Gattuso, Legendre and Johnson</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) or licensor are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract><p>A neural network-based method (CANYON: CArbonate system and Nutrients concentration from hYdrological properties and Oxygen using a Neural-network) was developed to estimate water-column (i.e., from surface to 8,000 m depth) biogeochemically relevant variables in the Global Ocean. These are the concentrations of three nutrients [nitrate (NO<sub>3</sub><sup>&#x02212;</sup>), phosphate (PO<sub>4</sub><sup>3&#x02212;</sup>), and silicate (Si(OH)<sub>4</sub>)] and four carbonate system parameters [total alkalinity (<italic>A</italic><sub>T</sub>), dissolved inorganic carbon (<italic>C</italic><sub>T</sub>), pH (pH<sub>T</sub>), and partial pressure of CO<sub>2</sub> (<italic>p</italic>CO<sub>2</sub>)], which are estimated from concurrent <italic>in situ</italic> measurements of temperature, salinity, hydrostatic pressure, and oxygen (O<sub>2</sub>) together with sampling latitude, longitude, and date. Seven neural-networks were developed using the GLODAPv2 database, which is largely representative of the diversity of open-ocean conditions, hence making CANYON potentially applicable to most oceanic environments. For each variable, CANYON was trained using 80 % randomly chosen data from the whole database (after eight 10&#x000B0; &#x000D7; 10&#x000B0; zones were removed to provide an &#x0201C;independent data-set&#x0201D; for additional validation); the remaining 20 % of the data were used for the neural-network validation test. Overall, CANYON retrieved the variables with high accuracies (RMSE): 1.04 &#x003BC;mol kg<sup>&#x02212;1</sup> (NO<sub>3</sub><sup>&#x02212;</sup>), 0.074 &#x003BC;mol kg<sup>&#x02212;1</sup> (PO<sub>4</sub><sup>3&#x02212;</sup>), 3.2 &#x003BC;mol kg<sup>&#x02212;1</sup> (Si(OH)<sub>4</sub>), 0.020 (pH<sub>T</sub>), 9 &#x003BC;mol kg<sup>&#x02212;1</sup> (<italic>A</italic><sub>T</sub>), 11 &#x003BC;mol kg<sup>&#x02212;1</sup> (<italic>C</italic><sub>T</sub>) and 7.6 % (<italic>p</italic>CO<sub>2</sub>) (30 &#x003BC;atm at 400 &#x003BC;atm). 
This was confirmed for the eight independent zones not included in the training process. CANYON was also applied to the Hawaiian Time Series site to produce a 22-year-long simulated time series for the above seven variables. Comparison of modeled and measured data was also very satisfactory (RMSE of the same order of magnitude as the RMSE from the validation test). CANYON is thus a promising method to derive distributions of key biogeochemical variables. It could be used for a variety of global and regional applications ranging from data quality control to the production of datasets of variables required for initialization and validation of biogeochemical models that are difficult to obtain. In particular, combining the increased coverage of the global Biogeochemical-Argo program, where O<sub>2</sub> is one of the core variables now very accurately measured, with the CANYON approach offers the fascinating perspective of obtaining large-scale estimates of key biogeochemical variables with unprecedented spatial and temporal resolutions. The Matlab and R codes of the proposed algorithms are provided as <xref ref-type="supplementary-material" rid="SM5">Supplementary Material</xref>.</p></abstract>
<kwd-group>
<kwd>neural network</kwd>
<kwd>nutrients</kwd>
<kwd>carbonate system</kwd>
<kwd>global ocean</kwd>
<kwd>GLODAPv2 database</kwd>
<kwd>profiling floats</kwd>
</kwd-group>
<contract-num rid="cn001">246777</contract-num>
<contract-num rid="cn002">2014-633211</contract-num>
<contract-sponsor id="cn001">European Research Council<named-content content-type="fundref-id">10.13039/501100000781</named-content></contract-sponsor>
<contract-sponsor id="cn002">Horizon 2020<named-content content-type="fundref-id">10.13039/501100007601</named-content></contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="4"/>
<equation-count count="3"/>
<ref-count count="57"/>
<page-count count="17"/>
<word-count count="10212"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>The ocean is under increasing stress (Gruber, <xref ref-type="bibr" rid="B17">2011</xref>; Gattuso et al., <xref ref-type="bibr" rid="B15">2015</xref>). Given this context of a rapidly changing ocean, it is crucial to reinforce the observation capability of biogeochemical variables and develop ways of measuring or estimating new ones (Claustre et al., <xref ref-type="bibr" rid="B7">2010</xref>; Gruber et al., <xref ref-type="bibr" rid="B19">2010b</xref>). This is required not only for monitoring ongoing changes, but also to gain a better understanding of key biogeochemical processes and for reducing uncertainties in budgets of major elements (e.g., carbon, oxygen, nitrogen, phosphorus, and silicon).</p>
<p>Reaching the goal of an improved global observation system for biogeochemical variables primarily relies on enhancing the spatio-temporal resolution of measurements. Historically, marine biogeochemical observations have been conducted from ships either taking discrete water samples followed by laboratory analyses (e.g., Global Ocean Ship-based Hydrographic Investigations Panel, GO-SHIP program; Talley et al., <xref ref-type="bibr" rid="B52">2016</xref>), or conducting continuous measurements of surface-water properties. These approaches have been and still remain essential as their estimates generally have the highest accuracies. Such measurements have been assembled into global databases (e.g., GLODAPv2; Key et al., <xref ref-type="bibr" rid="B33">2015</xref>; Olsen et al., <xref ref-type="bibr" rid="B42">2016</xref>), which are a key resource for making budgets of chemical elements, directly from available measurements (Takahashi et al., <xref ref-type="bibr" rid="B51">2009</xref>) or indirectly through specific innovative methods (Landsch&#x000FC;tzer et al., <xref ref-type="bibr" rid="B37">2013</xref>, <xref ref-type="bibr" rid="B36">2014</xref>, <xref ref-type="bibr" rid="B35">2016</xref>), conducting climate change research (Le Qu&#x000E9;r&#x000E9; et al., <xref ref-type="bibr" rid="B38">2015</xref>) and biogeochemical modeling (e.g., use of data for model initialization and/or validation; Doney et al., <xref ref-type="bibr" rid="B9">2009</xref>; Ilyina et al., <xref ref-type="bibr" rid="B22">2013</xref>). The ship-based sampling mode has one major limit, i.e., coarse spatio-temporal resolution and resulting under-sampling of marine biogeochemical properties. This severely limits the understanding of fundamental processes and the accurate documentation of ongoing changes, especially at some critical scales (e.g., seasonal, regional).</p>
<p>Over the last two decades, observation technologies such as autonomous platforms have matured (e.g., profiling floats and gliders equipped with biogeochemical sensors; Johnson et al., <xref ref-type="bibr" rid="B30">2009</xref>, <xref ref-type="bibr" rid="B27">2013</xref>, <xref ref-type="bibr" rid="B28">2016</xref>). Robotic observation now provides a reliable complement to ship-based sampling that can be used to cost-effectively densify the acquisition of marine biogeochemical properties (Johnson et al., <xref ref-type="bibr" rid="B30">2009</xref>). Among such observation systems, the recently launched Biogeochemical-Argo (BGC-Argo) network offers a promising approach for the global coverage and spatio-temporal resolution of biogeochemical properties (Johnson and Claustre, <xref ref-type="bibr" rid="B25">2016</xref>). The biogeochemically-relevant variables amenable to systematic and reliable acquisition with robotic observation systems presently include concentrations of oxygen (O<sub>2</sub>) and their number increases rapidly (Johnson et al., <xref ref-type="bibr" rid="B29">2015</xref>). More generally, O<sub>2</sub> concentration is the most mature measurement, and could be easily implemented on all types of profiling floats (Gruber et al., <xref ref-type="bibr" rid="B18">2010a</xref>) including those of the BGC-Argo network.</p>
<p>Oxygen optode sensors have been progressively implemented on profiling floats since the early 2000s, and have thus opened a new area of research (e.g., K&#x000F6;rtzinger et al., <xref ref-type="bibr" rid="B34">2004</xref>; Martz et al., <xref ref-type="bibr" rid="B41">2008</xref>; Riser and Johnson, <xref ref-type="bibr" rid="B46">2008</xref>). Strong efforts have been devoted toward the improvement of the long-term reliability and accuracy of autonomous O<sub>2</sub> measurements on profiling floats. A crucial step is the possibility of frequently calibrating optodes by recording O<sub>2</sub> in air when the float surfaces (Bittig and K&#x000F6;rtzinger, <xref ref-type="bibr" rid="B4">2015</xref>; Johnson et al., <xref ref-type="bibr" rid="B29">2015</xref>; Bushinsky et al., <xref ref-type="bibr" rid="B5">2016</xref>). Such a calibration, which can be done for each profile and throughout the float&#x00027;s lifetime, improves the precision and accuracy of O<sub>2</sub> measurements to within 0.2 and 1 %, respectively (Bittig and K&#x000F6;rtzinger, <xref ref-type="bibr" rid="B4">2015</xref>); this accuracy is comparable to that of the reference Winkler titration technique. Water column O<sub>2</sub> concentration can therefore be globally monitored at the biogeochemically relevant spatial and temporal resolutions. This will move O<sub>2</sub>, which required specialized measurements until now, among the key standard oceanographic variables.</p>
<p>In the present study, we develop a new approach with which the expected increased densification of O<sub>2</sub> measurements in the near future could be used to support new studies related to seven key biogeochemical variables, i.e., concentrations of three dissolved inorganic macronutrients (nitrate, phosphate, and silicate) and four parameters of the carbonate system (total alkalinity, dissolved inorganic carbon, pH on the total scale, and partial pressure of CO<sub>2</sub>). Because O<sub>2</sub> concentration is a variable that reflects both phytoplankton production and community respiration processes, the first-order relationships which link O<sub>2</sub>, nutrients and inorganic carbon are rather well-constrained through Redfield stoichiometry (Redfield, <xref ref-type="bibr" rid="B44">1934</xref>, <xref ref-type="bibr" rid="B45">1958</xref>). These intrinsic relationships have been used to develop, from regional to global scales, multiple linear regression or neural network approaches that link O<sub>2</sub> and simultaneously acquired variables (e.g., pressure, temperature, salinity) to biogeochemical variables, in particular parameters of the carbonate system (Juranek et al., <xref ref-type="bibr" rid="B31">2011</xref>; Velo et al., <xref ref-type="bibr" rid="B55">2013</xref>; Carter et al., <xref ref-type="bibr" rid="B6">2016</xref>; Williams et al., <xref ref-type="bibr" rid="B57">2016</xref>). These relationships could be used as transfer functions to convert dense fields of O<sub>2</sub> (and associated variables) into corresponding fields of biogeochemical variables of interest. This represents a way to cost-effectively populate, spatially and temporally, the previously loosely resolved fields of these variables.</p>
<p>Such transfer functions represent a potential approach to profit from the upcoming numerous accurate measurements of O<sub>2</sub> (from profiling floats), which are expected to be routine soon, to derive properties or variables that are difficult or costly to acquire. To be useful, these functions must provide &#x0201C;predicted&#x0201D; variables with relatively high accuracy, and they should be as generic as possible (i.e., ideally of global applicability). Among the different possible methods for developing such transfer functions, artificial neural networks are an attractive tool as these powerful methods can be used for approximating any differentiable and continuous function and thus allow complex and non-linear relationships to be modeled (Hornik et al., <xref ref-type="bibr" rid="B21">1989</xref>; Lek and Gu&#x000E9;gan, <xref ref-type="bibr" rid="B39">1999</xref>). As a consequence, neural networks have already been widely used for biogeochemical and geophysical applications (e.g., Ward and Redfern, <xref ref-type="bibr" rid="B56">1999</xref>; Friedrich and Oschlies, <xref ref-type="bibr" rid="B12">2009</xref>; Jamet et al., <xref ref-type="bibr" rid="B23">2012</xref>; Ben Mustapha et al., <xref ref-type="bibr" rid="B2">2013</xref>). More recently, neural networks have been used successfully to retrieve the vertical distribution of biogeochemical variables at the global scale using as input the geolocation variables, providing a single global transfer function handling boundary issues compared to regional-based functions (Sauz&#x000E8;de et al., <xref ref-type="bibr" rid="B49">2015</xref>, <xref ref-type="bibr" rid="B50">2016</xref>).</p>
<p>The present study takes advantage of the simultaneous release of the GLODAPv2 database (Olsen et al., <xref ref-type="bibr" rid="B42">2016</xref>) and the planning of the BGC-Argo program (Johnson and Claustre, <xref ref-type="bibr" rid="B25">2016</xref>). The two observational systems and resulting databases are highly complementary, and new approaches can be developed to synergistically use their respective strengths, i.e., measurement accuracy for GLODAPv2, and spatio-temporal coverage for BGC-Argo, with a specific emphasis on O<sub>2</sub> measurements. We thus focus in this study on the development of global neural network-based transfer functions using O<sub>2</sub> as a primary input to retrieve nutrient concentrations and carbonate system parameters in the water column down to 8,000 m. Hereinafter, we refer to our method as CANYON, for CArbonate system and Nutrients concentration from hYdrological properties and Oxygen using a Neural network.</p>
</sec>
<sec id="s2">
<title>Material and methods</title>
<sec>
<title>The GLODAPv2 database</title>
<p>The Global Ocean Data Analysis Project version 2 (GLODAPv2) was an effort from the international community to consolidate all data from ocean bottle samples collected as part of many oceanic cruises (Olsen et al., <xref ref-type="bibr" rid="B42">2016</xref>). The GLODAPv2 database (available at <ext-link ext-link-type="uri" xlink:href="http://cdiac.ornl.gov/oceans/GLODAPv2/">http://cdiac.ornl.gov/oceans/GLODAPv2/</ext-link>) provides a single high-quality internally consistent global data product that contains CO<sub>2</sub>-relevant ocean interior measurements from ship-based surveys. The GLODAPv2 database includes samples of core variables such as salinity, oxygen, macronutrients, and seawater CO<sub>2</sub> chemistry from 724 oceanic cruises. In this study, we focused on seven variables representative of the macronutrients and of the seawater carbonate system: nitrate (NO<sub>3</sub><sup>&#x02212;</sup>), phosphate (PO<sub>4</sub><sup>3&#x02212;</sup>), silicate (Si(OH)<sub>4</sub>), pH on the total scale (pH<sub>T</sub>), total alkalinity (<italic>A</italic><sub>T</sub>), total dissolved inorganic carbon (<italic>C</italic><sub>T</sub>), and partial pressure of CO<sub>2</sub> (<italic>p</italic>CO<sub>2</sub>). Note that we estimated this last variable from the <italic>A</italic><sub>T</sub> and <italic>C</italic><sub>T</sub> measurements available in GLODAPv2 (see details below).</p>
<p>Initially, GLODAPv2 was instigated to prepare a unified, bias-corrected interior ocean data product. Thus, a high quality control, QC, based on two steps (i.e., primary and secondary QC) was applied to each data (Olsen et al., <xref ref-type="bibr" rid="B42">2016</xref>). The primary QC was carried out following routines outlined in Sabine et al. (<xref ref-type="bibr" rid="B48">2005</xref>) and Tanhua et al. (<xref ref-type="bibr" rid="B53">2010</xref>), essentially based on inspection of property-property plots. The secondary QC for salinity, oxygen, nutrients, <italic>C</italic><sub>T</sub>, and <italic>A</italic><sub>T</sub> was more complex, and carried out through crossover (i.e., comparing data where two different cruises crossed or came close to each other) and inversion analyses (i.e., calculation of corrections required to minimize all cruise-by-cruise offsets). This two step-based method was introduced by Gouretski and Jancke (<xref ref-type="bibr" rid="B16">2000</xref>) and Johnson et al. (<xref ref-type="bibr" rid="B24">2001</xref>). For the secondary QC applied to the GLODAPv2 database, the crossover offsets were calculated using the running-cluster crossover routine (Tanhua et al., <xref ref-type="bibr" rid="B53">2010</xref>), with data from beneath 2,000 m to minimize effects of real variations. For pH<sub>T</sub>, crossover analysis was not possible because data only exist for a small fraction of the cruises. To pass the secondary QC, pH<sub>T</sub> measurements had to be concomitant with <italic>C</italic><sub>T</sub> and/or <italic>A</italic><sub>T</sub> for calculating offsets (see details in Olsen et al., <xref ref-type="bibr" rid="B42">2016</xref>). For Mediterranean Sea data, the secondary QC always failed because none of the cruises inside the Mediterranean had an overlap with other cruises (e.g., outside the Mediterranean) thus preventing the crossover analysis. 
Hence, only &#x0201C;high-quality&#x0201D; GLODAPv2 data that passed the secondary QC were used to train and validate the CANYON methods except for the Mediterranean Sea where we used data that only passed the primary QC.</p>
<p>The subset of the GLODAPv2 database used for our study (i.e., the data that passed the secondary QC, except for the Mediterranean Sea data as explained above) contained 37,863 concurrent profiles of water-column (from the surface to a maximum sample depth of 8,000 m) hydrological properties together with nutrient concentrations and/or parameters of the carbonate system (see Figure <xref ref-type="fig" rid="F1">1</xref>). These data were collected between 1972 and 2013 and were representative of the diversity of oceanic regions, i.e., 25 % were collected in the North Atlantic, 10 % in the South Atlantic, 22 % in the North Pacific, 12 % in the South Pacific, 10 % in the Indian Ocean, 13 % in the Southern Ocean, 7 % in the Arctic Ocean, and &#x0007E;0.2 % in the Mediterranean Sea (geographic boundaries are provided in Figure <xref ref-type="supplementary-material" rid="SM1">S1</xref>). On the temporal scale, most of the data were acquired since the 1990&#x00027;s and more data were available for the spring and summer months (Figure <xref ref-type="supplementary-material" rid="SM2">S2</xref>). There was a sampling bias according to latitude as data from autumn and winter months (i.e., December to March for the Northern hemisphere, and May to August for the Southern hemisphere) were less represented at high than low latitudes (i.e., &#x0003E;45&#x000B0; North and South, respectively) in the GLODAPv2 database (Figure <xref ref-type="supplementary-material" rid="SM2">S2</xref>).</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p><bold>Geographic distribution of the 37,863 stations (gray dots) used in this study (from the GLODAPv2 database; Olsen et al., <xref ref-type="bibr" rid="B42">2016</xref>)</bold>. For each station, concurrent samples of temperature, salinity, concentrations of O<sub>2</sub>, and nutrients and/or carbonate system parameters were analyzed. The red cross indicates the location of the Hawaiian Time Series (HOT, used in Section Example of Application: Illustration with HOT Database). The eight colored boxes delineate the eight independent zones of which data were not included in the training and validation of the neural network.</p></caption>
<graphic xlink:href="fmars-04-00128-g0001.tif"/>
</fig>
<p>All the data used to train and validate CANYON were measurements recorded in the GLODAPv2 database, except the <italic>p</italic>CO<sub>2</sub> estimates that we calculated from <italic>A</italic><sub>T</sub> and <italic>C</italic><sub>T</sub> measurements using the R package &#x0201C;seacarb&#x0201D; (Gattuso et al., <xref ref-type="bibr" rid="B15">2015</xref>, <xref ref-type="bibr" rid="B14">2016</xref>). The carbonate system parameters were computed using the carbonic acid dissociation constants of Lueker et al. (<xref ref-type="bibr" rid="B40">2000</xref>), the hydrogen fluoride dissociation constant of Perez and Fraga (<xref ref-type="bibr" rid="B43">1987</xref>), the dissociation constant for bisulfate of Dickson (<xref ref-type="bibr" rid="B8">1990</xref>), and a ratio of total boron to salinity derived from Uppstr&#x000F6;m (<xref ref-type="bibr" rid="B54">1974</xref>). <italic>In situ</italic> measurements of salinity, temperature, hydrostatic pressure as well as the concentrations of PO<sub>4</sub><sup>3&#x02212;</sup> and Si(OH)<sub>4</sub> were used to calculate <italic>p</italic>CO<sub>2</sub>. When not available in the GLODAPv2 database, the concentrations of PO<sub>4</sub><sup>3&#x02212;</sup> and Si(OH)<sub>4</sub> were estimated using our CANYON algorithm (see associated accuracies in Section Overall CANYON Performance).</p>
<p>For the neural network development, vertical profiles of nutrients and carbonate system parameters from eight independent zones of the GLODAPv2 database (squares of 10&#x000B0; latitude &#x000D7; 10&#x000B0; longitude) were first removed from the general database to provide a more &#x0201C;independent data set&#x0201D; used for an independent validation of the algorithm developed in this study. These zones were chosen in several major oceanic basins and were representative of the Sub-Equatorial Pacific, the Sub-Equatorial Indian, the North Atlantic Subtropical Gyre, the North Atlantic Subpolar Gyre, the North Pacific, the South Atlantic, the South Indian, and the South Pacific (Figure <xref ref-type="fig" rid="F1">1</xref>). The remaining profiles were then split into two subsets with 80 % and 20 % of the data, the so-called training and validation datasets, respectively (see the number of data for each variable in Table <xref ref-type="table" rid="T1">1</xref>).</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p><bold>Number of data available for each variable in the different datasets used in this study: the general GLODAPv2 database (data that passed the secondary quality control, except for the Mediterranean data), the dataset from the eight independent zones that were first removed from the general database, the dataset used to train the neural network (80 % of the general database <italic>minus</italic> the eight independent zones), and the dataset used to validate the neural network (20 %)</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Variable in the GLODAPv2 database</bold></th>
<th valign="top" align="center"><bold>Total dataset</bold></th>
<th valign="top" align="center"><bold>Independent dataset (eight zones)</bold></th>
<th valign="top" align="center"><bold>Training dataset (&#x0007E;80 %)</bold></th>
<th valign="top" align="center"><bold>Validation dataset (&#x0007E;20 %)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">NO<sub>3</sub><sup>&#x02212;</sup></td>
<td valign="top" align="center">700,682</td>
<td valign="top" align="center">13,902</td>
<td valign="top" align="center">549,561</td>
<td valign="top" align="center">137,219</td>
</tr>
<tr>
<td valign="top" align="left">PO<sub>4</sub><sup>3&#x02212;</sup></td>
<td valign="top" align="center">650,809</td>
<td valign="top" align="center">11,564</td>
<td valign="top" align="center">510,981</td>
<td valign="top" align="center">128,264</td>
</tr>
<tr>
<td valign="top" align="left">Si(OH)<sub>4</sub></td>
<td valign="top" align="center">704,624</td>
<td valign="top" align="center">14,350</td>
<td valign="top" align="center">552,311</td>
<td valign="top" align="center">137,963</td>
</tr>
<tr>
<td valign="top" align="left">pH<sub>T</sub></td>
<td valign="top" align="center">277,749</td>
<td valign="top" align="center">6,522</td>
<td valign="top" align="center">217,043</td>
<td valign="top" align="center">54,161</td>
</tr>
<tr>
<td valign="top" align="left"><italic>A</italic><sub>T</sub></td>
<td valign="top" align="center">273,288</td>
<td valign="top" align="center">6,609</td>
<td valign="top" align="center">213,607</td>
<td valign="top" align="center">53,072</td>
</tr>
<tr>
<td valign="top" align="left"><italic>C</italic><sub>T</sub></td>
<td valign="top" align="center">317,604</td>
<td valign="top" align="center">7,292</td>
<td valign="top" align="center">247,634</td>
<td valign="top" align="center">62,678</td>
</tr>
<tr>
<td valign="top" align="left"><italic>p</italic>CO<sub>2</sub></td>
<td valign="top" align="center">247,745</td>
<td valign="top" align="center">6,274</td>
<td valign="top" align="center">192,857</td>
<td valign="top" align="center">48,592</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Details are given in Section The GLODAPv2 database</italic>.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>Neural network development</title>
<sec>
<title>General principle of multi-layered perceptron (MLP)</title>
<p>A multi-layer perceptron (MLP; Bishop, <xref ref-type="bibr" rid="B3">1995</xref>; Rumelhart et al., <xref ref-type="bibr" rid="B47">1988</xref>) is an artificial neural network based on several layers (i.e., the so-called input, hidden, and output layers) composed of neurons which are basically elementary transfer functions. These neurons are interconnected with the neurons of the preceding and following layers by weights (Figure <xref ref-type="fig" rid="F2">2</xref>), which are iteratively readjusted during the training phase of the MLP. The criterion for readjusting the weights is the minimization of a cost function defined as the quadratic difference between the reference measurements and the MLP-based outputs. This minimization is done through the back-propagation conjugate-gradient technique (Hornik et al., <xref ref-type="bibr" rid="B21">1989</xref>; Bishop, <xref ref-type="bibr" rid="B3">1995</xref>), an iterative optimization method adapted to the development of MLPs. To prevent overlearning (Bishop, <xref ref-type="bibr" rid="B3">1995</xref>), the training data set is randomly split into two subsets called &#x0201C;learning&#x0201D; and &#x0201C;test&#x0201D; data sets (50 % of the training dataset each). Finally the validation data set is used to evaluate the final method performance. Moreover, the &#x0201C;independent data set&#x0201D; (see above in Section The GLODAPv2 Database) is used to check the general applicability of the method.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p><bold>Schematic representation of the CANYON MLP-based neural-network algorithm that retrieves the concentrations of nutrients [NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, and Si(OH)<sub>4</sub>] and the parameters of the carbonate system in seawater (pH<sub>T</sub>, <italic>A</italic><sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub>)</bold>. The input variables are field measured temperature, salinity, O<sub>2</sub>, and hydrostatic pressure (i.e., depth) together with the geolocation and time of sampling. The year is used as input only for retrieving pH<sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub>. Doy, day of year.</p></caption>
<graphic xlink:href="fmars-04-00128-g0002.tif"/>
</fig>
</sec>
<sec>
<title>CANYON: developing a MLP to retrieve nutrient and carbonate system concentrations</title>
<p>The optimal architectures of CANYON MLPs for the seven variables to retrieve (i.e., concentrations of three dissolved macronutrients, NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, Si(OH)<sub>4</sub>, and four parameters of the carbonate system, pH<sub>T</sub>, <italic>A</italic><sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub>) were chosen after multiple tests. As summarized in Figure <xref ref-type="fig" rid="F2">2</xref>, the chosen input variables include hydrological and biogeochemical components (i.e., temperature, salinity, and O<sub>2</sub> measurements), spatial components (i.e., hydrostatic pressure, latitude, and longitude) and a temporal component (i.e., day of the year, doy, for the seven variables, and the year for only pH<sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub> retrievals). We chose to use the year as input of the MLPs developed for retrieving pH<sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub> in order to take into account the long-term changes in seawater CO<sub>2</sub>-carbonate chemistry due to the uptake of anthropogenic CO<sub>2</sub> (e.g., Gattuso and Hansson, <xref ref-type="bibr" rid="B13">2011</xref>).</p>
<p>Prior to the full-depth CANYON version in this study, an initial depth-restricted CANYON algorithm (i.e., 30 &#x02013; 1,500 dbar depth range) was first developed, and showed a very good performance in subsurface, mode, and intermediate waters. However, estimated concentrations at 1,500 dbar occasionally showed small-amplitude seasonal cycles (data not shown). This especially occurred in regions with scarce reference data, where spatially adjacent data had been acquired in different seasons. We believe that, when the day of the year (doy) had been provided as extra degree of freedom at depth to the MLP, per-se spatial variability was parameterized as seasonal variability. To avoid this misattribution by the neural network, we decided to develop the full-depth CANYON where the doy information is not provided below a certain depth. This depth is the larger of 750 dbar or the climatological maximum mixed layer depth (Holte et al., <xref ref-type="bibr" rid="B20">2016</xref>), below which no seasonal cycle is expected.</p>
<p>For the full-depth CANYON algorithm development, the pressure input was specifically transformed by a combination of a linear and a logistic curve according to:</p>
<disp-formula id="E1"><mml:math id="M1"><mml:mrow><mml:mtext>P</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mtext>P</mml:mtext><mml:mrow><mml:mn>20000</mml:mn><mml:mo>&#x000A0;</mml:mo><mml:mtext>dbar</mml:mtext></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mtext>exp</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mfrac><mml:mtext>P</mml:mtext><mml:mrow><mml:mn>300</mml:mn><mml:mo>&#x000A0;</mml:mo><mml:mtext>dbar</mml:mtext></mml:mrow></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>3</mml:mn></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<p>for two main reasons. (1) Based on our previous experience of including doy in the model, we wanted to limit the degrees of freedom of the neural network in deep and abyssal waters and focus instead its parameterization on temperature and salinity, i.e., the water mass properties. Preliminary analysis confirmed that temperature and salinity were the main determinants of nutrient concentrations and carbonate system parameters in the deep water masses. (2) Preliminary analysis showed that sub-surface and mode-water CANYON estimates for a full-depth version without pressure transformation were less satisfactory than our initial 30 &#x02013; 1,500 dbar version. We attributed this to the extension of the pressure range to full ocean depth, where sub-surface, and mode waters comprise a much smaller dynamic range than previously. To counteract this effect, we chose the above input transformation of pressure with the aim to mimic the dynamic range of our initial CANYON in the interval between 0 and 1,000 dbar.</p>
<p>The uncertainty of the <italic>p</italic>CO<sub>2</sub> calculation from <italic>C</italic><sub>T</sub> and <italic>A</italic><sub>T</sub> is proportional to <italic>p</italic>CO<sub>2</sub>, i.e., high <italic>p</italic>CO<sub>2</sub> levels have a higher uncertainty than low <italic>p</italic>CO<sub>2</sub> levels. Similarly, the uncertainty of the CANYON-predicted <italic>p</italic>CO<sub>2</sub> scales with <italic>p</italic>CO<sub>2</sub> as well. The cost function of the MLP training (the quadratic difference between reference and MLP output), however, works on the absolute <italic>p</italic>CO<sub>2</sub>-value. To account for the different behavior of <italic>p</italic>CO<sub>2</sub> and to avoid potential biases to the MLP induced by large absolute <italic>p</italic>CO<sub>2</sub>-values (with large uncertainties) during training, we transformed the <italic>p</italic>CO<sub>2</sub> to a hypothetical, <italic>p</italic>CO<sub>2</sub>-equivalent <italic>C</italic><sub>T</sub> at constant conditions (i.e., <italic>A</italic><sub>T</sub> 2,300 &#x003BC;mol kg<sup>&#x02212;1</sup>, 25 &#x000B0;C, 35 salinity, 0 dbar, zero silicate and phosphate) before training. A constant change in this hypothetical <italic>C</italic><sub>T</sub> corresponds to a change in <italic>p</italic>CO<sub>2</sub> that is proportional to <italic>p</italic>CO<sub>2</sub>. This transformation thus approximates the observed <italic>p</italic>CO<sub>2</sub> behavior while we retain the benefits of our MLP architecture and the backpropagation technique for training.</p>
<p>Similarly to the methods developed by Sauz&#x000E8;de et al. (<xref ref-type="bibr" rid="B49">2015</xref>, <xref ref-type="bibr" rid="B50">2016</xref>), a specific normalization procedure was applied to the doy and longitude inputs to take into account the periodicity of these variables (e.g., doy 1 of a given year is very similar from a seasonal perspective to doy 365 of the previous year). These two input variables were transformed into radians:</p>
<disp-formula id="E2"><mml:math id="M2"><mml:mrow><mml:msub><mml:mtext>X</mml:mtext><mml:mrow><mml:mtext>rad</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>.</mml:mo><mml:mi>&#x003C0;</mml:mi></mml:mrow><mml:mtext>a</mml:mtext></mml:mfrac></mml:mrow></mml:math></disp-formula>
<p>where X was either the doy or the longitude, and <italic>a</italic> was a constant equal to 182.625 or 180 for the doy or the longitude, respectively (accounting for half the number of days in the year and half the maximum value of longitude, respectively). Moreover, as the elementary transfer function that provided outputs when inputs were applied to the MLP was a sigmoid non-linear function and subsequently varied within the [&#x02212;1;1] domain, the inputs and outputs of the MLP were centered and reduced to match the range [&#x02212;1;1] (see details in Sauz&#x000E8;de et al., <xref ref-type="bibr" rid="B50">2016</xref>).</p>
<p>Finally, the MLPs developed in this study for each of the seven variables to retrieve (i.e., output variables) are each composed of an input layer, two hidden layers, and one output layer (schematic overview in Figure <xref ref-type="fig" rid="F2">2</xref>). To choose the best architecture of each MLP, tests were performed using one or two hidden layers with a number of neurons varying between 1 and 50 and 1 and 20, respectively. The MLP architecture for each output variable with minimum validation error and minimum number of neurons was then selected as the best (Table <xref ref-type="table" rid="T2">2</xref>). In order to evaluate the robustness of the method for each MLP, several subsets of the training data set were tested, with no difference observed in the prediction.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p><bold>Characteristics of the Multi-Layered Perceptron architecture for each CANYON-retrieved variable</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>CANYON-retrieved variable</bold></th>
<th valign="top" align="center"><bold>Number of input variables</bold></th>
<th valign="top" align="center"><bold>Number of neurons in the first hidden layer</bold></th>
<th valign="top" align="center"><bold>Number of neurons in the second hidden layer</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">NO<sub>3</sub><sup>&#x02212;</sup></td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">17</td>
</tr>
<tr>
<td valign="top" align="left">PO<sub>4</sub><sup>3&#x02212;</sup></td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">17</td>
<td valign="top" align="center">17</td>
</tr>
<tr>
<td valign="top" align="left">Si(OH)<sub>4</sub></td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">15</td>
</tr>
<tr>
<td valign="top" align="left">pH<sub>T</sub></td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">19</td>
<td valign="top" align="center">8</td>
</tr>
<tr>
<td valign="top" align="left"><italic>A</italic><sub>T</sub></td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">19</td>
<td valign="top" align="center">17</td>
</tr>
<tr>
<td valign="top" align="left"><italic>C</italic><sub>T</sub></td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">15</td>
</tr>
<tr>
<td valign="top" align="left"><italic>p</italic>CO<sub>2</sub></td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">8</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Each MLP had a single output variable</italic>.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>Statistical evaluation of method performance</title>
<p>Four statistics were chosen to evaluate the CANYON algorithms performance on the validation datasets. The coefficient of determination (<italic>r</italic><sup>2</sup>) and the slope of the linear regression between the CANYON-retrieved values and the corresponding GLODAPv2 measurements were computed. The statistics also included the MAE (Mean Absolute Error) and the RMSE (Root Mean Squared Error) to evaluate the errors and accuracies of each model:</p>
<disp-formula id="E3"><mml:math id="M3"><mml:mtable columnalign='left'><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;MAE</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>N</mml:mtext></mml:mfrac><mml:mo>.</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mtext>i&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn></mml:mrow><mml:mtext>N</mml:mtext></mml:munderover><mml:mrow><mml:mo>&#x0007C;</mml:mo><mml:msub><mml:mtext>X</mml:mtext><mml:mrow><mml:mtext>CANYON</mml:mtext><mml:mo>&#x000A0;</mml:mo><mml:mtext>i</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mtext>X</mml:mtext><mml:mrow><mml:mtext>GLODAPv</mml:mtext><mml:mn>2</mml:mn><mml:mo>&#x000A0;</mml:mo><mml:mtext>i</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0007C;</mml:mo></mml:mrow></mml:mstyle></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>RMSE</mml:mtext><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>N</mml:mtext></mml:mfrac><mml:mo>.</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mtext>i&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn></mml:mrow><mml:mtext>N</mml:mtext></mml:munderover><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mtext>X</mml:mtext><mml:mrow><mml:mtext>CANYON</mml:mtext><mml:mo>&#x000A0;</mml:mo><mml:mtext>&#x000A0;i</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mtext>X</mml:mtext><mml:mrow><mml:mtext>GLODAPv</mml:mtext><mml:mn>2</mml:mn><mml:mo>&#x000A0;</mml:mo><mml:mtext>i</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mstyle></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Note that uncertainties are expressed as absolute values for NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, Si(OH)<sub>4</sub>, pH<sub>T</sub>, <italic>A</italic><sub>T</sub>, and <italic>C</italic><sub>T</sub>. For <italic>p</italic>CO<sub>2</sub>, uncertainties are expressed as relative values in percent (e.g., a relative uncertainty of 5 % corresponds to an absolute uncertainty of 20 &#x003BC;atm at 400 &#x003BC;atm).</p>
</sec>
</sec>
</sec>
<sec id="s3">
<title>Results and discussion</title>
<sec>
<title>Overall CANYON performance</title>
<p>Using the validation database (i.e., 20 % of the general database minus the eight independent zones), we evaluated the performance of the method by comparing the CANYON-retrieved nutrient concentrations and carbonate system parameters with the measurements in the GLODAPv2 database using the statistics from Section Statistical Evaluation of Method Performance. Scatterplots of CANYON-retrieved variables vs. GLODAPv2 measurements (Figure <xref ref-type="fig" rid="F3">3</xref>) show that the CANYON method predicts nutrient concentrations and carbonate system parameters with good accuracy (i.e., RMSE of 0.93, 0.066, and 3.0 &#x003BC;mol kg<sup>&#x02212;1</sup> for the concentrations of NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, and Si(OH)<sub>4</sub>, respectively, and of 0.019, 7 &#x003BC;mol kg<sup>&#x02212;1</sup>, 10 &#x003BC;mol kg<sup>&#x02212;1</sup> and 5.1 % or 20 &#x003BC;atm at 400 &#x003BC;atm for pH<sub>T</sub>, <italic>A</italic><sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub>, respectively). The determination coefficients of the seven linear models between the CANYON-retrieved and GLODAPv2 variables range from 0.982 to 0.996, with slopes ranging from 0.986 to 0.999. In Figure <xref ref-type="fig" rid="F3">3</xref> only very few data points diverge from the 1:1 line. A higher scatter is observed for low CANYON-retrieved NO<sub>3</sub><sup>&#x02212;</sup> (and PO<sub>4</sub><sup>3&#x02212;</sup>), which mostly corresponds to low surface nutrient concentrations inside and at the edges of the subtropical gyres. Moreover, the higher scatter observed near the surface than at depth for most variables is probably due to the higher inherent variability at the surface.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p><bold>Comparison of the values retrieved by CANYON with the corresponding measurements in the GLODAPv2 database for: (a)</bold> NO<sub>3</sub><sup>&#x02212;</sup>; <bold>(b)</bold> PO<sub>4</sub><sup>3&#x02212;</sup>; <bold>(c)</bold> Si(OH)<sub>4</sub>; <bold>(d)</bold> pH<sub>T</sub>; <bold>(e)</bold> <italic>A</italic><sub>T</sub>; <bold>(f)</bold> <italic>C</italic><sub>T</sub>; and <bold>(g)</bold> <italic>p</italic>CO<sub>2</sub> with data ordered according to the pressure. The 1:1 line is shown in each plot as visual reference. The statistics are defined in Section Statistical Evaluation of Method Performance.</p></caption>
<graphic xlink:href="fmars-04-00128-g0003.tif"/>
</fig>
<p>To go further, the final CANYON accuracies for the seven variables can be estimated using the merged accuracies of CANYON estimations and GLODAPv2 measurements (RMSE<sub>final</sub> &#x0003D; &#x0221A;[RMSE<sub>CANYON</sub><sup>2</sup> &#x0002B; RMSE<sub>GLODAPv2</sub><sup>2</sup>]). The accuracies of GLODAPv2 measurements are 0.46, 0.033, and 1.1 &#x003BC;mol kg<sup>&#x02212;1</sup>, for NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, Si(OH)<sub>4</sub>, respectively, and 0.005, 6 &#x003BC;mol kg<sup>&#x02212;1</sup>, and 4 &#x003BC;mol kg<sup>&#x02212;1</sup>, for pH<sub>T</sub>, <italic>A</italic><sub>T</sub>, and <italic>C</italic><sub>T</sub>, respectively from Olsen et al. (<xref ref-type="bibr" rid="B42">2016</xref>) and 5.6 % (22 &#x003BC;atm at 400 &#x003BC;atm) for <italic>p</italic>CO<sub>2</sub> from uncertainty propagation of the carbonate system calculations using seacarb errors (Gattuso et al., <xref ref-type="bibr" rid="B15">2015</xref>, <xref ref-type="bibr" rid="B14">2016</xref>). Thus, ultimately, the final global accuracies of CANYON are 1.04, 0.074, and 3.2 &#x003BC;mol kg<sup>&#x02212;1</sup> for NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, and Si(OH)<sub>4</sub> concentrations, respectively, and 0.020, 9 &#x003BC;mol kg<sup>&#x02212;1</sup>, 11 &#x003BC;mol kg<sup>&#x02212;1</sup> and 7.6 % (30 &#x003BC;atm at 400 &#x003BC;atm) for pH<sub>T</sub>, <italic>A</italic><sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub>, respectively.</p>
<p>The training and validation datasets of the neural networks used to retrieve carbonate system parameters were smaller than the datasets for the retrieval of nutrient concentrations (Figure <xref ref-type="fig" rid="F3">3</xref> and Table <xref ref-type="table" rid="T1">1</xref>). It is thus possible that the carbonate system networks are less robust than the nutrient ones. In any case, all the MLPs could be updated in the future as more data become available; this seems especially important for the pH<sub>T</sub> database, which is presently the least populated. In order to assess the importance of this potential weakness, we developed a special neural network using all pH<sub>T</sub> data available, i.e., all the data that passed the primary quality control (see details in Section The GLODAPv2 Database). The results of this special CANYON algorithm, based on more but a priori less accurate data used for training, are not improved when compared to our initial results (i.e., RMSE of 0.030). Given this and in order to maintain consistency among CANYON algorithms and their retrieval performance, all neural networks were trained using data that had passed the secondary quality control (except for the Mediterranean Sea, see details in Section The GLODAPv2 Database).</p>
<p>To identify possible trends, errors were plotted against each input variable for each retrieved nutrient concentration (Figure <xref ref-type="fig" rid="F4">4</xref>) and carbonate system parameter (Figure <xref ref-type="fig" rid="F5">5</xref>). Some errors are larger because of the small numbers of data (the intensity of the shading in each box refers to the number of data). Nevertheless, some clear trends are observed. The range of errors for Si(OH)<sub>4</sub> retrieval seems to be higher at high latitudes in the Southern hemisphere (i.e., latitudes &#x0003C;&#x02212;60&#x000B0;) and low temperatures, i.e., the ranges of box-plot whiskers increased with both decreasing latitude and temperature in Figure <xref ref-type="fig" rid="F4">4</xref>. This corresponds to regions of significant Si(OH)<sub>4</sub>, mostly in the Southern Ocean, which suggests that the CANYON method is less accurate for Si(OH)<sub>4</sub> retrievals in the Southern Ocean than in other areas. The <italic>C</italic><sub>T</sub> estimates show a high error at high temperatures and unusually low (&#x0003C;34 psu) salinities. <italic>p</italic>CO<sub>2</sub> estimates exhibit an increased uncertainty at the extremes of the O<sub>2</sub> input (Figure <xref ref-type="fig" rid="F5">5</xref>), i.e., the range of errors increases at low O<sub>2</sub> concentrations corresponding to high <italic>p</italic>CO<sub>2</sub> (Figure <xref ref-type="supplementary-material" rid="SM3">S3</xref>), and at high O<sub>2</sub> concentrations corresponding to cold, low salinity polar surface waters. For CANYON-estimated NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, and <italic>C</italic><sub>T</sub>, the upper layer (i.e., &#x02264;700 m) has the broadest error range. The Si(OH)<sub>4</sub> displays an opposite trend with error larger for deeper than surface waters (i.e., &#x02265;700 m). 
The CANYON-estimated <italic>A</italic><sub>T</sub>, pH<sub>T</sub>, and <italic>p</italic>CO<sub>2</sub> errors are not affected by depth inputs. Finally, Figure <xref ref-type="supplementary-material" rid="SM3">S3</xref> shows that the CANYON-retrieved variables are not biased against the range of <italic>in situ</italic> values to retrieve, except for a few extreme values where few data (i.e., lightly shaded boxes) were available in the training and validation databases.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p><bold>Box plots of the differences between CANYON estimates <italic>minus</italic> GLODAPv2 reference measurements for (from left to right): NO<sub>3</sub><sup>&#x02212;</sup> (&#x003BC;mol kg<sup>&#x02212;1</sup>), PO<sub>4</sub><sup>3&#x02212;</sup> (&#x003BC;mol kg<sup>&#x02212;1</sup>), and Si(OH)<sub>4</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>) vs. the variable indicated on the left: pressure (P), year, doy, longitude, latitude, temperature (T), salinity (S), O<sub>2</sub></bold>. The intensity of the shading in each box refers to the number of data. The intervals were created by dividing the range of values into eight equal intervals. For each box, the negative and positive whiskers represent Q1 &#x02212; 1.5<sup>&#x0002A;</sup>IQR and Q3 &#x0002B; 1.5<sup>&#x0002A;</sup>IQR, respectively, where Q1 is the 0.25 quantile, Q3 the 0.75 quantile, and IQR the interquartile range. The width of each box represents the IQR and the middle line the median of the values.</p></caption>
<graphic xlink:href="fmars-04-00128-g0004.tif"/>
</fig>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p><bold>Box plots of the difference between CANYON estimates <italic>minus</italic> GLODAPv2 reference measurements for (from left to right) pH<sub>T</sub>, <italic>A</italic><sub>T</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>), <italic>C</italic><sub>T</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>), and <italic>p</italic>CO<sub>2</sub> (&#x003BC;atm) vs. the variable indicated on the left: pressure (P), year, doy, longitude, latitude, temperature (T), salinity (S), O<sub>2</sub></bold>. The intensity of the shading in each box refers to the number of data. The intervals were created by dividing the range of values into eight equal intervals. For each box, the lower and upper whiskers represent Q1 &#x02212; 1.5<sup>&#x0002A;</sup>IQR and Q3 &#x0002B; 1.5<sup>&#x0002A;</sup>IQR, respectively, where Q1 is the 0.25 quantile, Q3 the 0.75 quantile, and IQR the interquartile range. The width of each box represents the IQR and the middle line the median of the values.</p></caption>
<graphic xlink:href="fmars-04-00128-g0005.tif"/>
</fig>
<p>The above results indicate that hydrographic data and information about season (doy) and geolocation can certainly predict some aspects of the dynamics of biogeochemical variables at the surface (e.g., nutrient and <italic>C</italic><sub>T</sub> drawdown during the spring bloom, seasonal reset to preformed nutrient concentrations with winter ventilation) with O<sub>2</sub> being the most important predictor in CANYON for production and remineralization, particularly in the ocean interior. The year is the only variable that accounts in CANYON for the increase in anthropogenic CO<sub>2</sub>. As a consequence, when the year was not included, CANYON-estimated pH<sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub> showed a clear remaining trend due to the missing information about the long-term changes in seawater CO<sub>2</sub>-carbonate chemistry (data not shown).</p>
</sec>
<sec>
<title>Independent validation for eight geographic zones</title>
<p>The smoothed mean differences between CANYON-retrieved and <italic>in situ</italic> measurements were plotted as vertical profiles for each of the seven variables and the eight independent zones (Figure <xref ref-type="fig" rid="F6">6</xref>). In general, the accuracy (i.e., RMSE) for each variable is comparable to the accuracy determined on the validation data set (Table <xref ref-type="table" rid="T3">3</xref>). Beyond this general agreement, there are a few discrepancies. The errors appear to be higher in the 0 &#x02013; 200 m layer than below. This may be due to a larger variability in this upper layer, caused not only by biogeochemical processes but also by air-sea exchange of O<sub>2</sub> that act to decouple O<sub>2</sub> from the CANYON outputs (see also Figure <xref ref-type="fig" rid="F3">3</xref>). The Sub-Equatorial Pacific, North Pacific and South Atlantic display higher RMSE than calculated from the 20 % validation data (Table <xref ref-type="table" rid="T3">3</xref>). These results suggest that the GLODAPv2 data set for these specific zones could have been underrepresented in the training database with respect to the regional variability in nutrient concentrations and carbonate system parameters.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p><bold>Vertical profiles of the smoothed mean differences between CANYON-retrieved and <italic>in situ</italic> measurements from surface to 6,000 m depth for: (a)</bold> NO<sub>3</sub><sup>&#x02212;</sup>; <bold>(b)</bold> PO<sub>4</sub><sup>3&#x02212;</sup>; <bold>(c)</bold> Si(OH)<sub>4</sub>; <bold>(d)</bold> pH<sub>T</sub>; <bold>(e)</bold> <italic>A</italic><sub>T</sub>; <bold>(f)</bold> <italic>C</italic><sub>T</sub>; and <bold>(g)</bold> <italic>p</italic>CO<sub>2</sub>. The number of profiles used to compute the mean difference for each zone is indicated in the bottom left-hand side of each panel. Color code: eight independent zones in Figure <xref ref-type="fig" rid="F1">1</xref>.</p></caption>
<graphic xlink:href="fmars-04-00128-g0006.tif"/>
</fig>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p><bold>CANYON retrieval accuracy (RMSE) for each variable in each of the eight independent zones</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th valign="top" align="center"><bold>NO<sub>3</sub><sup>&#x02212;</sup> (&#x003BC;mol kg<sup>&#x02212;1</sup>)</bold></th>
<th valign="top" align="center"><bold>PO<sub>4</sub><sup>3&#x02212;</sup> (&#x003BC;mol kg<sup>&#x02212;1</sup>)</bold></th>
<th valign="top" align="center"><bold>Si(OH)<sub>4</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>)</bold></th>
<th valign="top" align="center"><bold>pH<sub>T</sub></bold></th>
<th valign="top" align="center"><bold><italic>A</italic><sub>T</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>)</bold></th>
<th valign="top" align="center"><bold><italic>C</italic><sub>T</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>)</bold></th>
<th valign="top" align="center"><bold><italic>p</italic>CO<sub>2</sub>(%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Sub-Equatorial Pacific</td>
<td valign="top" align="center">0.99 (284)</td>
<td valign="top" align="center">0.073 (284)</td>
<td valign="top" align="center">4.4 (284)</td>
<td/>
<td/>
<td/>
<td valign="top" align="center">5.7 (274)</td>
</tr>
<tr>
<td valign="top" align="left">Sub-Equatorial Indian</td>
<td valign="top" align="center">0.37 (886)</td>
<td valign="top" align="center">0.036 (903)</td>
<td valign="top" align="center">2.8 (919)</td>
<td/>
<td valign="top" align="center">5 (408)</td>
<td valign="top" align="center">4 (418)</td>
<td valign="top" align="center">3.4 (367)</td>
</tr>
<tr>
<td valign="top" align="left">North Atlantic Subtropical Gyre</td>
<td valign="top" align="center">0.59 (4,526)</td>
<td valign="top" align="center">0.052 (3,848)</td>
<td valign="top" align="center">1.1 (4,660)</td>
<td valign="top" align="center">0.014 (1,468)</td>
<td valign="top" align="center">5 (2,031)</td>
<td valign="top" align="center">5 (2,565)</td>
<td valign="top" align="center">3.8 (1,884)</td>
</tr>
<tr>
<td valign="top" align="left">North Atlantic Subpolar Gyre</td>
<td valign="top" align="center">0.60 (2,889)</td>
<td valign="top" align="center">0.045 (1,293)</td>
<td valign="top" align="center">1.7 (3,168)</td>
<td valign="top" align="center">0.014 (1,960)</td>
<td valign="top" align="center">7 (1,428)</td>
<td valign="top" align="center">6 (1,469)</td>
<td valign="top" align="center">3.0 (1,333)</td>
</tr>
<tr>
<td valign="top" align="left">North Pacific</td>
<td valign="top" align="center">0.94 (1,005)</td>
<td valign="top" align="center">0.093 (1,017)</td>
<td valign="top" align="center">4.5 (1,005)</td>
<td valign="top" align="center">0.024 (341)</td>
<td valign="top" align="center">6 (343)</td>
<td valign="top" align="center">8 (331)</td>
<td valign="top" align="center">6.3 (330)</td>
</tr>
<tr>
<td valign="top" align="left">South Atlantic</td>
<td valign="top" align="center">0.85 (1,395)</td>
<td valign="top" align="center">0.065 (1,323)</td>
<td valign="top" align="center">2.5 (1,396)</td>
<td valign="top" align="center">0.016 (525)</td>
<td valign="top" align="center">6 (516)</td>
<td valign="top" align="center">7 (571)</td>
<td valign="top" align="center">4.5 (509)</td>
</tr>
<tr>
<td valign="top" align="left">South Indian</td>
<td valign="top" align="center">0.69 (1,511)</td>
<td valign="top" align="center">0.039 (1,490)</td>
<td valign="top" align="center">1.8 (1,512)</td>
<td valign="top" align="center">0.015 (833)</td>
<td valign="top" align="center">6 (841)</td>
<td valign="top" align="center">5 (900)</td>
<td valign="top" align="center">4.3 (823)</td>
</tr>
<tr style="border-bottom: thin solid #000000;">
<td valign="top" align="left">South Pacific</td>
<td valign="top" align="center">0.54 (1,406)</td>
<td valign="top" align="center">0.037 (1,406)</td>
<td valign="top" align="center">1.7 (1,406)</td>
<td valign="top" align="center">0.016 (754)</td>
<td valign="top" align="center">3 (760)</td>
<td valign="top" align="center">5 (762)</td>
<td valign="top" align="center">3.0 (754)</td>
</tr> <tr>
<td valign="top" align="left">Validation dataset (20 %)</td>
<td valign="top" align="center">0.93 (137,219)</td>
<td valign="top" align="center">0.066 (128,264)</td>
<td valign="top" align="center">3.0 (137,963)</td>
<td valign="top" align="center">0.019 (54,161)</td>
<td valign="top" align="center">7 (53,072)</td>
<td valign="top" align="center">10 (62,678)</td>
<td valign="top" align="center">5.1 (48,592)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>In the last row, comparable information is provided for the validation dataset (Section Overall CANYON Performance), as a reference. The number of observations used to compute each RMSE is given in brackets</italic>.</p>
</table-wrap-foot>
</table-wrap>
<p>The above comparisons and the identification of spatio-temporal domains where CANYON performed less accurately could be used to identify more objectively the periods or regions that require more intense acquisition of discrete high-quality measurements. Indeed, the periods and regions where CANYON fails to reproduce well <italic>in situ</italic> observations are those where the variability (mostly seasonal) was not well-represented in the training dataset. From Table <xref ref-type="table" rid="T3">3</xref> and Figure <xref ref-type="fig" rid="F6">6</xref>, these regions are mainly the Sub-Equatorial Pacific, the North Pacific and the South Atlantic, and the periods are most likely late autumn and winter, when harsh sea conditions generally prevent ship-based collection of high-quality measurements. In fact, the GLODAPv2 database seems to be biased in this respect (Section The GLODAPv2 Database and Figure <xref ref-type="supplementary-material" rid="SM2">S2</xref>).</p>
</sec>
<sec>
<title>Further results for specific applications</title>
<p>Here we present further results important for specific applications. Indeed, one potential application of the CANYON method is the calibration of NO<sub>3</sub><sup>&#x02212;</sup> and pH<sub>T</sub> sensors mounted on BGC-Argo profiling floats (because the corresponding sensors may drift over long-term deployment). To overcome this problem, CANYON could be used to compute deep (e.g., &#x02265;1,000 m) reference measurements each time the float profiles. With this aim in mind, specific MLPs were developed to retrieve NO<sub>3</sub><sup>&#x02212;</sup> concentration and pH<sub>T</sub> only at depths between 950 and 2,050 m, i.e., different from the full water-column MLPs used above. Results show that specific, deep MLPs do not significantly improve the quality of the results compared to the MLPs developed for the entire water column applied to this layer (i.e., RMSE for NO<sub>3</sub><sup>&#x02212;</sup> of 0.50 and 0.54 &#x003BC;mol kg<sup>&#x02212;1</sup>, respectively, and for pH<sub>T</sub> of 0.013 units for the two approaches). The CANYON method can therefore be used to retrieve NO<sub>3</sub><sup>&#x02212;</sup> concentration and pH<sub>T</sub> for specific applications focused on deep layers with excellent accuracy.</p>
<p>For <italic>p</italic>CO<sub>2</sub> applications, most studies focus on the surface layer of the open ocean and on CO<sub>2</sub> air-sea exchange (e.g., Takahashi et al., <xref ref-type="bibr" rid="B51">2009</xref>). The CANYON method could be used to address the regional and seasonal variability of air-sea CO<sub>2</sub> fluxes in view of comparing it with the results of previous studies based on neural networks (Landsch&#x000FC;tzer et al., <xref ref-type="bibr" rid="B36">2014</xref>, <xref ref-type="bibr" rid="B35">2016</xref>). For this application, it is important to ascertain that the general MLP performs as adequately as a specific MLP developed to retrieve <italic>p</italic>CO<sub>2</sub> for the surface layer only (i.e., &#x02264;100 m). Here again, the CANYON algorithm developed for the entire water column is as robust as a specific, surface-focused MLP (i.e., RMSE of 34 and 33 &#x003BC;atm, respectively).</p>
</sec>
<sec>
<title>Example of application: illustration with HOT database</title>
<p>The CANYON method was further applied outside the GLODAPv2 domain, on which it was trained, using an independent dataset that contained similar variables. The Hawaii Ocean Time Series (HOT) database contains monthly vertical profiles of hydrological properties, O<sub>2</sub>, nutrient concentrations and carbonate system parameters from the deep-water station ALOHA since 1994 (Karl and Lukas, <xref ref-type="bibr" rid="B32">1996</xref>; Dore et al., <xref ref-type="bibr" rid="B10">2003</xref>). The temperature, salinity, and O<sub>2</sub> measurements from HOT were used as input variables to estimate the concentrations of nutrients and the carbonate system parameters. The overall agreement between the CANYON-simulated variables and their measured <italic>in situ</italic> counterparts is satisfactory, as shown by the absolute differences between the two datasets and the corresponding comparison statistics (Figure <xref ref-type="fig" rid="F7">7</xref> and Table <xref ref-type="table" rid="T4">4</xref>). There are a few systematic biases, such as the model underestimation of NO<sub>3</sub><sup>&#x02212;</sup> from 450 to 550 m (i.e., the depth horizon of the nitracline) and the overestimation of PO<sub>4</sub><sup>3&#x02212;</sup> and C<sub>T</sub> in the upper 150 m (Figures <xref ref-type="fig" rid="F7">7b,d,l</xref>, respectively).</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p><bold>Values predicted by CANYON (a,c,e,g,i,k,m)</bold> and absolute differences between HOT measurements and CANYON estimates <bold>(b,d,f,h,j,l,n)</bold>. Time series (22 years) for NO<sub>3</sub><sup>&#x02212;</sup> <bold>(a,b)</bold>, PO<sub>4</sub><sup>3&#x02212;</sup> <bold>(c,d)</bold>, Si(OH)<sub>4</sub> <bold>(e,f)</bold>, pH<sub>T</sub> <bold>(g,h)</bold>, <italic>A</italic><sub>T</sub> <bold>(i,j)</bold>, <italic>C</italic><sub>T</sub> <bold>(k,l)</bold>, and <italic>p</italic>CO<sub>2</sub> <bold>(m,n)</bold>. Five profiles from 25 February 2003 to 29 March 2003 have been removed from the time-series because of their abnormal O<sub>2</sub> profiles (see details in Section Example of Application: Illustration with HOT Database).</p></caption>
<graphic xlink:href="fmars-04-00128-g0007.tif"/>
</fig>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p><bold>Comparison statistics between the values retrieved by CANYON vs. the corresponding measurements in the HOT database</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>CANYON-retrieved variable</bold></th>
<th valign="top" align="center"><bold><italic>r</italic><sup>2</sup></bold></th>
<th valign="top" align="center"><bold>Slope</bold></th>
<th valign="top" align="center"><bold>MAE</bold></th>
<th valign="top" align="center"><bold>RMSE</bold></th>
<th valign="top" align="center"><bold>Number of data</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">NO<sub>3</sub><sup>&#x02212;</sup></td>
<td valign="top" align="center">0.998</td>
<td valign="top" align="center">1.005</td>
<td valign="top" align="center">0.68 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">0.89 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">8,373</td>
</tr>
<tr>
<td valign="top" align="left">PO<sub>4</sub><sup>3&#x02212;</sup></td>
<td valign="top" align="center">0.999</td>
<td valign="top" align="center">0.982</td>
<td valign="top" align="center">0.081 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">0.100 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">8,082</td>
</tr>
<tr>
<td valign="top" align="left">Si(OH)<sub>4</sub></td>
<td valign="top" align="center">0.999</td>
<td valign="top" align="center">1.004</td>
<td valign="top" align="center">0.7 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">1.8 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">7,999</td>
</tr>
<tr>
<td valign="top" align="left">pH<sub>T</sub></td>
<td valign="top" align="center">0.991</td>
<td valign="top" align="center">0.989</td>
<td valign="top" align="center">0.027</td>
<td valign="top" align="center">0.033</td>
<td valign="top" align="center">2,338</td>
</tr>
<tr>
<td valign="top" align="left"><italic>A</italic><sub>T</sub></td>
<td valign="top" align="center">0.947</td>
<td valign="top" align="center">0.880</td>
<td valign="top" align="center">5 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">7 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">3,501</td>
</tr>
<tr>
<td valign="top" align="left"><italic>C</italic><sub>T</sub></td>
<td valign="top" align="center">0.995</td>
<td valign="top" align="center">0.928</td>
<td valign="top" align="center">12 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">18 &#x003BC;mol kg<sup>&#x02212;1</sup></td>
<td valign="top" align="center">3,567</td>
</tr>
<tr>
<td valign="top" align="left"><italic>p</italic>CO<sub>2</sub></td>
<td valign="top" align="center">0.984</td>
<td valign="top" align="center">0.995</td>
<td valign="top" align="center">4.8 %</td>
<td valign="top" align="center">7.5 %</td>
<td valign="top" align="center">3,421</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Five profiles from 25 February 2003 to 29 March 2003 had been removed from the time-series because of their abnormal O<sub>2</sub> profiles (see details in Section Example of Application: Illustration with HOT Database)</italic>.</p>
</table-wrap-foot>
</table-wrap>
<p>Interestingly, running CANYON on this dataset unraveled O<sub>2</sub> outliers in the database. A first run of CANYON produced abnormally high differences between the retrieved and <italic>in situ</italic> values of the seven CANYON-estimated variables for profiles from 25 February 2003 to 29 March 2003. This corresponded to five O<sub>2</sub> profiles with abnormally high concentrations of O<sub>2</sub> (see black triangles in Figure <xref ref-type="supplementary-material" rid="SM4">S4</xref>). These profiles were subsequently removed from the analysis to avoid contaminating the CANYON-retrieved data shown in Figure <xref ref-type="fig" rid="F7">7</xref> and the corresponding statistics in Table <xref ref-type="table" rid="T4">4</xref>.</p>
<p>CANYON also provided a way to fill a gap in the HOT dataset. Indeed, pH<sub>T</sub> had not been acquired during the 1999&#x02013;2003 period, but the input variables needed to run CANYON had been measured. pH<sub>T</sub> could thus be estimated during that period (Figure <xref ref-type="fig" rid="F7">7g</xref>) with a mean accuracy of 0.033 units (i.e., RMSE in Table <xref ref-type="table" rid="T4">4</xref>). However, it is obviously not possible to compare the values predicted by CANYON with (non-existent) corresponding <italic>in situ</italic> measurements from 1999 to 2003 (Figure <xref ref-type="fig" rid="F7">7h</xref>).</p>
<p>Using the HOT temperature, salinity and O<sub>2</sub> measurements from the last year, i.e., 2015, we estimated pH<sub>T</sub> for the 15 years to come (by changing only the year input in CANYON), and a decline is found in pH<sub>T</sub> of 0.024 units over this 15-year period (i.e., decrease of 0.0016 &#x000B1; 0.0004 units year<sup>&#x02212;1</sup>). This value is consistent with the decrease in pH<sub>T</sub> of 0.0019 &#x000B1; 0.0002 year<sup>&#x02212;1</sup> reported in the central North Pacific (Dore et al., <xref ref-type="bibr" rid="B11">2009</xref>) and more generally of 0.0013 to 0.0026 units year<sup>&#x02212;1</sup> during the last 20&#x02013;30 years (Bates et al., <xref ref-type="bibr" rid="B1">2014</xref>). This suggests that the CANYON approach could perhaps also be used outside the temporal range of training for the carbonate system parameters for estimating near future changes in pH<sub>T</sub>, <italic>C</italic><sub>T</sub>, and <italic>p</italic>CO<sub>2</sub>, thanks to the use of the year among the input variables. However, this would assume that the relationships between the input and output variables, through the hidden layers of the CANYON model, will remain unchanged in the future, and the sensitivity of retrieved carbonate system parameters to departures from this assumption remains to be explored.</p>
</sec>
</sec>
<sec id="s4">
<title>Conclusions and perspectives</title>
<p>The global Biogeochemical-Argo (BGC-Argo) program is being progressively implemented (Johnson and Claustre, <xref ref-type="bibr" rid="B25">2016</xref>), and its core variables include O<sub>2</sub>. BGC-Argo floats can now acquire high-quality vertical profiles of O<sub>2</sub> on the long term (Bittig and K&#x000F6;rtzinger, <xref ref-type="bibr" rid="B4">2015</xref>; Johnson et al., <xref ref-type="bibr" rid="B29">2015</xref>; Bushinsky et al., <xref ref-type="bibr" rid="B5">2016</xref>). Given the possibility of acquiring long-term, global vertical profiles of temperature, salinity and O<sub>2</sub>, CANYON could be used to develop a variety of new applications.</p>
<p>Firstly, CANYON may contribute to developing quality control and post-processing procedures for NO<sub>3</sub><sup>&#x02212;</sup> concentration and pH<sub>T</sub> in oceanic waters (see Section Further Results for Specific Applications). These two variables, together with O<sub>2</sub>, are core BGC-Argo variables and their measurements make use of an optical sensor for NO<sub>3</sub><sup>&#x02212;</sup> (Johnson and Coletti, <xref ref-type="bibr" rid="B26">2002</xref>) and an electrochemical sensor for pH<sub>T</sub> (Johnson et al., <xref ref-type="bibr" rid="B28">2016</xref>) with known accuracies (1 &#x003BC;M and 0.010 for NO<sub>3</sub><sup>&#x02212;</sup> and pH<sub>T</sub>, respectively; Johnson et al., <xref ref-type="bibr" rid="B27">2013</xref>, <xref ref-type="bibr" rid="B28">2016</xref>). However, these sensors, like the O<sub>2</sub> probes, drift over long-term deployments. Following an approach similar to the one developed for oxygen sensors, which can be referenced to atmospheric values each time the floats surface (Bittig and K&#x000F6;rtzinger, <xref ref-type="bibr" rid="B4">2015</xref>; Johnson et al., <xref ref-type="bibr" rid="B29">2015</xref>; Bushinsky et al., <xref ref-type="bibr" rid="B5">2016</xref>), CANYON could be used to compute deep (e.g., &#x02265;1,000 m) reference measurements at depth for pH<sub>T</sub> and NO<sub>3</sub><sup>&#x02212;</sup> each time the float makes a profile. At these depths, it is indeed expected that reliable and stable reference measurements could be acquired, which could be used to develop appropriate correction procedures for NO<sub>3</sub><sup>&#x02212;</sup> concentration and pH<sub>T</sub> and thus guarantee the long-term accuracy of the sensors.</p>
<p>Secondly, CANYON can also provide estimates, with known accuracies, of variables that are not presently measured by BGC-Argo floats. This is the case for PO<sub>4</sub><sup>3&#x02212;</sup> and Si(OH)<sub>4</sub> and the three variables of the carbonate system other than pH<sub>T</sub>. CANYON could thus be used as a cost-effective method for &#x0201C;filling the spatio-temporal gaps&#x0201D; of these variables by populating spatially and temporally their loosely resolved fields in oceanic waters. For these under-sampled variables, CANYON offers novel opportunities at global and local scales. For example, global fields of these variables provided by CANYON could support the initialization and validation of biogeochemical models which presently crucially lack reference data (e.g., Doney et al., <xref ref-type="bibr" rid="B9">2009</xref>; Ilyina et al., <xref ref-type="bibr" rid="B22">2013</xref>).</p>
<p>Thirdly, CANYON could also be used in combination with present measurements of the respective field nutrient concentrations and/or carbonate system parameters. Beside quality control of these data, CANYON values could serve to identify unusual biogeochemical events that had not been covered by the global but sparse GLODAPv2 training data set, in cases where CANYON and field data diverge.</p>
<p>Fourthly, and for more local approaches (e.g., analysis of individual float time series), the possible derivation of <italic>p</italic>CO<sub>2</sub> from BGC-Argo float O<sub>2</sub> and ancillary measurements (with or without pH<sub>T</sub>) potentially represents a new way to address regional and seasonal variability in CO<sub>2</sub> air-sea exchanges, and to reduce present uncertainties in the estimates of these fluxes. Moreover, estimating the three macronutrients (NO<sub>3</sub><sup>&#x02212;</sup>, PO<sub>4</sub><sup>3&#x02212;</sup>, and Si(OH)<sub>4</sub>) from BGC-Argo float data could be of great value for better understanding the dynamics of biogeochemical events such as the development and subsequent collapse of phytoplankton blooms.</p>
<p>Fifthly, CANYON could contribute to designing future observational programs by identifying areas and periods where data acquisition could most cost-effectively address variability that is presently unresolved. Indeed, the strict quality control of the input data in the present study (which used only GLODAPv2 data that passed the second quality check, see Section The GLODAPv2 Database) eliminated some specific regions from our training and validation datasets. This argues for developing field-based observation programs to conduct high-quality measurements in these areas. More generally, the spatio-temporal domains where CANYON provided the least satisfactory results likely corresponded to weaknesses in the GLODAPv2 database with respect to catching the inherent and natural variability of different variables, such as the Southern Ocean in winter.</p>
<p>Overall, the CANYON-type estimation of biogeochemical variables based on data provided by the global BGC-Argo program offers new avenues for marine biogeochemistry that are comparable to those that have been created for physical oceanography by the Argo network since the early 2000s. This novel approach could increase tremendously the value of biogeochemical measurements made on board ships and by BGC-Argo floats by combining the high quality of the first type of data with the broad spatio-temporal coverage of the second.</p>
</sec>
<sec id="s5">
<title>Author contributions</title>
<p>HC, RS, and OP initiated the study and designed the neural-network configurations with JG and HB. RS ran simulations and created the plots. All authors contributed to analysis and discussion of results. RS wrote most of the manuscript. All authors commented on and contributed to the improvement of several versions of the manuscript.</p>
<sec>
<title>Conflict of interest statement</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
</sec>
</body>
<back>
<ack><p>This study was supported by the remOcean project (funded by the European Research Council, Grant Agreement No. 246777) and the AtlantOS project (funded by the European Union&#x00027;s Horizon 2020 research and innovation program, Grant Agreement No. 2014-633211). This is a contribution to the Southern Ocean Carbon and Climate Observations and Modeling (SOCCOM) project which is supported by the US National Science Foundation (PLR-1425989). We want to thank Tobias Steinhoff (GEOMAR, Kiel), for helpful discussions on carbonate system calculations. We deeply acknowledge the work from analysts, investigators, and crew who collected the data at sea. We are also grateful to all who have contributed their data to GLODAPv2 project and acknowledge the huge effort made to gather all data to create the GLODAPv2 database.</p>
</ack>
<sec sec-type="supplementary-material" id="s6">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="http://journal.frontiersin.org/article/10.3389/fmars.2017.00128/full#supplementary-material">http://journal.frontiersin.org/article/10.3389/fmars.2017.00128/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Image1.JPEG" id="SM1" mimetype="image/jpeg" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Figure S1</label>
<caption><p><bold>Geographic boundaries of the seven major oceanic basins used in Section The GLODAPv2 Database</bold>.</p></caption></supplementary-material>
<supplementary-material xlink:href="Image2.JPEG" id="SM2" mimetype="image/jpeg" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Figure S2</label>
<caption><p><bold>Temporal distribution of the number of observations (N<sub>obs</sub>) available in GLODAPv2 that were used to develop CANYON MLPs, plotted as a function of the sampling months (top)</bold> and years <bold>(bottom)</bold>. The colors refer to the sampling latitude: North high latitudes (&#x02265;45&#x000B0;); North mid latitudes (&#x02265;15&#x000B0; and &#x0003C;45&#x000B0;); Equatorial latitudes (&#x0003E;&#x02212;15&#x000B0; and &#x0003C;15&#x000B0;); South mid latitudes (&#x0003E;&#x02212;45&#x000B0; and &#x02264;&#x02212;15&#x000B0;), and South high latitudes (&#x02264;&#x02212;45&#x000B0;).</p></caption></supplementary-material>
<supplementary-material xlink:href="Image3.jpg" id="SM3" mimetype="image/jpeg" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Figure S3</label>
<caption><p><bold>Box plots of the difference between CANYON estimates <italic>minus</italic> GLODAPv2 reference measurements for the seven variables: NO<sub>3</sub><sup>&#x02212;</sup> (&#x003BC;mol kg<sup>&#x02212;1</sup>), PO<sub>4</sub><sup>3&#x02212;</sup> (&#x003BC;mol kg<sup>&#x02212;1</sup>), Si(OH)<sub>4</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>), pH<sub>T</sub>, <italic>A</italic><sub>T</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>), <italic>C</italic><sub>T</sub> (&#x003BC;mol kg<sup>&#x02212;1</sup>), and <italic>p</italic>CO<sub>2</sub> (&#x003BC;atm) vs. the range of the variable retrieved (output variable)</bold>. The intensity of the shading in each box refers to the number of data. The intervals were created by dividing the range of values into eight regular intervals. For each box, the lower and upper whiskers represent the Q1&#x02212;1.5<sup>&#x0002A;</sup>IQR and Q3&#x0002B;1.5<sup>&#x0002A;</sup>IQR, respectively, with Q1 the 0.25 quantile, Q3 the 0.75 quantile, and IQR the interquartile range. The width of each box represents the IQR and the middle line the median of the values.</p></caption></supplementary-material>
<supplementary-material xlink:href="Image4.JPEG" id="SM4" mimetype="image/jpeg" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Figure S4</label>
<caption><p><bold>Time series (22 years) of the temperature (a)</bold>, the salinity <bold>(b)</bold>, and the O<sub>2</sub> <bold>(c)</bold> measured at HOT. The five profiles from 25 February 2003 to 29 March 2003 that had been removed from the time-series because of their abnormal O<sub>2</sub> profiles are shown with the black triangle in the panel <bold>(c)</bold>&#x02014;(see details in Section Example of Application: Illustration with HOT Database).</p></caption></supplementary-material>
<supplementary-material xlink:href="RandMatlabcodesoftheproposedalgorithm.zip" id="SM5" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bates</surname> <given-names>N.</given-names></name> <name><surname>Astor</surname> <given-names>Y.</given-names></name> <name><surname>Church</surname> <given-names>M.</given-names></name> <name><surname>Currie</surname> <given-names>K.</given-names></name> <name><surname>Dore</surname> <given-names>J.</given-names></name> <name><surname>Gonz&#x000E1;lez-D&#x000E1;vila</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>A time-series view of changing ocean chemistry due to ocean uptake of Anthropogenic CO2 and ocean acidification</article-title>. <source>Oceanography</source> <volume>27</volume>, <fpage>126</fpage>&#x02013;<lpage>141</lpage>. <pub-id pub-id-type="doi">10.5670/oceanog.2014.16</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ben Mustapha</surname> <given-names>Z.</given-names></name> <name><surname>Alvain</surname> <given-names>S.</given-names></name> <name><surname>Jamet</surname> <given-names>C.</given-names></name> <name><surname>Loisel</surname> <given-names>H.</given-names></name> <name><surname>Dessailly</surname> <given-names>D.</given-names></name></person-group> (<year>2013</year>). <article-title>Automatic classification of water-leaving radiance anomalies from global SeaWiFS imagery: application to the detection of phytoplankton groups in open ocean waters</article-title>. <source>Remote Sens. Environ</source>. <volume>146</volume>, <fpage>97</fpage>&#x02013;<lpage>112</lpage>. <pub-id pub-id-type="doi">10.1016/j.rse.2013.08.046</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bishop</surname> <given-names>C. M.</given-names></name></person-group> (<year>1995</year>). <source>Neural Networks for Pattern Recognition</source>. <publisher-name>Oxford University Press, Inc</publisher-name>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://dl.acm.org/citation.cfm?id=525960">http://dl.acm.org/citation.cfm?id=525960</ext-link> (Accessed March 20, 2014).</citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bittig</surname> <given-names>H. C.</given-names></name> <name><surname>K&#x000F6;rtzinger</surname> <given-names>A.</given-names></name></person-group> (<year>2015</year>). <article-title>Tackling oxygen optode drift: near-surface and in-air oxygen optode measurements on a float provide an accurate <italic>in situ</italic> reference</article-title>. <source>J. Atmos. Ocean. Technol.</source> <volume>32</volume>, <fpage>1536</fpage>&#x02013;<lpage>1543</lpage>. <pub-id pub-id-type="doi">10.1175/JTECH-D-14-00162.1</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bushinsky</surname> <given-names>S. M.</given-names></name> <name><surname>Emerson</surname> <given-names>S. R.</given-names></name> <name><surname>Riser</surname> <given-names>S. C.</given-names></name> <name><surname>Swift</surname> <given-names>D. D.</given-names></name></person-group> (<year>2016</year>). <article-title>Accurate oxygen measurements on modified Argo floats using <italic>in situ</italic> air calibrations</article-title>. <source>Limnol. Oceanogr. Methods</source> <volume>14</volume>, <fpage>491</fpage>&#x02013;<lpage>505</lpage>. <pub-id pub-id-type="doi">10.1002/lom3.10107</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carter</surname> <given-names>B. R.</given-names></name> <name><surname>Williams</surname> <given-names>N. L.</given-names></name> <name><surname>Gray</surname> <given-names>A. R.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name></person-group> (<year>2016</year>). <article-title>Locally interpolated alkalinity regression for global alkalinity estimation</article-title>. <source>Limnol. Oceanogr. Methods</source> <volume>14</volume>, <fpage>268</fpage>&#x02013;<lpage>277</lpage>. <pub-id pub-id-type="doi">10.1002/lom3.10087</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Claustre</surname> <given-names>H.</given-names></name> <name><surname>Antoine</surname> <given-names>D.</given-names></name> <name><surname>Boehme</surname> <given-names>L.</given-names></name> <name><surname>Boss</surname> <given-names>E.</given-names></name> <name><surname>D&#x00027;Ortenzio</surname> <given-names>F.</given-names></name> <name><surname>Fanton D&#x00027;Andon</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Guidelines towards an integrated ocean observation system for ecosystems and biogeochemical cycles</article-title>, in <source>Proceedings of OceanObs&#x00027;09: Sustained Ocean Observations and Information for Society</source>, eds <person-group person-group-type="editor"><name><surname>Hall</surname> <given-names>J.</given-names></name> <name><surname>Harrison</surname> <given-names>D. E.</given-names></name> <name><surname>Stammer</surname> <given-names>D.</given-names></name></person-group> (<publisher-loc>Venice</publisher-loc>: <publisher-name>European Space Agency</publisher-name>), <fpage>546</fpage>&#x02013;<lpage>566</lpage>.</citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dickson</surname> <given-names>A. G.</given-names></name></person-group> (<year>1990</year>). <article-title>Standard potential of the reaction AGCL(S)&#x0002B;1/2H-2(G) &#x0003D; AG(S)&#x0002B;HCL(AQ) and the standard acidity constant of the ion HSO4&#x02212; in synthetic sea water from 273.15 to 318.15 K</article-title>. <source>J. Chem. Thermodyn.</source> <volume>22</volume>, <fpage>113</fpage>&#x02013;<lpage>127</lpage>. <pub-id pub-id-type="doi">10.1016/0021-9614(90)90074-Z</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doney</surname> <given-names>S. C.</given-names></name> <name><surname>Fabry</surname> <given-names>V. J.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name> <name><surname>Kleypas</surname> <given-names>J. A.</given-names></name></person-group> (<year>2009</year>). <article-title>Ocean acidification: the other CO<sub>2</sub> problem</article-title>. <source>Ann. Rev. Mar. Sci.</source> <volume>1</volume>, <fpage>169</fpage>&#x02013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.marine.010908.163834</pub-id><pub-id pub-id-type="pmid">21141034</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dore</surname> <given-names>J. E.</given-names></name> <name><surname>Lukas</surname> <given-names>R.</given-names></name> <name><surname>Sadler</surname> <given-names>D. W.</given-names></name> <name><surname>Karl</surname> <given-names>D. M.</given-names></name></person-group> (<year>2003</year>). <article-title>Climate-driven changes to the atmospheric CO2 sink in the subtropical North Pacific Ocean</article-title>. <source>Nature</source> <volume>424</volume>, <fpage>754</fpage>&#x02013;<lpage>757</lpage>. <pub-id pub-id-type="doi">10.1038/nature01885</pub-id><pub-id pub-id-type="pmid">12917678</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dore</surname> <given-names>J. E.</given-names></name> <name><surname>Lukas</surname> <given-names>R.</given-names></name> <name><surname>Sadler</surname> <given-names>D. W.</given-names></name> <name><surname>Church</surname> <given-names>M. J.</given-names></name> <name><surname>Karl</surname> <given-names>D. M.</given-names></name></person-group> (<year>2009</year>). <article-title>Physical and biogeochemical modulation of ocean acidification in the central North Pacific</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>106</volume>, <fpage>12235</fpage>&#x02013;<lpage>12240</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0906044106</pub-id><pub-id pub-id-type="pmid">19666624</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Friedrich</surname> <given-names>T.</given-names></name> <name><surname>Oschlies</surname> <given-names>A.</given-names></name></person-group> (<year>2009</year>). <article-title>Neural network-based estimates of North Atlantic surface pCO<sub>2</sub> from satellite data: a methodological study</article-title>. <source>J. Geophys. Res.</source> <volume>114</volume>, <fpage>C03020</fpage>. <pub-id pub-id-type="doi">10.1029/2007JC004646</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gattuso</surname> <given-names>J.-P.</given-names></name> <name><surname>Hansson</surname> <given-names>L.</given-names></name></person-group> (<year>2011</year>). <source>Ocean Acidification</source>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>.</citation></ref>
<ref id="B14">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Gattuso</surname> <given-names>J.-P.</given-names></name> <name><surname>Epitalon</surname> <given-names>J.-M.</given-names></name> <name><surname>Lavigne</surname> <given-names>H.</given-names></name></person-group> (<year>2016</year>). <source>Seacarb: Seawater Carbonate Chemistry R Package Version 3.0.14</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/package=seacarb">https://cran.r-project.org/package=seacarb</ext-link></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gattuso</surname> <given-names>J.-P.</given-names></name> <name><surname>Magnan</surname> <given-names>A.</given-names></name> <name><surname>Bill&#x000E9;</surname> <given-names>R.</given-names></name> <name><surname>Cheung</surname> <given-names>W. W. L.</given-names></name> <name><surname>Howes</surname> <given-names>E. L.</given-names></name> <name><surname>Joos</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Contrasting futures for ocean and society from different anthropogenic CO<sub>2</sub> emissions scenarios</article-title>. <source>Science</source> <volume>349</volume>, <fpage>aac4722</fpage>&#x02013;<lpage>aac4722</lpage>. <pub-id pub-id-type="doi">10.1126/science.aac4722</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gouretski</surname> <given-names>V.</given-names></name> <name><surname>Jancke</surname> <given-names>K.</given-names></name></person-group> (<year>2000</year>). <article-title>Systematic errors as the cause for an apparent deep water property variability: global analysis of the WOCE and historical hydrographic data</article-title>. <source>Prog. Oceanogr.</source> <volume>48</volume>, <fpage>337</fpage>&#x02013;<lpage>402</lpage>. <pub-id pub-id-type="doi">10.1016/S0079-6611(00)00049-5</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gruber</surname> <given-names>N.</given-names></name></person-group> (<year>2011</year>). <article-title>Warming up, turning sour, losing breath: ocean biogeochemistry under global change</article-title>. <source>Philos. Trans. A Math. Phys. Eng. Sci.</source> <volume>369</volume>, <fpage>1980</fpage>&#x02013;<lpage>1996</lpage>. <pub-id pub-id-type="doi">10.1098/rsta.2011.0003</pub-id><pub-id pub-id-type="pmid">21502171</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gruber</surname> <given-names>N.</given-names></name> <name><surname>Doney</surname> <given-names>S. C.</given-names></name> <name><surname>Emerson</surname> <given-names>S.</given-names></name> <name><surname>Gilbert</surname> <given-names>D.</given-names></name> <name><surname>Kobayashi</surname> <given-names>T.</given-names></name> <name><surname>K&#x000F6;rtzinger</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2010a</year>). <article-title>Adding oxygen to Argo: developing a global <italic>in situ</italic> observatory for ocean deoxygenation and biogeochemistry</article-title>, in <source>Proceedings of the &#x0201C;OceanObs&#x00027;09: Sustained Ocean Observations and Information for Society&#x0201D; Conference</source>, eds <person-group person-group-type="editor"><name><surname>Hall</surname> <given-names>J.</given-names></name> <name><surname>Harrison</surname> <given-names>D. E.</given-names></name> <name><surname>Stammer</surname> <given-names>D.</given-names></name></person-group> (<publisher-loc>Venice</publisher-loc>: <publisher-name>ESA publication WPP-306</publisher-name>), <fpage>432</fpage>&#x02013;<lpage>441</lpage> (Accessed September 21&#x02013;25, 2009). <pub-id pub-id-type="doi">10.5270/OceanObs09.cwp.39</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gruber</surname> <given-names>N.</given-names></name> <name><surname>K&#x000F6;rtzinger</surname> <given-names>A.</given-names></name> <name><surname>Borges</surname> <given-names>A.</given-names></name> <name><surname>Claustre</surname> <given-names>H.</given-names></name> <name><surname>Doney</surname> <given-names>S. C.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name> <etal/></person-group>. (<year>2010b</year>). <article-title>Towards an integrated observing system for ocean carbon and biogeochemistry at a time of change</article-title>, in <source>Proceedings of the &#x0201C;OceanObs&#x00027;09: Sustained Ocean Observations and Information for Society&#x0201D; Conference</source>, eds <person-group person-group-type="editor"><name><surname>Hall</surname> <given-names>J.</given-names></name> <name><surname>Harrison</surname> <given-names>D. E.</given-names></name> <name><surname>Stammer</surname> <given-names>D.</given-names></name></person-group> (<publisher-loc>Venice</publisher-loc>: <publisher-name>ESA publication WPP-306</publisher-name>) (Accessed September 21&#x02013;25, 2009). <pub-id pub-id-type="doi">10.5270/OceanObs09.pp.18</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Holte</surname> <given-names>J.</given-names></name> <name><surname>Gilson</surname> <given-names>J.</given-names></name> <name><surname>Talley</surname> <given-names>L.</given-names></name> <name><surname>Roemmich</surname> <given-names>D.</given-names></name></person-group> (<year>2016</year>). <source>Argo Mixed Layers, Scripps Institution of Oceanography/UCSD</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://mixedlayer.ucsd.edu">http://mixedlayer.ucsd.edu</ext-link></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hornik</surname> <given-names>K.</given-names></name> <name><surname>Stinchcombe</surname> <given-names>M.</given-names></name> <name><surname>White</surname> <given-names>H.</given-names></name></person-group> (<year>1989</year>). <article-title>Multilayer feedforward networks are universal approximators</article-title>. <source>Neural Netw.</source> <volume>2</volume>, <fpage>359</fpage>&#x02013;<lpage>366</lpage>. <pub-id pub-id-type="doi">10.1016/0893-6080(89)90020-8</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ilyina</surname> <given-names>T.</given-names></name> <name><surname>Six</surname> <given-names>K. D.</given-names></name> <name><surname>Segschneider</surname> <given-names>J.</given-names></name> <name><surname>Maier-Reimer</surname> <given-names>E.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>N&#x000FA;&#x000F1;ez-Riboni</surname> <given-names>I.</given-names></name></person-group> (<year>2013</year>). <article-title>Global ocean biogeochemistry model HAMOCC: model architecture and performance as component of the MPI-Earth system model in different CMIP5 experimental realizations</article-title>. <source>J. Adv. Model. Earth Syst.</source> <volume>5</volume>, <fpage>287</fpage>&#x02013;<lpage>315</lpage>. <pub-id pub-id-type="doi">10.1029/2012MS000178</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jamet</surname> <given-names>C.</given-names></name> <name><surname>Loisel</surname> <given-names>H.</given-names></name> <name><surname>Dessailly</surname> <given-names>D.</given-names></name></person-group> (<year>2012</year>). <article-title>Retrieval of the spectral diffuse attenuation coefficient K<sub>d</sub>(&#x003BB;) in open and coastal ocean waters using a neural network inversion</article-title>. <source>J. Geophys. Res.</source> <volume>117</volume>, <fpage>C10023</fpage>. <pub-id pub-id-type="doi">10.1029/2012JC008076</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>G. C.</given-names></name> <name><surname>Robbins</surname> <given-names>P. E.</given-names></name> <name><surname>Hufford</surname> <given-names>G. E.</given-names></name></person-group> (<year>2001</year>). <article-title>Systematic adjustments of hydrographic sections for internal consistency<sup>&#x0002A;</sup></article-title>. <source>J. Atmos. Ocean. Technol.</source> <volume>18</volume>, <fpage>1234</fpage>&#x02013;<lpage>1244</lpage>. <pub-id pub-id-type="doi">10.1175/1520-0426(2001)018&#x0003C;1234:SAOHSF&#x0003E;2.0.CO;2</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>K. S.</given-names></name> <name><surname>Claustre</surname> <given-names>H.</given-names></name></person-group> (<year>2016</year>). <article-title>Bringing biogeochemistry into the Argo age</article-title>. <source>Eos Trans. Am. Geophys. Union</source> <volume>97</volume>. <pub-id pub-id-type="doi">10.1029/2016EO062427</pub-id>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://eos.org/project-updates/bringing-biogeochemistry-into-the-argo-age">https://eos.org/project-updates/bringing-biogeochemistry-into-the-argo-age</ext-link></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>K. S.</given-names></name> <name><surname>Coletti</surname> <given-names>L. J.</given-names></name></person-group> (<year>2002</year>). <article-title><italic>In situ</italic> ultraviolet spectrophotometry for high resolution and long-term monitoring of nitrate, bromide and bisulfide in the ocean</article-title>. <source>Deep Sea Res. Part I Oceanogr. Res. Pap.</source> <volume>49</volume>, <fpage>1291</fpage>&#x02013;<lpage>1305</lpage>. <pub-id pub-id-type="doi">10.1016/S0967-0637(02)00020-1</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>K. S.</given-names></name> <name><surname>Coletti</surname> <given-names>L. J.</given-names></name> <name><surname>Jannasch</surname> <given-names>H. W.</given-names></name> <name><surname>Sakamoto</surname> <given-names>C. M.</given-names></name> <name><surname>Swift</surname> <given-names>D. D.</given-names></name> <name><surname>Riser</surname> <given-names>S. C.</given-names></name></person-group> (<year>2013</year>). <article-title>Long-term nitrate measurements in the ocean using the <italic>in situ</italic> ultraviolet spectrophotometer: sensor integration into the APEX profiling float</article-title>. <source>J. Atmos. Ocean. Technol.</source> <volume>30</volume>, <fpage>1854</fpage>&#x02013;<lpage>1866</lpage>. <pub-id pub-id-type="doi">10.1175/JTECH-D-12-00221.1</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>K. S.</given-names></name> <name><surname>Jannasch</surname> <given-names>H. W.</given-names></name> <name><surname>Coletti</surname> <given-names>L. J.</given-names></name> <name><surname>Elrod</surname> <given-names>V. A.</given-names></name> <name><surname>Martz</surname> <given-names>T. R.</given-names></name> <name><surname>Takeshita</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Deep-sea DuraFET: a pressure tolerant pH sensor designed for global sensor networks</article-title>. <source>Anal. Chem.</source> <volume>88</volume>, <fpage>3249</fpage>&#x02013;<lpage>3256</lpage>. <pub-id pub-id-type="doi">10.1021/acs.analchem.5b04653</pub-id><pub-id pub-id-type="pmid">26890717</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>K. S.</given-names></name> <name><surname>Plant</surname> <given-names>J. N.</given-names></name> <name><surname>Riser</surname> <given-names>S. C.</given-names></name> <name><surname>Gilbert</surname> <given-names>D.</given-names></name></person-group> (<year>2015</year>). <article-title>Air oxygen calibration of oxygen optodes on a profiling float array</article-title>. <source>J. Atmos. Ocean. Technol.</source> <volume>32</volume>, <fpage>2160</fpage>&#x02013;<lpage>2172</lpage>. <pub-id pub-id-type="doi">10.1175/JTECH-D-15-0101.1</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>K.</given-names></name> <name><surname>Berelson</surname> <given-names>W.</given-names></name> <name><surname>Boss</surname> <given-names>E.</given-names></name> <name><surname>Chase</surname> <given-names>Z.</given-names></name> <name><surname>Claustre</surname> <given-names>H.</given-names></name> <name><surname>Emerson</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>Observing biogeochemical cycles at global scales with profiling floats and gliders: prospects for a global array</article-title>. <source>Oceanography</source> <volume>22</volume>, <fpage>216</fpage>&#x02013;<lpage>225</lpage>. <pub-id pub-id-type="doi">10.5670/oceanog.2009.81</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Juranek</surname> <given-names>L. W.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name> <name><surname>Gilbert</surname> <given-names>D.</given-names></name> <name><surname>Freeland</surname> <given-names>H.</given-names></name> <name><surname>Miller</surname> <given-names>L. A.</given-names></name></person-group> (<year>2011</year>). <article-title>Real-time estimation of pH and aragonite saturation state from Argo profiling floats: prospects for an autonomous carbon observing strategy</article-title>. <source>Geophys. Res. Lett.</source> <volume>38</volume>, <fpage>L17603</fpage>. <pub-id pub-id-type="doi">10.1029/2011GL048580</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Karl</surname> <given-names>D. M.</given-names></name> <name><surname>Lukas</surname> <given-names>R.</given-names></name></person-group> (<year>1996</year>). <article-title>The Hawaii Ocean Time-series (HOT) program: background, rationale and field implementation</article-title>. <source>Deep Sea Res. Part II Top. Stud. Oceanogr.</source> <volume>43</volume>, <fpage>129</fpage>&#x02013;<lpage>156</lpage>. <pub-id pub-id-type="doi">10.1016/0967-0645(96)00005-7</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Key</surname> <given-names>R. M.</given-names></name> <name><surname>Olsen</surname> <given-names>A.</given-names></name> <name><surname>van Heuven</surname> <given-names>S.</given-names></name> <name><surname>Lauvset</surname> <given-names>S. K.</given-names></name> <name><surname>Velo</surname> <given-names>A.</given-names></name> <name><surname>Lin</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2015</year>). <source>Global Ocean Data Analysis Project, Version 2 (GLODAPv2).</source> ORNL/CDIAC-162, NDP-093, <publisher-name>Carbon Dioxide Information Analysis Center, Oak Ridge National Laboratory, US Department of Energy</publisher-name>, <publisher-loc>Oak Ridge, TN</publisher-loc>.</citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>K&#x000F6;rtzinger</surname> <given-names>A.</given-names></name> <name><surname>Schimanski</surname> <given-names>J.</given-names></name> <name><surname>Send</surname> <given-names>U.</given-names></name> <name><surname>Wallace</surname> <given-names>D.</given-names></name></person-group> (<year>2004</year>). <article-title>The ocean takes a deep breath</article-title>. <source>Science</source> <volume>306</volume>, <fpage>1337</fpage>. <pub-id pub-id-type="doi">10.1126/science.1102557</pub-id><pub-id pub-id-type="pmid">15550662</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Landsch&#x000FC;tzer</surname> <given-names>P.</given-names></name> <name><surname>Gruber</surname> <given-names>N.</given-names></name> <name><surname>Bakker</surname> <given-names>D. C. E.</given-names></name></person-group> (<year>2016</year>). <article-title>Decadal variations and trends of the global ocean carbon sink</article-title>. <source>Global Biogeochem. Cycles</source> <volume>30</volume>, <fpage>1396</fpage>&#x02013;<lpage>1417</lpage>. <pub-id pub-id-type="doi">10.1002/2015GB005359</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Landsch&#x000FC;tzer</surname> <given-names>P.</given-names></name> <name><surname>Gruber</surname> <given-names>N.</given-names></name> <name><surname>Bakker</surname> <given-names>D. C. E.</given-names></name> <name><surname>Schuster</surname> <given-names>U.</given-names></name></person-group> (<year>2014</year>). <article-title>Recent variability of the global ocean carbon sink</article-title>. <source>Global Biogeochem. Cycles</source> <volume>28</volume>, <fpage>927</fpage>&#x02013;<lpage>949</lpage>. <pub-id pub-id-type="doi">10.1002/2014GB004853</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Landsch&#x000FC;tzer</surname> <given-names>P.</given-names></name> <name><surname>Gruber</surname> <given-names>N.</given-names></name> <name><surname>Bakker</surname> <given-names>D. C. E.</given-names></name> <name><surname>Schuster</surname> <given-names>U.</given-names></name> <name><surname>Nakaoka</surname> <given-names>S.</given-names></name> <name><surname>Payne</surname> <given-names>M. R.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>A neural network-based estimate of the seasonal to inter-annual variability of the Atlantic Ocean carbon sink</article-title>. <source>Biogeosciences</source> <volume>10</volume>, <fpage>7793</fpage>&#x02013;<lpage>7815</lpage>. <pub-id pub-id-type="doi">10.5194/bg-10-7793-2013</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Le Qu&#x000E9;r&#x000E9;</surname> <given-names>C.</given-names></name> <name><surname>Moriarty</surname> <given-names>R.</given-names></name> <name><surname>Andrew</surname> <given-names>R. M.</given-names></name> <name><surname>Canadell</surname> <given-names>J. G.</given-names></name> <name><surname>Sitch</surname> <given-names>S.</given-names></name> <name><surname>Korsbakken</surname> <given-names>J. I.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Global Carbon Budget 2015</article-title>. <source>Earth Syst. Sci. Data</source> <volume>7</volume>, <fpage>349</fpage>&#x02013;<lpage>396</lpage>. <pub-id pub-id-type="doi">10.5194/essd-7-349-2015</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lek</surname> <given-names>S.</given-names></name> <name><surname>Gu&#x000E9;gan</surname> <given-names>J. F.</given-names></name></person-group> (<year>1999</year>). <article-title>Artificial neural networks as a tool in ecological modelling, an introduction</article-title>. <source>Ecol. Modell.</source> <volume>120</volume>, <fpage>65</fpage>&#x02013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1016/S0304-3800(99)00092-7</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lueker</surname> <given-names>T. J.</given-names></name> <name><surname>Dickson</surname> <given-names>A. G.</given-names></name> <name><surname>Keeling</surname> <given-names>C. D.</given-names></name></person-group> (<year>2000</year>). <article-title>Ocean pCO<sub>2</sub> calculated from dissolved inorganic carbon, alkalinity, and equations for K<sub>1</sub> and K<sub>2</sub>: validation based on laboratory measurements of CO<sub>2</sub> in gas and seawater at equilibrium</article-title>. <source>Mar. Chem.</source> <volume>70</volume>, <fpage>105</fpage>&#x02013;<lpage>119</lpage>. <pub-id pub-id-type="doi">10.1016/S0304-4203(00)00022-0</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Martz</surname> <given-names>T. R.</given-names></name> <name><surname>Johnson</surname> <given-names>K. S.</given-names></name> <name><surname>Riser</surname> <given-names>S. C.</given-names></name></person-group> (<year>2008</year>). <article-title>Ocean metabolism observed with oxygen sensors on profiling floats in the South Pacific</article-title>. <source>Limnol. Oceanogr.</source> <volume>53</volume>, <fpage>2094</fpage>&#x02013;<lpage>2111</lpage>. <pub-id pub-id-type="doi">10.4319/lo.2008.53.5_part_2.2094</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Olsen</surname> <given-names>A.</given-names></name> <name><surname>Key</surname> <given-names>R. M.</given-names></name> <name><surname>van Heuven</surname> <given-names>S.</given-names></name> <name><surname>Lauvset</surname> <given-names>S. K.</given-names></name> <name><surname>Velo</surname> <given-names>A.</given-names></name> <name><surname>Lin</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>The Global Ocean Data Analysis Project version 2 (GLODAPv2) &#x02013; an internally consistent data product for the world ocean</article-title>. <source>Earth Syst. Sci. Data</source> <volume>8</volume>, <fpage>297</fpage>&#x02013;<lpage>323</lpage>. <pub-id pub-id-type="doi">10.5194/essd-8-297-2016</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perez</surname> <given-names>F. F.</given-names></name> <name><surname>Fraga</surname> <given-names>F.</given-names></name></person-group> (<year>1987</year>). <article-title>Association constant of fluoride and hydrogen ions in seawater</article-title>. <source>Mar. Chem.</source> <volume>21</volume>, <fpage>161</fpage>&#x02013;<lpage>168</lpage>. <pub-id pub-id-type="doi">10.1016/0304-4203(87)90036-3</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Redfield</surname> <given-names>A. C.</given-names></name></person-group> (<year>1934</year>). <article-title>On the proportions of organic derivatives in sea water and their relation to the composition of plankton</article-title>, in <source>James Johnstone Memorial Volume</source>, ed <person-group person-group-type="editor"><name><surname>Daniel</surname> <given-names>R. J.</given-names></name></person-group> (<publisher-loc>Liverpool</publisher-loc>: <publisher-name>University Press of Liverpool</publisher-name>), <fpage>176</fpage>&#x02013;<lpage>192</lpage>.</citation></ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Redfield</surname> <given-names>A. C.</given-names></name></person-group> (<year>1958</year>). <article-title>The biological control of chemical factors in the environment</article-title>. <source>Am. Sci.</source> <volume>46</volume>, 230A, <fpage>205</fpage>&#x02013;<lpage>221</lpage>.</citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Riser</surname> <given-names>S. C.</given-names></name> <name><surname>Johnson</surname> <given-names>K. S.</given-names></name></person-group> (<year>2008</year>). <article-title>Net production of oxygen in the subtropical ocean</article-title>. <source>Nature</source> <volume>451</volume>, <fpage>323</fpage>&#x02013;<lpage>325</lpage>. <pub-id pub-id-type="doi">10.1038/nature06441</pub-id><pub-id pub-id-type="pmid">18202655</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rumelhart</surname> <given-names>D. E.</given-names></name> <name><surname>Hinton</surname> <given-names>G. E.</given-names></name> <name><surname>Williams</surname> <given-names>R. J.</given-names></name></person-group> (<year>1988</year>). <article-title>Learning representations by back-propagating errors</article-title>. <source>Cogn. Model.</source> <volume>5</volume>, <fpage>696</fpage>&#x02013;<lpage>699</lpage>.</citation></ref>
<ref id="B48">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sabine</surname> <given-names>C. L.</given-names></name> <name><surname>Key</surname> <given-names>R. M.</given-names></name> <name><surname>Kozyr</surname> <given-names>A.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name> <name><surname>Wanninkhof</surname> <given-names>R.</given-names></name> <name><surname>Millero</surname> <given-names>F. J.</given-names></name> <etal/></person-group>. (<year>2005</year>). <source>Global Ocean Data Analysis Project (GLODAP): Results and Data</source>. <publisher-loc>Oak Ridge, TN</publisher-loc>: <publisher-name>ORNL/CDIAC-145, NDP-083, Carbon Dioxide Information Analysis Center, Oak Ridge National Laboratory, U.S. Department of Energy</publisher-name>, <fpage>110</fpage>.</citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sauz&#x000E8;de</surname> <given-names>R.</given-names></name> <name><surname>Claustre</surname> <given-names>H.</given-names></name> <name><surname>Jamet</surname> <given-names>C.</given-names></name> <name><surname>Uitz</surname> <given-names>J.</given-names></name> <name><surname>Ras</surname> <given-names>J.</given-names></name> <name><surname>Mignot</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Retrieving the vertical distribution of chlorophyll <italic>a</italic> concentration and phytoplankton community composition from <italic>in situ</italic> fluorescence profiles: a method based on a neural network with potential for global-scale applications</article-title>. <source>J. Geophys. Res. Oceans</source> <volume>120</volume>, <fpage>451</fpage>&#x02013;<lpage>470</lpage>. <pub-id pub-id-type="doi">10.1002/2014JC010355</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sauz&#x000E8;de</surname> <given-names>R.</given-names></name> <name><surname>Claustre</surname> <given-names>H.</given-names></name> <name><surname>Uitz</surname> <given-names>J.</given-names></name> <name><surname>Jamet</surname> <given-names>C.</given-names></name> <name><surname>Dall&#x00027;Olmo</surname> <given-names>G.</given-names></name> <name><surname>D&#x00027;Ortenzio</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>A neural network-based method for merging ocean color and Argo data to extend surface bio-optical properties to depth: retrieval of the particulate backscattering coefficient</article-title>. <source>J. Geophys. Res. Oceans</source> <volume>121</volume>, <fpage>2552</fpage>&#x02013;<lpage>2571</lpage>. <pub-id pub-id-type="doi">10.1002/2015JC011408</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Takahashi</surname> <given-names>T.</given-names></name> <name><surname>Sutherland</surname> <given-names>S. C.</given-names></name> <name><surname>Wanninkhof</surname> <given-names>R.</given-names></name> <name><surname>Sweeney</surname> <given-names>C.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name> <name><surname>Chipman</surname> <given-names>D. W.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>Climatological mean and decadal change in surface ocean pCO<sub>2</sub>, and net sea&#x02013;air CO<sub>2</sub> flux over the global oceans</article-title>. <source>Deep Sea Res. Part II Top. Stud. Oceanogr.</source> <volume>56</volume>, <fpage>554</fpage>&#x02013;<lpage>577</lpage>. <pub-id pub-id-type="doi">10.1016/j.dsr2.2008.12.009</pub-id></citation></ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Talley</surname> <given-names>L. D.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name> <name><surname>Sloyan</surname> <given-names>B. M.</given-names></name> <name><surname>Wanninkhof</surname> <given-names>R.</given-names></name> <name><surname>Baringer</surname> <given-names>M. O.</given-names></name> <name><surname>Bullister</surname> <given-names>J. L.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Changes in ocean heat, carbon content, and ventilation: a review of the first decade of GO-SHIP global repeat hydrography<sup>&#x0002A;</sup></article-title>. <source>Ann. Rev. Mar. Sci.</source> <volume>8</volume>, <fpage>185</fpage>&#x02013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-marine-052915-100829</pub-id><pub-id pub-id-type="pmid">26515811</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tanhua</surname> <given-names>T.</given-names></name> <name><surname>van Heuven</surname> <given-names>S.</given-names></name> <name><surname>Key</surname> <given-names>R. M.</given-names></name> <name><surname>Velo</surname> <given-names>A.</given-names></name> <name><surname>Olsen</surname> <given-names>A.</given-names></name> <name><surname>Schirnick</surname> <given-names>C.</given-names></name></person-group> (<year>2010</year>). <article-title>Quality control procedures and methods of the CARINA database</article-title>. <source>Earth Syst. Sci. Data</source> <volume>2</volume>, <fpage>35</fpage>&#x02013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.5194/essd-2-35-2010</pub-id></citation></ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uppstr&#x000F6;m</surname> <given-names>L. R.</given-names></name></person-group> (<year>1974</year>). <article-title>The boron/chlorinity ratio of deep-sea water from the Pacific Ocean</article-title>. <source>Deep Sea Res. Oceanogr. Abstr.</source> <volume>21</volume>, <fpage>161</fpage>&#x02013;<lpage>162</lpage>. <pub-id pub-id-type="doi">10.1016/0011-7471(74)90074-6</pub-id></citation></ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Velo</surname> <given-names>A.</given-names></name> <name><surname>P&#x000E9;rez</surname> <given-names>F. F.</given-names></name> <name><surname>Tanhua</surname> <given-names>T.</given-names></name> <name><surname>Gilcoto</surname> <given-names>M.</given-names></name> <name><surname>R&#x000ED;os</surname> <given-names>A. F.</given-names></name> <name><surname>Key</surname> <given-names>R. M.</given-names></name></person-group> (<year>2013</year>). <article-title>Total alkalinity estimation using MLR and neural network techniques</article-title>. <source>J. Mar. Syst.</source> <volume>111&#x02013;112</volume>, <fpage>11</fpage>&#x02013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1016/j.jmarsys.2012.09.002</pub-id></citation></ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ward</surname> <given-names>B.</given-names></name> <name><surname>Redfern</surname> <given-names>S.</given-names></name></person-group> (<year>1999</year>). <article-title>A neural network model for predicting the bulk-skin temperature difference at the sea surface</article-title>. <source>Int. J. Remote Sens.</source> <volume>20</volume>, <fpage>3533</fpage>&#x02013;<lpage>3548</lpage>. <pub-id pub-id-type="doi">10.1080/014311699211183</pub-id></citation></ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Williams</surname> <given-names>N. L.</given-names></name> <name><surname>Juranek</surname> <given-names>L. W.</given-names></name> <name><surname>Johnson</surname> <given-names>K. S.</given-names></name> <name><surname>Feely</surname> <given-names>R. A.</given-names></name> <name><surname>Riser</surname> <given-names>S. C.</given-names></name> <name><surname>Talley</surname> <given-names>L. D.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Empirical algorithms to estimate water column pH in the Southern Ocean</article-title>. <source>Geophys. Res. Lett.</source> <volume>43</volume>, <fpage>3415</fpage>&#x02013;<lpage>3422</lpage>. <pub-id pub-id-type="doi">10.1002/2016GL068539</pub-id></citation></ref>
</ref-list>
</back>
</article>
