<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2024.1477710</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>PGTransNet: a physics-guided transformer network for 3D ocean temperature and salinity predicting in tropical Pacific</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Song</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2802347"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Bao</surname>
<given-names>Senliang</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Dong</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Senzhang</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Xiaojiang</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1871797"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shao</surname>
<given-names>Chengcheng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Junxing</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Xiaoyong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Computer Science and Technology, National University of Defense Technology</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>College of Meteorology and Oceanography, National University of Defense Technology</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Computer Science and Engineering, Central South University</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Zhibin Yu, Ocean University of China, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Young-Heon Jo, Pusan National University, Republic of Korea</p>
<p>Ajian Liu, The Institute of Automation of the Chinese Academy of Sciences (CASIA), China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Senliang Bao, <email xlink:href="mailto:baosenliang18@nudt.edu.cn">baosenliang18@nudt.edu.cn</email>; Wei Dong, <email xlink:href="mailto:wdong@nudt.edu.cn">wdong@nudt.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>29</day>
<month>11</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1477710</elocation-id>
<history>
<date date-type="received">
<day>08</day>
<month>08</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Wu, Bao, Dong, Wang, Zhang, Shao, Zhu and Li</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Wu, Bao, Dong, Wang, Zhang, Shao, Zhu and Li</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Accurately predicting the spatio-temporal evolution trends and long-term dynamics of three-dimensional ocean temperature and salinity plays a crucial role in monitoring climate system changes and conducting fundamental oceanographic research. Numerical models are the most prevalent of the traditional approaches, but they are often too complex and lack generality. Recently, with the rise of AI, many data-driven methods have been proposed. However, most of them take no account of natural physical laws, which may cause physical inconsistency among different variables. In this paper, we propose PGTransNet, a novel physics-guided transformer network for 3D ocean temperature and salinity forecasting. The model is based on the Vision Transformer, and we introduce three improvements to enhance its performance. Firstly, we design a loss function that conveys the physical relationship among temperature, salinity and density by fusing the Thermodynamic Equation. Secondly, to capture global and long-term dependencies effectively, we add the Pacific Decadal Oscillation (PDO) and North Pacific Gyre Oscillation (NPGO) in the embedding layer. Thirdly, we adopt Laplacian sparse positional encodings to alleviate the artifacts caused by high-norm tokens. The former two are the core components that leverage the physical information. Finally, to comprehensively evaluate PGTransNet, we conduct extensive experiments using RMSE, Anomaly Correlation Coefficient, bias and physical consistency as metrics. Our proposal demonstrates higher prediction accuracy with fast convergence, and the metrics and visualizations show that our model is insensitive to hyperparameter tuning, ensuring better generalization and adherence to physical consistency. Moreover, as observed from the spatial distribution of the anomaly correlation coefficient, the model exhibits higher forecasting accuracy for coastal and marginal sea regions.</p>
</abstract>
<kwd-group>
<kwd>physics-guided machine learning</kwd>
<kwd>spatio-temporal data analysis</kwd>
<kwd>ocean temperature prediction</kwd>
<kwd>ocean salinity prediction</kwd>
<kwd>ViT</kwd>
</kwd-group>
<counts>
<fig-count count="14"/>
<table-count count="3"/>
<equation-count count="7"/>
<ref-count count="40"/>
<page-count count="15"/>
<word-count count="6067"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Ocean Observation</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Temperature and salinity, as fundamental climate variables of the ocean, play a crucial role in ocean circulation, global climate, and biological systems. Accurately predicting the spatio-temporal evolution trends and long-term dynamics of three-dimensional sea temperature and salinity is essential for monitoring climate system changes and conducting fundamental oceanographic research (<xref ref-type="bibr" rid="B18">Kug et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B1">Aguilar-Martinez and Hsieh, 2009</xref>; <xref ref-type="bibr" rid="B20">Lin et&#xa0;al., 2024</xref>). Simultaneously, an accurate and in-depth understanding of the variabilities and correlations of the temperature and salinity both on the surface and subsurface is also helpful for ecological environment protection, ocean-atmosphere phenomena prediction (El Ni&#xf1;o, La Ni&#xf1;a), and disaster warning (tsunami, hurricane) (<xref ref-type="bibr" rid="B35">Xiao et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B40">Zhu et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B31">Wang et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B39">Zhou and Zhang, 2023</xref>). However, the intricate marine environment and the coupling, interplay, and mutual constraints among various elements make its forecasting inherently challenging, encompassing multi-source, multi-modal data processing and fusion, the handling of highly nonlinear temporal dynamics, spatio-temporal information extraction, and the capturing of physical laws between variables.</p>
<p>Most state-of-the-art approaches for predicting these two variables are based on physics-based numerical models and data-driven algorithms. While numerical models are physically plausible and mathematically well-posed, the discretization approximation of nonlinear equations and the challenge of determining the uniqueness of solutions may lead to pseudo-physical effects, limited generality, and temporal limitations. Moreover, numerical models are computationally expensive. Typical and commonly used ocean numerical models include ROMS (Regional Ocean Modeling System), MOM (Modular Ocean Model), NEMO (Nucleus for European Modeling of the Ocean), and POP (Parallel Ocean Program).</p>
<p>Data-driven methods attempt to predict long-term variations in temperature and salinity by leveraging their powerful learning and nonlinear mapping capabilities, making them well-suited for spatio-temporal data forecasting. Common approaches for forecasting fundamental marine variables include LSTM, ConvLSTM, CNN, and their variants such as FC-LSTM (Fully Connected LSTM), RC-LSTM (Regional Convolution-LSTM), CFCC-LSTM (Combined FC-LSTM and Convolution Neural Network), and DPG (Dual Path Gated Recurrent Unit Network) (<xref ref-type="bibr" rid="B38">Zhang et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B34">Xiao et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B28">Song et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B36">Xu et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B25">Patil and Iiyama, 2021</xref>). However, these networks predominantly rely on homogeneous datasets for univariate sea surface temperature forecasting, which limits their ability to fully capture correlations between coupled variables.</p>
<p>While some researchers have proposed spatio-temporal data fusion models like MUST (Multi-source Spatio-Temporal data fusion Model) (<xref ref-type="bibr" rid="B14">Hou et&#xa0;al., 2022</xref>), TemproNet (Transformer-based deep learning model) (<xref ref-type="bibr" rid="B2">Chen et&#xa0;al., 2024</xref>), and attention-based PredRNN (<xref ref-type="bibr" rid="B27">Qiao et&#xa0;al., 2023</xref>), these models are primarily designed for short-term SST (Sea Surface Temperature) prediction. In contrast, Dai (<xref ref-type="bibr" rid="B8">Dai et&#xa0;al., 2024</xref>) focuses on long-term SST prediction in the China Sea. The proposed TransDtSt-Part (Transformer with temporal embedding, attention distilling, and stacked connection in part) achieves high prediction accuracy across five China Sea regions, even with a forecast length of 360 days. However, the absence of known and objective physical information within the meteorological and oceanographic domains is a significant concern. The lack of physical constraints limits the accuracy and reliability of purely data-driven approaches, highlighting the need for models that integrate both data-driven techniques and physical laws. Fortunately, the emergence of physics-guided deep learning and AI for science presents a new scientific paradigm for these problems. One of the earliest relevant papers found to date is the article published in Nature Materials in 2006 (<xref ref-type="bibr" rid="B13">Fischer et&#xa0;al., 2006</xref>). The authors attempted to integrate quantum mechanical mechanisms to enhance the accuracy of crystal predictions. In addition, to enforce the reliability of model prediction, Patil (<xref ref-type="bibr" rid="B24">Patil et&#xa0;al., 2016</xref>) proposes a wavelet neural network (WNN) to make wavelet transforming among the error time series between model output and observation data. It&#x2019;s a primary attempt to integrate physical information with neural networks for temperature prediction. 
In general, prior research has shown that physics-guided deep learning models have great potential to improve data utilization, enhance interpretability, and improve physical consistency (<xref ref-type="bibr" rid="B10">Daw et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B17">Jiang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B9">Daw et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B16">Jia et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B30">Von Rueden et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B32">Wu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B37">Yuan et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B40">Zhu et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B33">Wu et&#xa0;al., 2023</xref>).</p>
<p>Inspired by the successes mentioned above, we propose a novel physics-guided spatio-temporal self-attention transformer network for temperature and salinity forecasting, named PGTransNet. PGTransNet repurposes the Vision Transformer (ViT), which can naturally accommodate our image-like 3D sea temperature and salinity data, and is capable of modeling long-range dependencies effectively. We combine ViT with Laplacian sparse positional encodings, which somewhat alleviate the artifacts caused by high-norm tokens, and embed the Pacific Decadal Oscillation (PDO) and North Pacific Gyre Oscillation (NPGO) to help the model capture global and long-term dependencies further. Moreover, temperature and salinity control water density, thus governing the vertical movement of ocean waters, which further affects the occurrence and extinction of other large and mesoscale ocean phenomena. Therefore, we incorporate the Thermodynamic Equation of Seawater 2010 (TEOS-10) representation formula for the relationship between temperature, salinity, and density into the loss function to achieve physics-guided model training. Simultaneously, the feasibility of restricting the solution space based on this thermodynamic equation is discussed.</p>
<p>In summary, the contributions of this paper are threefold:</p>
<list list-type="bullet">
<list-item>
<p>We propose a physics-guided spatio-temporal self-attention transformer network for jointly predicting ocean temperature and salinity;</p>
</list-item>
<list-item>
<p>We adopt Laplacian sparse positional encodings and build an embedding layer to embed decadal variability to alleviate artifacts and strengthen long-term trend forecasting;</p>
</list-item>
<list-item>
<p>We design a loss function that conveys the physical relationship among temperature, salinity and density by fusing the Thermodynamic Equation to achieve physics-guided model training.</p>
</list-item>
</list>
<p>The remainder of this paper is organized as follows. Section 2 briefly describes the data sources and pre-processing methods, and elaborates on the workflow, algorithmic designs and implementation details of PGTransNet. Section 3 describes the experimental results and analysis. Finally, conclusions and future plans are presented in Section 4.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Methodology</title>
<sec id="s2_1">
<label>2.1</label>
<title>Overall model architecture</title>
<p>Given the extensive availability of large-scale temperature and salinity datasets, coupled with the intrinsic physical interdependencies and correlations between these variables, our objective is to develop a physics-guided spatio-temporal self-attention transformer network. This network is designed to enable the simultaneous prediction of oceanic temperature and salinity by integrating domain-specific physical principles into the learning architecture. In other words, by processing multiple inputs, PGTransNet generates corresponding outputs that adhere to predefined physical principles.</p>
<p>As illustrated in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, given the ocean parameters input <italic>X</italic> of dimensions <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, PGTransNet is trained to forecast the future ocean scenario <italic>Y</italic> of identical dimensions <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> at a specified lead time <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Here, <italic>C</italic> denotes the number of input features, while <italic>H</italic> and <italic>W</italic> represent the latitude and longitude grid points, respectively. <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> correspond to the input history time step and the output lead time step. In our study, we utilize historical data spanning the previous year to predict temperature and salinity for the subsequent year, where <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>12</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Overall model architecture.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g001.tif"/>
</fig>
<p>PGTransNet relies on several key components to derive the ultimate prediction from historical inputs. These components include data preprocessing, data embedding and merging, revised ViT-based blocks, and physics-guided information integrating. The specific details of each component will be introduced sequentially in the subsequent subsections.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Datasets and data preprocessing</title>
<p>We utilize the IAP ocean temperature and salinity products from the Institute of Atmospheric Physics (IAP) at the Chinese Academy of Sciences (CAS) <xref ref-type="bibr" rid="B4">Cheng et&#xa0;al. (2017)</xref>. This dataset is gridded onto a 1&#xb0; &#xd7; 1&#xb0; grid with 41 vertical levels ranging from 1-2000m globally, and monthly resolution spanning from 1940 to the present. The product is developed by using new XBT data bias correction scheme, MBT correction scheme, new reduction of sampling errors scheme (an ensemble optimal interpolation method based on dynamic ensemble samples), and &#x201c;subsample test&#x201d; evaluation scheme, which effectively overcomes the problems of large systematic bias and sampling errors. Extensive systematic analysis and evaluation have demonstrated the dataset&#x2019;s ability to accurately replicate various climate features, including climatological means, decadal variations (such as PDO), interannual variability (such as ENSO), and long-term trends within the historical period from 1940 to 2015, as well as long-term trends <xref ref-type="bibr" rid="B4">Cheng et&#xa0;al. (2017</xref>, <xref ref-type="bibr" rid="B3">2019a</xref>, <xref ref-type="bibr" rid="B5">2019b)</xref>; <xref ref-type="bibr" rid="B6">Cheng and Zhu (2016)</xref>; <xref ref-type="bibr" rid="B19">Li et&#xa0;al. (2020)</xref>. Considering the tropical Pacific Ocean&#x2019;s pivotal role in the ocean circulation and global climate system, particularly its strong correlation with El Ni&#xf1;o and La Ni&#xf1;a through changes in upper-ocean temperatures, we specifically select the IAP data over the region spanning from 120&#xb0;E to 90&#xb0;W and 20.5&#xb0;S to 20.5&#xb0;N, covering the upper-ocean mixed layer from 1 to 160m depths (1, 30, 60, 90, 120, and 160m). We concatenate the salinity and temperature data along the depth dimension, resulting in <italic>C</italic> = 12. 
Additionally, it&#x2019;s worth mentioning that all variables used for predictors and predictands are normalized using min-max normalization within the range [0,1]. The Pacific Decadal Oscillation (PDO) index, as defined by the National Climate Center, serves as the temporal coefficient of the primary mode extracted through empirical orthogonal function (EOF) decomposition of sea surface temperature anomalies within the North Pacific region, spanning from 20&#xb0;N to 70&#xb0;N and 110&#xb0;E to 100&#xb0;W. This index effectively encapsulates the principal features of large-scale oceanic decadal variability. The cold and warm phases of the PDO index correlate well with the cold and warm anomalies in the tropical Pacific sea surface temperatures, which are strongly associated with many North Pacific and Pacific Northwest climate and ecology records, especially the occurrence of El Ni&#xf1;o and La Ni&#xf1;a extreme events. Moreover, <xref ref-type="bibr" rid="B21">Liu and Zhu (2015)</xref> analyzed the regime shift of winter North Pacific sea surface temperature around the 1990s and its possible causes. The analysis indicates that PDO predominantly influenced the dynamics before 1990, whereas the North Pacific Gyre Oscillation (NPGO) took precedence in the subsequent period. Additionally, it suggests that NPGO is likely to dominate in the future. Therefore, considering the significance of these climate indices in capturing oceanic variability, we propose to embed the first two EOF modes (PDO, NPGO) to alleviate artifacts and strengthen long-term trend forecasting. Notably, according to its definition, the NPGO can be understood as the second EOF mode of sea surface height (SSH) anomalies in the North Pacific. Some studies suggest that the second EOF mode of SST can also approximate the NPGO. Therefore, the NPGO used in this paper is based on the second EOF mode of SST.</p>
<p>As shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>, EOF decomposes a time-dependent vector field of oceanic variables <italic>A</italic> (e.g., temperature or salinity) into spatial modes and temporal coefficients, assuming it consists of <italic>m</italic> spatial points and <italic>n</italic> time points. In this context, &#x3a3; represents the diagonal matrix of singular values, whose squares are proportional to the eigenvalues of the covariance matrix of <italic>A</italic>. Matrix <italic>U</italic> corresponds to the left singular vector matrix, capturing the spatial patterns, while matrix <italic>V</italic> represents the right singular vector matrix, reflecting the projection of spatial modes onto the original data matrix <italic>A</italic>. Essentially, the <italic>i</italic>th spatial mode is the eigenvector associated with the <italic>i</italic>th nonzero eigenvalue of the covariance matrix of <italic>A</italic>, and the projection of spatial modes onto the original <italic>A</italic> corresponds to the respective time coefficients. <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> gives the explained variance ratio of the first four modes of the SSTA (SST anomalies) in the Pacific. Notably, the cumulative explained variance ratio of the first four modes exceeds 65%, and that of the first two modes exceeds 55%. It is generally believed that the first two modes can basically reflect the main characteristics of the SST variation in this region.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>EOF decomposition.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g002.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>The explained variance of the first four modes of EOF decomposition of the Pacific SST anomaly field.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Modes</th>
<th valign="top" align="center">Explained variance ratio (%)</th>
<th valign="top" align="center">Cumulative explained variance ratio (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">mode 1</td>
<td valign="top" align="center">48.10</td>
<td valign="top" align="center">48.10</td>
</tr>
<tr>
<td valign="top" align="center">mode 2</td>
<td valign="top" align="center">8.71</td>
<td valign="top" align="center">56.81</td>
</tr>
<tr>
<td valign="top" align="center">mode 3</td>
<td valign="top" align="center">7.23</td>
<td valign="top" align="center">64.04</td>
</tr>
<tr>
<td valign="top" align="center">mode 4</td>
<td valign="top" align="center">3.69</td>
<td valign="top" align="center">67.73</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Data embedding</title>
<p>In our approach, the data undergo several embedding steps before they are fed into the model. Firstly, we incorporate patch embedding as a preprocessing step to reduce computational complexity and enhance local feature capture. Inspired by the methodology of ViT, we segment the input data into fixed-size sub-patches and transform each patch into a vector through linear projection.</p>
<p>As depicted in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>, given an input sample <italic>X</italic> with shape <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and considering a patch size of <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, we generate a sequence of patches with dimensions <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. For this study, we opt for a patch size of (2, 2), although this parameter can be adjusted based on model performance and computational efficiency considerations. Subsequently, it undergoes linear projection to map it into a specific <italic>D</italic>-dimensional space.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Data embedding.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g003.tif"/>
</fig>
<p>Secondly, to bolster the model&#x2019;s temporal and spatial acuity, we introduce spatio-temporal positional encoding and long-term trend embedding into the spatio-temporal embedding process. In this investigation, we employ linear embeddings to convert input tokens into vectors of dimension <italic>D</italic>, and we evaluate the efficacy of two positional encoding methods within the temporal-positional embedding. One approach utilizes sinusoidal positional encodings, following the methodology proposed by <xref ref-type="bibr" rid="B29">Vaswani et&#xa0;al. (2017)</xref>, while the other incorporates laplacian positional encodings (<xref ref-type="bibr" rid="B22">Maskey et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B12">Dwivedi et&#xa0;al., 2023</xref>). Laplacian encodings are a natural extension of transformer positional encodings to the positions of nodes in a graph. Leveraging the laplacian eigenvectors facilitates the encoding of relative positional relationships among adjacent graph nodes. Therefore, we explore the integration of laplacian encodings to better capture the spatio-temporal characteristics of neighboring grids within gridded thermodynamic element data. We compute a simple laplacian matrix by subtracting the adjacency matrix from the degree matrix. Subsequently, we utilize the eigenvector with a size of <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of the laplacian matrix as the position encoding. The long-term decadal variations embedding will be elaborated further in Section 2.5.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Revised ViT-based block</title>
<p>Recently, transformer-based models have emerged as leading contenders for object detection and prediction tasks, often adopting an encoder-decoder architecture. In our study, we leverage a standard transformer encoder-decoder framework with minor adaptations to jointly capture spatio-temporal features of temperature and salinity. As illustrated in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>, the encoder comprises a stack of <italic>n</italic>1 = 4 identical layers. Each layer incorporates a multi-head time-space attention block with four attention heads and a two-layer MLP block with a ReLU non-linearity, facilitating the aggregation of spatial-temporal features and physical information. Drawing inspiration from the efficiency of the ViT transformer in computational resource-saving, we feed the resulting sequence of linear embeddings of the fixed-size patches after patch embedding into the encoder. Meanwhile, the decoder consists of a stack of <italic>n</italic>2 = 4 identical layers. In contrast to the encoder, the decoder focuses on multi-head time attention concerning the output from the encoder stack.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Revised ViT-based block.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g004.tif"/>
</fig>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Physics-guided information integrating</title>
<p>Next, we describe how we incorporate physics information into the model. This integration encompasses three facets: long-term dynamics embedding, the imposition of soft constraints based on the thermodynamic equation, and the restriction of the output solution space. We apply a linear embedding of the PDO or NPGO index to capture long-term dynamics. As detailed in Section 2.2, the PDO index effectively characterizes large-scale oceanic decadal variations, serving as the temporal coefficient of the primary mode obtained from EOF analysis. It manifests as a one-dimensional time series, encapsulating the principal features of such variations. Hence, we augment the dimension of the PDO index and integrate it with the input data through linear aggregation. A similar procedure is employed for processing the NPGO. Subsequently, we incorporate the thermodynamic properties of seawater (specifically, density) into the model in a soft-constraint manner to guide the model&#x2019;s output to match a specific thermohaline density relationship. It is worth noting that the thermodynamic equation used to calculate density is based on the latest seawater thermodynamic calculation standard TEOS-10. TEOS-10 supersedes the former standard EOS-80 (Equation of State of Seawater, 1980; <xref ref-type="bibr" rid="B15">International Association for the Properties of Water and Steam, 2018</xref>), and it provides a comprehensive, thermodynamically consistent way of deriving all thermodynamic properties of seawater (density, enthalpy, entropy, sound speed, etc.) based on a Gibbs function (named after Josiah Willard Gibbs) formulation. The primer by <xref ref-type="bibr" rid="B26">Pawlowicz (2010)</xref> points out that all thermodynamic properties of the system can be determined by specific combinations of derivatives of the Gibbs function. So the key to solving the seawater problem becomes how to compute the Gibbs function for seawater.
TEOS-10 defines the Gibbs function of seawater as the sum of a pure water <italic>g<sup>W</sup>
</italic> part and the saline part <italic>g<sup>S</sup>
</italic> (IAPWS-08), <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mi>W</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mi>S</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="bibr" rid="B7">Commission et&#xa0;al., 2010</xref>). Concretely, the density of seawater <italic>&#x3c1;</italic> is determined by the reciprocal of the pressure derivative of the Gibbs function (<italic>g</italic>) at constant absolute salinity (<italic>S<sub>A</sub>
</italic>) and <italic>in situ</italic> temperature <italic>T</italic>. Specifically,</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>g</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where, <inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>120</mml:mn>
<mml:mi>g</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>k</mml:mi>
<mml:mi>g</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>12</mml:mn>
<mml:mtext>&#xb0;C</mml:mtext>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>80</mml:mn>
<mml:mtext>&#xb0;C</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.1</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>P</mml:mi>
<mml:mi>a</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>100</mml:mn>
<mml:mi>M</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <italic>P</italic> means sea pressure. Besides, it&#x2019;s noteworthy that all the equations to calculate thermodynamic properties were integrated into the open source Gibbs-Seawater (GSW) Oceanographic Toolbox <xref ref-type="bibr" rid="B23">McDougall and Barker (2011)</xref>. Consequently, we call the function <italic>gsw_rho_t_exact</italic>(<italic>SA,T,P</italic>) that computes the density in this tool directly [for more information about this, please refer to <xref ref-type="bibr" rid="B7">Commission et&#xa0;al. (2010)</xref>].</p>
<p>Given the prediction <inline-formula>
<mml:math display="inline" id="im14">
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> and the ground truth <italic>Y</italic>, the combined loss is formulated as follows:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mtext>Combine</mml:mtext>
<mml:mo>_</mml:mo>
<mml:mtext>Loss</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>*</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>*</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>S</mml:mi>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mo>*</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>&#x3c1;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where,</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mtext>Loss</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>Y</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In our formulation, <italic>Loss</italic> denotes a composite temperature-salinity loss, wherein <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>S</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the independent losses for temperature and salinity, respectively. Additionally, <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>&#x3c1;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> signifies the density loss, which is computed based on the model&#x2019;s temperature and salinity outputs using the TEOS-10 equation. The calculation formula for <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>S</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>&#x3c1;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> mirrors that of <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. Furthermore, <inline-formula>
<mml:math display="inline" id="im21">
<mml:mrow>
<mml:msub>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:msub>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:msub>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote adaptive hyperparameters, with an optimal combination typically being (0.5, 0.3, 0.2). Finally, considering the range of temperature and salinity values, we incorporated a constraint layer to confine them within a predetermined range, thus ensuring adherence to fundamental physical laws.</p>
<p>As for the output solution space restriction, we sought to limit the model&#x2019;s output by applying the maximum and minimum ranges of temperature and salinity consistent with the TEOS-10 equation <inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>120</mml:mn>
<mml:mi>g</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>k</mml:mi>
<mml:mi>g</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>12</mml:mn>
<mml:mtext>&#xb0;C</mml:mtext>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>80</mml:mn>
<mml:mtext>&#xb0;C</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>. These constraints were standardized alongside the data. To mitigate the impact of normalization, we conducted ablation experiments by directly inputting the raw temperature and salinity data into the model, applying the original scale constraints accordingly. Regardless of whether the data were standardized or not, our experiments revealed no enhancement in the model&#x2019;s forecasting performance upon integrating these constraint ranges. This outcome suggests that the ranges may have been overly broad, surpassing the actual extremities of temperature and salinity within the studied area. Future endeavors could focus on pinpointing more precise ranges based on genuine environmental conditions.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Experiments and results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Experimental settings</title>
<p>We conduct extensive experiments using temperature and salinity data in the tropical Pacific from January 1940 to September 2023. The model training is conducted on a server equipped with a TESLA-V100 GPU with 16GB memory. Detailed model parameters are provided in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Model parameters.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Hyperparameters</th>
<th valign="top" align="center">Optimal Values</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">patch size</td>
<td valign="top" align="center">(2 &#xd7; 2)</td>
</tr>
<tr>
<td valign="top" align="center">batch size</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="center">learning rate</td>
<td valign="top" align="center">0.001</td>
</tr>
<tr>
<td valign="top" align="center">epochs</td>
<td valign="top" align="center">20</td>
</tr>
<tr>
<td valign="top" align="center">encoder/decoder layers</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="center">attention heads</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="center">&#x3bb;<sub>1,2,3</sub>
</td>
<td valign="top" align="center">(0.5, 0.3, 0.2)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Baseline and evaluation metrics</title>
<p>In this section, we consider CNN-based and ConvLSTM-based models, together with TransNet, as baselines. TransNet is the backbone of PGTransNet; it is a modified version of ViT (<xref ref-type="bibr" rid="B11">Dosovitskiy et&#xa0;al., 2020</xref>) and does not contain any physical information. <xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5</bold>
</xref> and <xref ref-type="fig" rid="f6">
<bold>6</bold>
</xref> show the competing models based on CNN and ConvLSTM. Both prediction models were trained with a batch size of 8 and a kernel size of (2,2) or (2,2,2), using adaptive moment estimation (Adam) with an initial learning rate of 0.001 for 30 epochs; the learning rate is adjusted using the ReduceLROnPlateau scheduler.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>CNN based model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g005.tif"/>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>ConvLSTM based model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g006.tif"/>
</fig>
<p>Besides, we use the following evaluation metrics to measure the performance of different methods:</p>
<p>1. Root mean square error (RMSE): it measures the deviation of the predicted values from the observed ones.</p>
<disp-formula id="eq6">
<label>(5)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mtext>RMSE</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
</disp-formula>
<p>2. Anomaly correlation coefficient (ACC): it quantifies the correlation between the anomalies of the predicted values and those of the validation values (ground truth):</p>
<disp-formula id="eq7">
<label>(6)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mtext>ACC</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:msqrt>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>3. Bias/Mean Bias. We employ Bias to gauge the positive and negative deviations between grids, and Mean Bias to quantify the disparity between the spatial mean of the prediction and the spatial mean of the ground truth.</p>
<disp-formula id="eq8">
<label>(7)</label>    <mml:math display="block" id="M9">
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mtext>Mean&#x2009;Bias</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:munderover></mml:mtd></mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mtext>&#x2009;</mml:mtext>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mtext>&#xa0;&#xa0;</mml:mtext>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:munderover>
<mml:mi>Y</mml:mi></mml:mtd></mml:mtr></mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>For all metrics, we denote <inline-formula>
<mml:math display="inline" id="im30">
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> and <italic>Y</italic> as the prediction and ground truth, which have a shape of <inline-formula>
<mml:math display="inline" id="im31">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where <italic>N</italic> is the number of test samples, <italic>C</italic> refers to the depth channel, <inline-formula>
<mml:math display="inline" id="im32">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the spatial resolution.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Quantitative evaluation results</title>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Overall performance</title>
<p>To comprehensively evaluate the performance of PGTransNet, we utilize 12 consecutive months of historical data to forecast temperature and salinity for the subsequent 12 months. We conducted ablation experiments to perform a sensitivity analysis and evaluate the effectiveness of each module in the model based on various validation factors. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> presents the averaged RMSE and ACC over the forecasting times of the baseline model alongside the augmented model with progressively integrated physical information modules.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>The PGTransNet performance on tropical Pacific forecasting, and its comparison against other ablation models. <bold>(A)</bold> RMSE of temperature; <bold>(B)</bold> RMSE of salinity; <bold>(C)</bold> ACC of temperature; <bold>(D)</bold> ACC of salinity.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g007.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> presents the performance of each model in terms of RMSE and ACC. Specifically, TransNet is the modified version of ViT as mentioned above, PGTransNet_PDO represents TransNet augmented with a combined loss of temperature, salinity and density, and an embedding of the PDO long-term decadal variations. PGTransNet_NPGO further incorporates NPGO embedding on top of PGTransNet_PDO. PGTransNet_PDO_Laplacian integrates laplacian encodings on top of PGTransNet_PDO, and PGTransNet_NPGO_Laplacian integrates laplacian encodings on top of PGTransNet_NPGO. For brevity, we denote PGTransNet_PDO, PGTransNet_NPGO, PGTransNet_PDO_Laplacian, and PGTransNet_NPGO_Laplacian as PGTransNet1, PGTransNet2, PGTransNet3, and PGTransNet4 in the diagram, respectively. All of these models belong to the PGTransNet group.</p>
<p>The x-axis and y-axis denote the forecast time and the corresponding forecasting RMSE/ACC, respectively. Lower RMSE and higher ACC values indicate better model performance and prediction accuracy. From <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>, it&#x2019;s evident that the PGTransNet group outperforms the CNN-based and ConvLSTM-based models in both temperature and salinity forecasting. Surprisingly, the ViT-based backbone TransNet, which lacks incorporation of any physical laws, performs the worst. We observe that while TransNet achieves an accuracy of 0.99 on the training set, it yields an RMSE of 1.932 on the test set. Embedding decadal variability and the thermodynamic equation into the model significantly improves prediction accuracy on the test set by over 40%. This highlights the robustness and generalization capabilities of PGTransNet in forecasting. Moreover, ablation experiments involving the incremental addition of PDO, NPGO, and laplacian encodings reveal that all these physics-guided models achieve comparable performance. The benefits of incorporating various physical information and embedding the laplacian encodings are evident in mitigating prediction biases in coastal and marginal sea regions, and improving prediction accuracy in high-temperature areas at different vertical depths. For further details, refer to Section 3.4.1.</p>
<p>Additionally, we present the average performance of the aforementioned models across all test samples (averaged from January 2020 to September 2023) in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>. It can be observed that our physics-guided models outperform the baseline models in terms of temperature and salinity forecasting, and that all physics-guided variants achieve comparable performance.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Ablation experiment performance of PGTransNet.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center"/>
<th valign="top" colspan="3" align="center">T</th>
<th valign="top" colspan="3" align="center">S</th>
</tr>
<tr>
<th valign="top" align="center">RMSE</th>
<th valign="top" align="center">ACC</th>
<th valign="top" align="center">Mean-Bias</th>
<th valign="top" align="center">RMSE</th>
<th valign="top" align="center">ACC</th>
<th valign="top" align="center">Mean-Bias</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">ConvLSTM</td>
<td valign="top" align="center">1.028</td>
<td valign="top" align="center">0.946</td>
<td valign="top" align="center">-0.026</td>
<td valign="top" align="center">0.23</td>
<td valign="top" align="center">0.9397</td>
<td valign="top" align="center">-0.013</td>
</tr>
<tr>
<td valign="top" align="center">CNN</td>
<td valign="top" align="center">1.072</td>
<td valign="top" align="center">0.939</td>
<td valign="top" align="center">0.0149</td>
<td valign="top" align="center">0.246</td>
<td valign="top" align="center">0.930</td>
<td valign="top" align="center">-0.014</td>
</tr>
<tr>
<td valign="top" align="center">TransNet</td>
<td valign="top" align="center">1.932</td>
<td valign="top" align="center">0.884</td>
<td valign="top" align="center">0.082</td>
<td valign="top" align="center">0.486</td>
<td valign="top" align="center">0.881</td>
<td valign="top" align="center">-0.083</td>
</tr>
<tr>
<td valign="top" align="center">PGTransNet_PDO</td>
<td valign="top" align="center">0.8087</td>
<td valign="top" align="center">0.9803</td>
<td valign="top" align="center">-0.163</td>
<td valign="top" align="center">0.182</td>
<td valign="top" align="center">0.962</td>
<td valign="top" align="center">0.0015</td>
</tr>
<tr>
<td valign="top" align="center">PGTransNet_PDO_NPGO</td>
<td valign="top" align="center">0.816</td>
<td valign="top" align="center">0.9806</td>
<td valign="top" align="center">-0.2178</td>
<td valign="top" align="center">0.186</td>
<td valign="top" align="center">0.961</td>
<td valign="top" align="center">-0.0025</td>
</tr>
<tr>
<td valign="top" align="center">PGTransNet_PDO_Laplacian</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.980</td>
<td valign="top" align="center">-0.141</td>
<td valign="top" align="center">0.178</td>
<td valign="top" align="center">0.965</td>
<td valign="top" align="center">-0.030</td>
</tr>
<tr>
<td valign="top" align="center">PGTransNet_PDO_NPGO_Laplacian</td>
<td valign="top" align="center">0.805</td>
<td valign="top" align="center">0.981</td>
<td valign="top" align="center">-0.126</td>
<td valign="top" align="center">0.169</td>
<td valign="top" align="center">0.967</td>
<td valign="top" align="center">0.0068</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Temperature/salinity profiles evaluation</title>
<p>To comprehensively assess the model&#x2019;s forecasting ability in the vertical direction, <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref> presents the average RMSE in vertical temperature and salinity profiles for different models within the upper 160 meters (at depths of 1, 20, 50, 80, 120, and 160 meters, latitude=0&#xb0;).</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>The profiles of temperature and salinity RMSE above 160m for different models at lead time = (1, 6, 12). The reported RMSE is averaged from Jan. 2020 to Sep. 2023. <bold>(A)</bold> RMSE of temperature profile (lead time = 1); <bold>(B)</bold> RMSE of temperature profile (lead time = 6); <bold>(C)</bold> RMSE of temperature profile (lead time = 12); <bold>(D)</bold> RMSE of salinity profile (lead time = 1); <bold>(E)</bold> RMSE of salinity profile (lead time = 6); <bold>(F)</bold> RMSE of salinity profile (lead time = 12).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g008.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref> illustrates that the physics-guided PGTransNet model group outperforms the baseline model significantly. For salinity prediction, PGTransNet4, which incorporates PDO, NPGO, and laplacian positional encoding, notably performs better than the others. Except for TransNet, the RMSE decreases with increasing depth for other models. For temperature prediction, PGTransNet4 demonstrates the best performance overall. However, from the temperature profile curve, the predictions at lead times 1 and 6 are slightly inferior to the other models in the PGTransNet group at a depth of 160m. Nonetheless, this model exhibits better forecasting performance for sea surface temperature. As the depth increases, the model&#x2019;s forecasting accuracy for underwater temperature deteriorates, leading to higher RMSE values. Besides, it is evident that for both temperature and salinity profiles, the ViT-based TransNet exhibits the largest RMSE, even worse than the CNN-based and ConvLSTM-based models. This is attributed to the overfitting issue mentioned earlier, leading to a deterioration in performance on the test set.</p>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Case studies for visualization</title>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>Predictions and bias</title>
<p>
<xref ref-type="fig" rid="f9">
<bold>Figures&#xa0;9</bold>
</xref> and <xref ref-type="fig" rid="f10">
<bold>10</bold>
</xref> visualize the ground truth, predictions and bias of these models for temperature and salinity. The first column of the graph represents the ground truth, displaying actual values. The second column represents the predicted values of each ablation model. Finally, the third column illustrates the bias between the predicted values of each ablation model and the ground truth.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Example visualizations of temperature prediction by PGTransNets (Depth=1m, Jan. 2020).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g009.tif"/>
</fig>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Example visualizations of salinity prediction by PGTransNets (Depth=1m, Jan. 2020).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g010.tif"/>
</fig>
<p>For sea surface temperature prediction, the graph illustrates that all models except TransNet effectively capture the large-scale distribution of sea surface temperature in the tropical Pacific region. It is obvious that all the models within the PGTransNet group can learn a better distribution than the CNN-based and ConvLSTM-based models. Comparing the distribution of sea surface temperature ground truth and predictions, PGTransNet4, which integrates PDO, NPGO, and laplacian encodings, can better capture the high-temperature center region with just a slight overestimation. PGTransNet2 exhibits lower bias in regions with relatively lower temperatures. Additionally, from the bias, it can be observed that PGTransNet4 has lower bias in the gulf and nearshore areas. Regarding salinity prediction, all the models except TransNet demonstrate the capability to capture the large-scale distribution characteristics in the tropical Pacific region. In general, PGTransNet4 has the best performance.</p>
</sec>
<sec id="s3_4_2">
<label>3.4.2</label>
<title>ACC distribution</title>
<p>To clearly assess the model&#x2019;s forecasting performance across oceanic geographical space, we present the spatial distribution of the anomaly correlation coefficient (ACC). As shown in <xref ref-type="fig" rid="f11">
<bold>Figure&#xa0;11</bold>
</xref>, the physics-guided PGTransNet group exhibits higher forecasting accuracy in coastal and marginal sea areas compared to the baseline TransNet, CNN-based and ConvLSTM-based models. Whether only adding PDO or further incorporating laplacian encoding on top of it, the effect is quite satisfactory.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Distributions of the ACCs among all models calculated between analyzed and predicted fields during Jan. 2020 to Sep. 2023.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g011.tif"/>
</fig>
</sec>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Parameter insensitivity analysis</title>
<p>During model training, we found that without adding any physical information, inappropriate hyperparameter adjustments can easily lead to overfitting or underfitting. For instance, when the patch size and batch size are both small (e.g. <italic>p</italic> = 2&#xd7;2, <italic>batchsize</italic> = 1), TransNet can achieve an accuracy of 0.99 on the training set. However, the RMSE on the test set is 1.932, indicating poor performance in predicting the large-scale temperature distribution. In contrast, our model can achieve comparable forecasting accuracy as long as the parameters are within a reasonable range, regardless of how small they are set.</p>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Physical consistency analysis</title>
<p>Under normal circumstances, the density of the upper ocean mixed layer increases gradually with depth. The density of lower seawater is greater than that of upper seawater, exhibiting a monotonic behavior. Therefore, based on the TEOS-10 thermodynamic equation, we calculate the corresponding ocean density values from the temperature and salinity data predicted by the model. <xref ref-type="fig" rid="f12">
<bold>Figure&#xa0;12</bold>
</xref> presents the mean density profiles for the entire study area in January 2020. Under the parameter settings described earlier, the densities of all models satisfy the monotonicity condition. However, during the training and parameter tuning process, we find that although the backbone model TransNet performs well on the training set, its performance on the test dataset is poor; under certain parameter conditions (e.g. <italic>batchsize</italic> = 2, <italic>patchsize</italic> = 2 &#xd7; 3), there are occasional instances of density values deviating abnormally, as indicated by the solid brown line in <xref ref-type="fig" rid="f12">
<bold>Figure&#xa0;12</bold>
</xref>.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Density profiles of varying algorithms in Jan. 2020 in the tropical Pacific.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g012.tif"/>
</fig>
</sec>
<sec id="s3_7">
<label>3.7</label>
<title>Generality analysis</title>
<p>To further analyze the generalization capability of the model, we conduct temperature and salinity forecasting for two localized regions at the Earth&#x2019;s northern and southern extremes. Region 1: part of the Arctic Ocean, with a latitude range of [66.5&#xb0;N, 89.5&#xb0;N] and a longitude range of [1&#xb0;E, 40&#xb0;E]. Region 2: latitude range [29&#xb0;S, 69&#xb0;S] and longitude range [25&#xb0;W, 85&#xb0;W]. Region 1 is selected to assess the effect of the model on the prediction of ocean temperature and salinity in the polar region. Region 2 is chosen because its latitude spans high, medium, and low latitudes, and geographically, it encompasses parts of South America, the Antarctic Peninsula, and the South Shetland Islands. This ensures that the study area includes various complex spatial topographical variations in ocean temperature, allowing for a more comprehensive evaluation of the proposed model&#x2019;s capabilities. <xref ref-type="fig" rid="f13">
<bold>Figure&#xa0;13</bold>
</xref> presents the performance of sea surface temperature forecasting for Region 1. The distribution is similar to that shown in <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>. From the figure, it is evident that the PGTransNet_PDO model outperforms other models, indicating that our proposed method achieves superior forecasting performance even in polar regions.</p>
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>Generality analysis: example visualizations of temperature prediction by CNN, ConvLSTM and PGTransNet_PDO in Region 1 (Depth=1m, Jan. 2020).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g013.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f14">
<bold>Figure&#xa0;14</bold>
</xref> shows the sea surface temperature forecasting results for Region 2 during spring (March). We can see from the picture that the overall bias of the ConvLSTM and CNN models is significantly higher than that of PGTransNet_PDO, especially in coastal and nearshore areas. The biases of the two baseline models are significantly higher, possibly because the ocean environment in coastal and nearshore regions is heavily influenced by topography. Additionally, at the boundary between land and sea, geographical features such as ocean currents and tides affect the movement of water bodies and temperature distribution, thereby increasing the complexity of forecasting. In contrast, PGTransNet_PDO exhibits a lower bias in coastal and nearshore areas, indicating that PGTransNet_PDO demonstrates superior forecasting performance and greater model robustness.</p>
<fig id="f14" position="float">
<label>Figure&#xa0;14</label>
<caption>
<p>Generality analysis: example visualizations of temperature prediction by CNN, ConvLSTM and PGTransNet_PDO in Region 2 (Depth=1m, Mar. 2020).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1477710-g014.tif"/>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<label>4</label>
<title>Conclusions</title>
<p>In this paper, we propose a novel physics-guided spatio-temporal self-attention hybrid network, PGTransNet, for joint temperature and salinity forecasting in the tropical Pacific. Compared with the benchmark model without physical knowledge, the PGTransNet group can obtain higher prediction accuracy. Extensive experiments and visualizations show that our model is insensitive to hyperparameter tuning, ensuring both better generalization and physical consistency. Moreover, as observed from the spatial distribution of the anomaly correlation coefficient, the model exhibits higher forecasting accuracy for coastal and marginal sea regions.</p>
<p>As for the output solution space restriction, it confines the temperature-salinity outputs within a specific range. Since the TEOS-10 provides a broad range of constraint values, which are generally applicable to the outputs, they do not significantly affect the model results. Subsequent refinements of the constraint values can be made based on specific circumstances.</p>
<p>From <xref ref-type="fig" rid="f9">
<bold>Figures&#xa0;9</bold>
</xref> and <xref ref-type="fig" rid="f10">
<bold>10</bold>
</xref>, it is evident that high biases occur in the central equatorial Pacific region. We know that this anomalous area aligns with wind-driven circulation. Ocean circulation is influenced by wind stress, heat flux, and water flux acting together, with different factors dominating in different scenarios. In the future, we will incorporate sea surface zonal and meridional wind stress as input features, and embed ocean heat flux information and other relevant data into the model to guide model training and improve predictions in anomalous marine areas.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>SW: Formal analysis, Methodology, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Conceptualization, Data curation, Software, Validation. SB: Conceptualization, Investigation, Methodology, Validation, Writing &#x2013; review &amp; editing. WD: Funding acquisition, Methodology, Resources, Writing &#x2013; review &amp; editing. SZW: Conceptualization, Methodology, Writing &#x2013; review &amp; editing. XZ: Methodology, Resources, Writing &#x2013; review &amp; editing. CS: Supervision, Writing &#x2013; review &amp; editing. JZ: Formal analysis, Methodology, Writing &#x2013; review &amp; editing. XL: Conceptualization, Funding acquisition, Project administration, Resources, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work is supported by the National Natural Science Foundation of China (Grant Nos. 42275170, 62032019, 42305170, 62202487, 62402512), the Science and Technology Innovation Program of Hunan Province (2022RC3070), the National Key R&amp;D Program of China (2021YFC3101502), the Natural Science Foundation of Hunan Province (Grant No. 2023JJ40678), and the Scientific Research Program of the National University of Defense Technology (No. ZK22-13).</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>The dataset was provided by the Chinese Academy of Sciences (CAS). Thanks to all the members for their help.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aguilar-Martinez</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Hsieh</surname> <given-names>W. W.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Forecasts of tropical pacific sea surface temperatures by neural networks and support vector regression</article-title>. <source>Int. J. Oceanogr.</source> <volume>1</volume>, <fpage>167239</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2009/167239</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Tempronet: A transformer-based deep learning model for seawater temperature prediction</article-title>. <source>Ocean Eng.</source> <volume>293</volume>, <elocation-id>116651</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.oceaneng.2023.116651</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Abraham</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Hausfather</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Trenberth</surname> <given-names>K. E.</given-names>
</name>
</person-group> (<year>2019</year>a). <article-title>How fast are the oceans warming</article-title>? <source>Science</source> <volume>363</volume>, <fpage>128</fpage>&#x2013;<lpage>129</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.aav7619</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Trenberth</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Fasullo</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Boyer</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Abraham</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Improved estimates of ocean heat content from 1960 to 2015</article-title>. <source>Sci. Adv.</source> <volume>3</volume>, <elocation-id>e1601545</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/sciadv.1601545</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Trenberth</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Fasullo</surname> <given-names>J. T.</given-names>
</name>
<name>
<surname>Mayer</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Balmaseda</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>b). <article-title>Evolution of ocean heat content related to enso</article-title>. <source>J. Climate</source> <volume>32</volume>, <fpage>3529</fpage>&#x2013;<lpage>3556</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1175/JCLI-D-18-0607.1</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Benefits of cmip5 multimodel ensemble in reconstructing historical ocean subsurface temperature variations</article-title>. <source>J. Climate</source> <volume>29</volume>, <fpage>5393</fpage>&#x2013;<lpage>5416</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1175/JCLI-D-15-0730.1</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>Intergovernmental Oceanographic Commission, Scientific Committee on Oceanic Research, and International Association for the Physical Sciences of the Oceans</collab>
</person-group> (<year>2010</year>). <source>The International Thermodynamic Equation of Seawater &#x2013; 2010: Calculation and Use of Thermodynamic Properties</source> (<publisher-loc>Paris</publisher-loc>).</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname> <given-names>H.</given-names>
</name>
<name>
<surname>He</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Long-term prediction of sea surface temperature by temporal embedding transformer with attention distilling and partial stacked connection</article-title>. <source>IEEE J. Select. Topics Appl. Earth Observ. Remote Sens.</source> <volume>17</volume>, <fpage>4280</fpage>&#x2013;<lpage>4293</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2024.3357191</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Daw</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>R. Q.</given-names>
</name>
<name>
<surname>Carey</surname> <given-names>C. C.</given-names>
</name>
<name>
<surname>Read</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Appling</surname> <given-names>A. P.</given-names>
</name>
<name>
<surname>Karpatne</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Physics-guided architecture (pga) of neural networks for quantifying uncertainty in lake temperature modeling</article-title>,&#x201d; in <source>Proceedings of the 2020 SIAM International Conference on Data Mining (SDM)</source>. <publisher-name>Society for Industrial and Applied Mathematics</publisher-name>, <fpage>532</fpage>&#x2013;<lpage>540</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Daw</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Karpatne</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Watkins</surname> <given-names>W. D.</given-names>
</name>
<name>
<surname>Read</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Physics-guided neural networks (pgnn): An application in lake temperature modeling</article-title>. In <source>Knowledge Guided Machine Learning</source>, <fpage>353</fpage>&#x2013;<lpage>372</lpage>. <publisher-name>Chapman and Hall/CRC</publisher-name>. doi:&#xa0;<pub-id pub-id-type="doi">10.1201/9781003143376-15</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dosovitskiy</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Beyer</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Kolesnikov</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Weissenborn</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zhai</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Unterthiner</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>An image is worth 16x16 words: Transformers for image recognition at scale</article-title>. <source>arXiv preprint arXiv:2010.11929</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2010.11929</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dwivedi</surname> <given-names>V. P.</given-names>
</name>
<name>
<surname>Joshi</surname> <given-names>C. K.</given-names>
</name>
<name>
<surname>Luu</surname> <given-names>A. T.</given-names>
</name>
<name>
<surname>Laurent</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bresson</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Benchmarking graph neural networks</article-title>. <source>J. Mach. Learn. Res.</source> <volume>24</volume>, <fpage>1</fpage>&#x2013;<lpage>48</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fischer</surname> <given-names>C. C.</given-names>
</name>
<name>
<surname>Tibbetts</surname> <given-names>K. J.</given-names>
</name>
<name>
<surname>Morgan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Ceder</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Predicting crystal structure by merging data mining with quantum mechanics</article-title>. <source>Nat. materials</source> <volume>5</volume>, <fpage>641</fpage>&#x2013;<lpage>646</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nmat1691</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hou</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Guan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Must: A multi-source spatio-temporal data fusion model for short-term sea surface temperature prediction</article-title>. <source>Ocean Eng.</source> <volume>259</volume>, <elocation-id>111932</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.oceaneng.2022.111932</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>International Association for the Properties of Water and Steam</collab>
</person-group>, <article-title>IAPWS R6-95(2018), Revised Release on the IAPWS Formulation 1995 for the Thermodynamic Properties of Ordinary Water Substance for General and Scientific Use</article-title> (<year>2018</year>).</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Willard</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Karpatne</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Read</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Zwart</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Steinbach</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Physics-guided machine learning for scientific discovery: An application in simulating lake temperature profiles</article-title>. <source>ACM/IMS Trans. Data Sci.</source> <volume>2</volume>, <fpage>1</fpage>&#x2013;<lpage>26</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3447814</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Kashinath</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Prabhat</surname>
</name>
<name>
<surname>Marcus</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Enforcing physical constraints in CNNs through differentiable PDE layer</article-title>,&#x201d; in <source>ICLR 2020 Workshop on Integration of Deep Neural Models and Differential Equations</source>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kug</surname> <given-names>J.-S.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>I.-S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>J.-Y.</given-names>
</name>
<name>
<surname>Jhun</surname> <given-names>J.-G.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>A statistical approach to Indian Ocean sea surface temperature prediction using a dynamical ENSO prediction</article-title>. <source>Geophys. Res. Lett.</source> <volume>31</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1029/2003GL019209</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Trenberth</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Mann</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Abraham</surname> <given-names>J. P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Increasing ocean stratification over the past half-century</article-title>. <source>Nat. Climate Change</source> <volume>10</volume>, <fpage>1116</fpage>&#x2013;<lpage>1123</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41558-020-00918-2</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Sthcformer: A multivariate ocean weather predicting method based on spatiotemporal hybrid convolutional attention networks</article-title>. <source>IEEE J. Select. Topics Appl. Earth Observ. Remote Sens.</source> <volume>17</volume>, <fpage>3600</fpage>&#x2013;<lpage>3614</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2024.3354254</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Regime shift of winter North Pacific sea surface temperature after 1990 and its possible causes</article-title>. <source>Chin. J. Atmos. Sci. (in Chinese)</source> <volume>39</volume>, <fpage>926</fpage>&#x2013;<lpage>940</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maskey</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Parviz</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Thiessen</surname> <given-names>M.</given-names>
</name>
<name>
<surname>St&#xe4;rk</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sadikaj</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Maron</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Generalized Laplacian positional encoding for graph representation learning</article-title>. <source>arXiv preprint</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2210.15956</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McDougall</surname> <given-names>T. J.</given-names>
</name>
<name>
<surname>Barker</surname> <given-names>P. M.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Getting started with TEOS-10 and the Gibbs Seawater (GSW) oceanographic toolbox</article-title>. <source>SCOR/IAPSO WG</source> <volume>127</volume>, <fpage>1</fpage>&#x2013;<lpage>28</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Patil</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Deo</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ravichandran</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Prediction of sea surface temperature by combining numerical and neural techniques</article-title>. <source>J. Atmos. Ocean. Technol.</source> <volume>33</volume>, <fpage>1715</fpage>&#x2013;<lpage>1726</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1175/JTECH-D-15-0213.1</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Patil</surname> <given-names>K. R.</given-names>
</name>
<name>
<surname>Iiyama</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Deep neural networks to predict sub-surface ocean temperatures from satellite-derived surface ocean parameters</article-title>,&#x201d; in <conf-name>Soft Computing for Problem Solving: Proceedings of SocProS 2020</conf-name>, Vol. <volume>2</volume>. <fpage>423</fpage>&#x2013;<lpage>434</lpage> (<publisher-name>Springer</publisher-name>).</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pawlowicz</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>What every oceanographer needs to know about TEOS-10 (The TEOS-10 Primer)</article-title>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qiao</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Effective ensemble learning approach for SST field prediction using attention-based PredRNN</article-title>. <source>Front. Comput. Sci.</source> <volume>17</volume>, <fpage>171601</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11704-021-1080-7</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A novel dual path gated recurrent unit model for sea surface salinity prediction</article-title>. <source>J. Atmos. Ocean. Technol.</source> <volume>37</volume>, <fpage>317</fpage>&#x2013;<lpage>325</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1175/JTECH-D-19-0168.1</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vaswani</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Shazeer</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Parmar</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Uszkoreit</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jones</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Gomez</surname> <given-names>A. N.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Attention is all you need</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>30</volume>.</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Von Rueden</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Mayer</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Beckh</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Georgiev</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Giesselbach</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Heese</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Informed machine learning&#x2013;a taxonomy and survey of integrating prior knowledge into learning systems</article-title>. <source>IEEE Trans. Knowledge Data Eng.</source> <volume>35</volume>, <fpage>614</fpage>&#x2013;<lpage>633</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TKDE.2021.3079836</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An interpretable deep learning ENSO forecasting model</article-title>. <source>Ocean-Land-Atmos. Res.</source> <volume>2</volume>, <fpage>0012</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/olar.0012</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chinazzi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Vespignani</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>Y.-A.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>DeepGLEAM: a hybrid mechanistic and deep learning model for COVID-19 forecasting</article-title>. <source>arXiv preprint</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2102.06684</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Bao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Predicting ocean temperature in high-frequency internal wave area with physics-guided deep learning: A case study from the South China Sea</article-title>. <source>J. Mar. Sci. Eng.</source> <volume>11</volume>, <fpage>1728</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/jmse11091728</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A spatiotemporal deep learning model for sea surface temperature field prediction using time-series satellite data</article-title>. <source>Environ. Model. Softw.</source> <volume>120</volume>, <fpage>104502</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.envsoft.2019.104502</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Tong</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Xv</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Prediction of long lead monthly three-dimensional ocean temperature using time series gridded Argo data and a deep learning method</article-title>. <source>Int. J. Appl. Earth Observ. Geoinform.</source> <volume>112</volume>, <fpage>102971</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jag.2022.102971</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Spatio-temporal predictions of SST time series in China&#x2019;s offshore waters using a regional convolution long short-term memory (RC-LSTM) network</article-title>. <source>Int. J. Remote Sens.</source> <volume>41</volume>, <fpage>3368</fpage>&#x2013;<lpage>3389</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/01431161.2019.1701724</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Neural network driven by space-time partial differential equation for predicting sea surface temperature</article-title>,&#x201d; in <source>2022 IEEE International Conference on Data Mining (ICDM)</source>. <publisher-loc>Orlando, FL, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>, <fpage>656</fpage>&#x2013;<lpage>665</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICDM54844.2022.00076</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhong</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Prediction of sea surface temperature using long short-term memory</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>14</volume>, <fpage>1745</fpage>&#x2013;<lpage>1749</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2017.2733548</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.-H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A self-attention&#x2013;based neural network for three-dimensional multivariate modeling and its skillful ENSO predictions</article-title>. <source>Sci. Adv.</source> <volume>9</volume>, <elocation-id>eadf2827</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/sciadv.adf2827</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.-H.</given-names>
</name>
<name>
<surname>Moum</surname> <given-names>J. N.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Physics-informed deep-learning parameterization of ocean vertical mixing improves climate simulations</article-title>. <source>Natl. Sci. Rev.</source> <volume>9</volume>, <fpage>nwac044</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nsr/nwac044</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>