<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mater.</journal-id>
<journal-title>Frontiers in Materials</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mater.</abbrev-journal-title>
<issn pub-type="epub">2296-8016</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">733813</article-id>
<article-id pub-id-type="doi">10.3389/fmats.2021.733813</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Materials</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Pitting Judgment Model Based on Machine Learning and Feature Optimization Methods</article-title>
<alt-title alt-title-type="left-running-head">Qu et al.</alt-title>
<alt-title alt-title-type="right-running-head">Pitting Judgment Model</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Qu</surname>
<given-names>Zhihao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1390029/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tang</surname>
<given-names>Dezhi</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Zhu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1191078/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Xiaqiao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Hongjian</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lv</surname>
<given-names>Yao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Beijing Advanced Innovation Center for Materials Genome Engineering, University of Science and Technology Beijing</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Institute for Advanced Materials and Technology, University of Science and Technology Beijing</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>PetroChina Planning and Engineering Institute</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1138513/overview">Chong Sun</ext-link>, China University of Petroleum (East China), China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1403450/overview">Liang Dong</ext-link>, Changzhou University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/954968/overview">Da-Hai Xia</ext-link>, Tianjin University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1409614/overview">Qinying Wang</ext-link>, Southwest Petroleum University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Zhu Wang, <email>wangzhu@ustb.edu.cn</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Environmental Degradation of Materials, a section of the journal Frontiers in Materials</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>26</day>
<month>08</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>8</volume>
<elocation-id>733813</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>06</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>08</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Qu, Tang, Wang, Li, Chen and Lv.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Qu, Tang, Wang, Li, Chen and Lv</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Pitting corrosion seriously harms the service life of oil field gathering and transportation pipelines, which makes it an important subject of corrosion prevention. In this study, we collected the corrosion data of pipeline steel immersion experiments and established a pitting judgment model based on a machine learning algorithm. Feature reduction methods, including feature importance calculation and Pearson correlation analysis, were first adopted to find the important factors affecting pitting. Then, the best input feature set for pitting judgment was constructed by combining feature combination and feature creation. Through receiver operating characteristic (ROC) curve and area under curve (AUC) calculation, the random forest algorithm was selected as the modeling algorithm. As a result, the pitting judgment model based on machine learning and high dimensional feature parameters (i.e., material factors, solution factors, environment factors) showed good prediction accuracy. This study provided an effective means for processing high-dimensional and complex corrosion data, and proved the feasibility of machine learning in solving material corrosion problems.</p>
</abstract>
<kwd-group>
<kwd>machine learning</kwd>
<kwd>feature engineering</kwd>
<kwd>pitting</kwd>
<kwd>random forest</kwd>
<kwd>pipeline steel</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Corrosion damage seriously reduces the strength and service life of pipelines in oil and gas fields, which makes the problem of pipeline corrosion increasingly serious (<xref ref-type="bibr" rid="B29">Soares et al., 2009</xref>; <xref ref-type="bibr" rid="B15">Jim&#xe9;nez-Come et al., 2012</xref>). Among all corrosion types, pitting corrosion is one of the most destructive and dangerous corrosion forms (<xref ref-type="bibr" rid="B2">Bhandari et al., 2015</xref>; <xref ref-type="bibr" rid="B17">Kolawole et al., 2016</xref>). After oil and gas pipeline corrosion and perforation, the leaked oil and gas will seriously pollute the environment and have the possibility of explosion, which directly and indirectly leads to serious economic losses and restricts the development of oil and gas industries (<xref ref-type="bibr" rid="B9">Ghidini and Donne, 2009</xref>).</p>
<p>Reliable corrosion warning methods and advanced anti-corrosion measures are the key to ensuring the safe operation of pipelines and preventing corrosion and leakage accidents. Therefore, it is of great practical significance to better judge the pitting corrosion of pipeline steel for the research and development of anti-corrosion technology and the prediction of structural integrity (<xref ref-type="bibr" rid="B1">Balekelayi and Tesfamariam, 2020</xref>). Pitting, however, is a complex process that includes many complicated phenomena (such as mass transfer, metal dissolution and passivation, etc.). The influencing factors of pitting corrosion are also numerous, such as metal components, medium temperature, pressure, pH, and the type and concentration of ions (<xref ref-type="bibr" rid="B6">Choi et al., 2005</xref>; <xref ref-type="bibr" rid="B18">Li et al., 2012</xref>), which makes the modeling of pitting more difficult.</p>
<p>The corrosion rate at a specific location is sensitively dependent on many local micro-scale material and environmental conditions. Therefore, at the macro level, pitting often occurs in a random and probabilistic manner, which is why statistical methods have been used to quantify and simulate local corrosion. In particular, the theory of extreme value analysis (<xref ref-type="bibr" rid="B30">Vajo et al., 2003</xref>) has been successfully applied to pitting corrosion of steel. <xref ref-type="bibr" rid="B24">Melchers (2008)</xref> showed that the Frechet extreme value distribution was more appropriate than Gumbel to represent the maximum pit depth. <xref ref-type="bibr" rid="B16">Kasai et al. (2016)</xref> proposed a method combining extreme value analysis with Bayesian inference, which accurately predicted the actual maximum corrosion depth by using the maximum corrosion depth detected.</p>
<p>Due to its advantages in dealing with multi-dimensional, nonlinear and uncertain characteristics, machine learning (ML) methods have been gradually applied in the field of corrosion science in recent years (<xref ref-type="bibr" rid="B13">Hu et al., 2014</xref>; <xref ref-type="bibr" rid="B3">Bi et al., 2015</xref>), and have been successfully applied in some pitting corrosion related simulations. The pitting corrosion prediction model based on ML can not only describe the nonlinear relationship between the influencing factors and the target parameters, so as to realize the accurate prediction of the pitting information, but also can effectively extract the important feature information that reflects the health state of steel in the corrosion data (<xref ref-type="bibr" rid="B8">Diao et al., 2021</xref>). <xref ref-type="bibr" rid="B31">Valor, et al. (2010)</xref> established a stochastic model using Markov chains, which has been successfully applied to reproduce the time evolution of extreme pitting corrosion depths in low-carbon steel. <xref ref-type="bibr" rid="B26">Mohammad, et al. (2013)</xref> proposed a model using artificial neural network (ANN) to predict the characteristics of pitting corrosion, and further pointed out that by increasing the corrosion concentration and prolonging the immersion time, the pitting density and depth could be increased. However, the value of judgement of pitting initiation in pipeline steel anticorrosion work has rarely been reported.</p>
<p>In this study, we collected corrosion data of pipeline steels during immersion experiments, and established a machine learning model to judge the occurrence of pitting corrosion based on steel composition, environmental parameters and solution parameters. The method of processing high-dimensional and complex corrosion data by reduction, combination and creation of features was studied, which improved the generalization ability of the model, and the key corrosion factors for judging the occurrence of pitting corrosion were extracted. The feasibility and advantages of machine learning model in solving the corrosion problem of materials were also discussed.</p>
</sec>
<sec id="s2">
<title>Dataset and Methods</title>
<sec id="s2-1">
<title>Establishing the Dataset</title>
<p>This section describes the details of collecting the corrosion dataset that was used to train and test the prediction performance of the machine learning models developed. In the corrosion dataset, a total of 100 valid data samples were collected. Among them, 40 samples are from literature (<xref ref-type="bibr" rid="B34">Yin et al., 2007</xref>; <xref ref-type="bibr" rid="B21">Liu et al., 2014a</xref>; <xref ref-type="bibr" rid="B18">Li et al., 2012</xref>; <xref ref-type="bibr" rid="B23">Liu et al., 2017</xref>; <xref ref-type="bibr" rid="B28">Santos et al., 2021</xref>), and the other 60 samples are from corrosion simulation experiments accumulated in our laboratory over the years. As shown in <xref ref-type="table" rid="T1">Table 1</xref>, all the materials in the statistics are pipeline steels with a small amount of alloying elements, and each complete data sample is composed of 13 material features (i.e., C, Si, Mn, P, S, Cr, Ni, Cu, Mo, Ti, Nb, Al, V), eight solution features (i.e., V<sub>s</sub>, Sal., Cl<sup>&#x2212;</sup>, HCO<sub>3</sub><sup>&#x2212;</sup>, Ca<sup>2&#x2b;</sup>, Mg<sup>2&#x2b;</sup>, Na<sup>&#x2b;</sup>, SO<sub>4</sub>
<sup>2-</sup>), four environmental features (i.e., T, H<sub>2</sub>S, CO<sub>2</sub>, CO<sub>2</sub>/H<sub>2</sub>S), immersion time (i.e., t) and pitting information. Detailed data sets are shown in <xref ref-type="sec" rid="s9">Supplementary Table S1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>List of features used in the machine learning models.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Material Features</th>
<th align="center">Unit</th>
<th align="center">Data range</th>
<th align="center">Solution Features</th>
<th align="center">Unit</th>
<th align="center">Data range</th>
<th align="center">Environment Features</th>
<th align="center">Unit</th>
<th align="center">Data range</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="14" align="left">Wt%</td>
</tr>
<tr>
<td align="left">C</td>
<td align="char" char="ndash">0.07&#x2013;0.26</td>
<td align="left">Fluid velocity (V<sub>s</sub>)</td>
<td rowspan="4" align="left">m/s</td>
<td align="char" char="ndash">0&#x2013;1.5</td>
<td align="left">Temperature (T)</td>
<td align="left">&#xb0;C</td>
<td align="left">60&#x2013;150</td>
</tr>
<tr>
<td align="left">Si</td>
<td align="char" char="ndash">0.22&#x2013;0.41</td>
<td align="left">Salinity (Sal.)</td>
<td align="char" char="ndash">0&#x2013;211510.2</td>
<td align="left">H<sub>2</sub>S</td>
<td align="left">kPa</td>
<td align="left">0.048&#x2013;12000</td>
</tr>
<tr>
<td align="left">Mn</td>
<td align="char" char="ndash">0.41&#x2013;1.45</td>
<td align="left">Cl<sup>&#x2212;</sup>
</td>
<td align="char" char="ndash">0&#x2013;129880</td>
<td align="left">CO<sub>2</sub>
</td>
<td align="left">kPa</td>
<td align="left">0&#x2013;10500</td>
</tr>
<tr>
<td align="left">P</td>
<td align="char" char="ndash">0.006&#x2013;0.014</td>
<td align="left">HCO<sub>3</sub>
<sup>&#x2212;</sup>
</td>
<td align="char" char="ndash">0&#x2013;10000</td>
<td rowspan="2" align="left">CO<sub>2</sub>/H<sub>2</sub>S</td>
<td rowspan="2" align="left">&#x2014;</td>
<td rowspan="2" align="left">0&#x2013;125000</td>
</tr>
<tr>
<td align="left">S</td>
<td align="char" char="ndash">0.001&#x2013;0.015</td>
<td align="left">Ca<sup>2&#x2b;</sup>
</td>
<td rowspan="9" align="left">mg/L</td>
<td align="char" char="ndash">0&#x2013;18200</td>
</tr>
<tr>
<td align="left">Cr</td>
<td align="char" char="ndash">0&#x2013;1.27</td>
<td align="left">Mg<sup>2&#x2b;</sup>
</td>
<td align="char" char="ndash">0&#x2013;5000</td>
<td rowspan="2" align="left">Time (t)</td>
<td rowspan="2" align="left">h</td>
<td rowspan="2" align="left">2&#x2013;240</td>
</tr>
<tr>
<td align="left">Ni</td>
<td align="char" char="ndash">0&#x2013;0.27</td>
<td align="left">Na<sup>&#x2b;</sup>
</td>
<td align="char" char="ndash">0&#x2013;69381.6</td>
</tr>
<tr>
<td align="left">Cu</td>
<td align="char" char="ndash">0&#x2013;0.087</td>
<td rowspan="6" align="left">SO<sub>4</sub>
<sup>2-</sup>
</td>
<td rowspan="6" align="char" char="ndash">0&#x2013;192</td>
<td rowspan="6" align="left">Pitting</td>
<td rowspan="6" align="left">&#x2014;</td>
<td rowspan="6" align="left">Yes / No</td>
</tr>
<tr>
<td align="left">Mo</td>
<td align="char" char="ndash">0&#x2013;0.72</td>
</tr>
<tr>
<td align="left">Ti</td>
<td align="char" char="ndash">0&#x2013;0.028</td>
</tr>
<tr>
<td align="left">Nb</td>
<td align="char" char="ndash">0&#x2013;0.026</td>
</tr>
<tr>
<td align="left">Al</td>
<td align="char" char="ndash">0&#x2013;0.091</td>
</tr>
<tr>
<td align="left">V</td>
<td align="char" char="ndash">0&#x2013;0.081</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-2">
<title>Features Selection</title>
<p>The purpose of feature selection is to simplify the feature set as much as possible and reduce the adverse effects caused by noise and redundant features while maintaining the description ability of feature set. This improves the accuracy, interpretability and operational efficiency of the model (<xref ref-type="bibr" rid="B35">Zhang et al., 2020</xref>).</p>
<p>In this section, feature variables are screened by combining feature importance calculation and Pearson correlation analysis. The former is based on the random forest model (RF model), which is composed of several simple classification and regression tree (CART) models. During the bootstrap sampling process, each CART model produces some data samples that are not selected for training. These data samples, termed the out-of-bag (OOB) samples, can be used to calculate feature importance (<xref ref-type="bibr" rid="B37">Zhi et al., 2019</xref>). For each CART, a disturbance is added to each input of the OOB data and the variation amplitude of the predicted results is then calculated. By comparing the amplitude of the variation, the importance of different inputs to the predicted target can be obtained. Finally, the RF model averages the results of all CARTs, which completes the calculation of the importance of each feature. The Pearson correlation coefficient is a statistic used to reflect the degree of linear correlation between two random feature variables (<xref ref-type="bibr" rid="B32">Waldmann, 2019</xref>). The coefficient, obtained by estimating the sample covariance and standard deviation, ranges from &#x2212;1 to 1. The greater the absolute value is, the stronger the correlation between feature variables is. For some machine learning models, the correlation between different feature variables has an important impact on the prediction results. Based on the above two methods, some redundant information can be removed from the original feature set, so as to achieve the purpose of feature reduction.</p>
<p>Feature combination is also a common method in feature engineering. Using a traditional theoretical calculation formula or model, several original features are combined into a new feature with practical significance. In this study, on the one hand, the pitting resistance equivalent number (PREN) is calculated based on <xref ref-type="bibr" rid="B4">Chen et al. (2021a)</xref>. PREN is a value calculated on the basis of the mass fraction of certain elements in the metal, and is usually used to compare the pitting corrosion resistance of alloys. A common PREN expression is as follows:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>%</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>3.3</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>%</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>o</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>16</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>%</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>On the other hand, the <italic>in-situ</italic> pH (pH<sub>IS</sub>) of the solution is calculated using environmental and solution factors based on the electronic corrosion engineer (ECE) software (<xref ref-type="bibr" rid="B10">Jasim, 2019</xref>). Therefore, two feature parameters, PREN and pH<sub>IS</sub>, are added by the method of the above feature combination.</p>
<p>In the aspect of feature creation, we explore a feature parameter that can contain the information of each element of steel and reflect the uniqueness of different steels. In this study, two different feature creation methods are proposed for each material. The feature creation method &#x2160; is defined by <xref ref-type="disp-formula" rid="e2">Eq. 2</xref>,<disp-formula id="e2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <italic>Y</italic>
<sub>
<italic>a</italic>
</sub> represents the element mass index of a material; <italic>M</italic>
<sub>
<italic>a1</italic>
</sub>,<italic>M</italic>
<sub>
<italic>a2</italic>
</sub>, &#x2026;, <italic>M</italic>
<sub>
<italic>an</italic>
</sub> are the atomic masses of elements <italic>a</italic>
<sub>
<italic>1</italic>
</sub>
<italic>,a</italic>
<sub>
<italic>2</italic>
</sub>
<italic>,&#x2026;a</italic>
<sub>
<italic>n</italic>
</sub>; <italic>X</italic>
<sub>
<italic>a1</italic>
</sub>,<italic>X</italic>
<sub>
<italic>a2</italic>
</sub>, &#x2026;, <italic>X</italic>
<sub>
<italic>an</italic>
</sub> represent the mass fractions of elements <italic>a</italic>
<sub>
<italic>1</italic>
</sub>
<italic>,a</italic>
<sub>
<italic>2</italic>
</sub>
<italic>,&#x2026;&#x2026;a</italic>
<sub>
<italic>n</italic>
</sub>. Method &#x2161; is defined by <xref ref-type="disp-formula" rid="e3">Eq. 3</xref>,<disp-formula id="e3">
<mml:math id="m3">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m4">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> is defined as the mass index ratio of nonmetallic to metallic elements in a material; <italic>b</italic>
<sub>
<italic>1</italic>
</sub>
<italic>,b</italic>
<sub>
<italic>2</italic>
</sub>
<italic>&#x2026;b</italic>
<sub>
<italic>n</italic>
</sub> represent the nonmetallic elements and <italic>c</italic>
<sub>
<italic>1</italic>
</sub>
<italic>,c</italic>
<sub>
<italic>2</italic>
</sub>
<italic>&#x2026;c</italic>
<sub>
<italic>n</italic>
</sub> represent the metal elements. Two new features are generated.</p>
</sec>
<sec id="s2-3">
<title>Experimental Procedure</title>
<p>In this study, we first selected the appropriate dataset division ratio and machine learning classification algorithm through testing. Specifically, data of 40, 50, 60, 70, 80 and 90% were randomly selected from the original corrosion dataset after cleaning as the training set, and the remaining data as the test set. The training set was mainly used to optimize the classification model, and the test set was only used to identify the classification accuracy of the model. We prepared five machine learning classification models to be tested, including random forest classification model (RFC), support vector classification model with radial basis function kernel (SVC), gradient boosting decision tree classification model (GBC), naive bayes classification model (NB), and k-nearest neighbor model (KNN). Datasets of different proportions were input into different classification models for testing. During the training process, we used receiver operating characteristic (ROC) curve and area under curve (AUC) to evaluate the training effect of the model (<xref ref-type="bibr" rid="B19">Li et al., 2015</xref>). Each group of tests was repeated for 100 times, and the best-performing dataset division ratio and classification model were selected according to the average score.</p>
<p>Secondly, in terms of feature reduction, we conducted feature importance calculation and Pearson correlation analysis for all feature parameters (i.e., 13 material features, eight solution features, four environmental features and immersion time). Noise and redundant features were eliminated to form feature combination &#x2160;, and based on this feature combination, pitting judgment model &#x2160; was established.</p>
<p>Thirdly, in the aspect of feature combination, two feature parameters (i.e., PREN and pH<sub>IS</sub>) were added by using the traditional theoretical calculation model. For feature creation, we converted the information of each steel element into two feature parameters (i.e., <italic>Y</italic>
<sub>
<italic>a</italic>
</sub> and <inline-formula id="inf2">
<mml:math id="m5">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>). The four new feature parameters were combined with feature combination &#x2160;, and then the feature combination &#x2161; was formed after removing the features that contributed less to the target parameter, and the pitting judgment model &#x2161; was established. The performance of the two models was compared, and the improvement of the model&#x2019;s generalization ability was demonstrated.</p>
<p>In the process of feature selection, model optimization and evaluation, the F1 score was employed as the evaluation standard. In short, the F1 score is a measure for classification problems and is the harmonic mean of precision and recall. The closer its value is to 1, the better the performance of the model (<xref ref-type="bibr" rid="B20">Lim and Chi, 2021</xref>). For a binary classification problem, a 2 &#xd7; 2 confusion matrix is formed based on the forecast labels and actuality labels (as shown in <xref ref-type="table" rid="T2">Table 2</xref>), where a true positive (TP) refers to the correct judgment of a positive sample (e.g., a case of pitting is correctly predicted) and a false positive (FP) refers to a negative sample wrongly judged as positive (e.g., pitting is predicted for a case in which no pitting occurred). Similar definitions can be given to the false negative (FN) and true negative (TN). Further, precision, recall and F1 score can be respectively calculated by the following formulas:<disp-formula id="e4">
<mml:math id="m6">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m7">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="e6">
<mml:math id="m8">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Confusion matrix for binary classifier.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Actuality</th>
<th colspan="2" align="center">Forecast</th>
</tr>
<tr>
<th align="left">True</th>
<th align="center">False</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">True</td>
<td align="left">True positive (TP)</td>
<td align="left">False negative (FN)</td>
</tr>
<tr>
<td align="left">False</td>
<td align="left">False positive (FP)</td>
<td align="left">True negative (TN)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>This research was based on the Python programming language, using Spyder 3.3.6 software, and all machine learning algorithms involved in the research process were executed by the scikit-learn library. The main algorithm parameters are as follows: max_depth &#x3d; 40 and n_estimators &#x3d; 100 in RFC model; C &#x3d; 170 and gamma &#x3d; 0.5 in SVC model; max_depth &#x3d; 10 and n_estimators &#x3d; 100 in GBC model; k &#x3d; 2 in KNN model, and all other parameters in the model are set to default values.</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<title>Results and Discussion</title>
<sec id="s3-1">
<title>Selection of Dataset Division Ratio and Machine Learning Models</title>
<p>Based on the five classification models, the influence of different training set proportions on model performance was explored, and the results were shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. We randomly selected a specified proportion of test sets and repeated the test 100 times to evaluate the prediction performance of the model according to the average score. On the whole, as the proportion of the training set gradually increased, the prediction performance of the model gradually improved. This was because the amount of data in the training set was usually proportional to the effective information contained in it. Therefore, a larger proportion of the training set was highly likely to improve the comprehensive prediction performance of the model. However, when the proportion of training set increased to more than 80%, the F1 score of KNN model decreased significantly, while the F1 scores of SVC model and NB model decreased slightly. This may be due to the overfitting of these algorithms, and thus, the generalization ability of the model is significantly reduced (<xref ref-type="bibr" rid="B7">Deng et al., 2015</xref>). Therefore, the division ratio of the training set selected in this study was 80%.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The pitting prediction performance of five different classification models in different proportions of datasets.</p>
</caption>
<graphic xlink:href="fmats-08-733813-g001.tif"/>
</fig>
<p>In the process of determining the partition ratio of training set, it was found that the RFC model had the best comprehensive performance. In order to further confirm the best model for predicting pitting, the ROC curve and AUC value of the five classification models were respectively drawn and calculated. The ROC curve, which defines false positive rate (FPR) as the <italic>X</italic> axis and true positive rate (TPR) as the <italic>Y</italic> axis, describes the relationship between TP and FP. The closer the ROC curve is to the upper left corner, the better the performance of the model (<xref ref-type="bibr" rid="B12">He et al., 2021</xref>). AUC is the area under the ROC curve and the larger the AUC, the higher the model performance. <xref ref-type="fig" rid="F2">Figure 2</xref> (A-E) were the ROC curves drawn based on the five different models (RFC model, SVC model, GBC model, KNN model and NB model). Among them, the method of five-fold cross-validation was used in the process and the blue line represented the average ROC curve. By comparison, the curve based on RFC model was closer to the upper left corner, which proved that this model had the best performance. In addition, the average AUC based on the RFC model was 0.84. Meanwhile, other classification models adopted the same method, and the calculated results of average AUC were shown in <xref ref-type="fig" rid="F2">Figure 2F</xref>. The red lines represented the error range for 100 repetitions. As can be seen from the figure, the RFC model had the best predictive performance, followed by the NB model and GBC model, while the SVC model and KNN model had the lowest average AUC values. Combined with the above results, the RFC model was selected for subsequent studies.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>ROC curve and mean AUC obtained by cross-validation <bold>(A)</bold> When using the RFC model, <bold>(B)</bold> When using the SVC model, <bold>(C)</bold> When using the GBC model, <bold>(D)</bold> When using the KNN model, <bold>(E)</bold> When using the NB model, <bold>(F)</bold> When the five classification models are compared.</p>
</caption>
<graphic xlink:href="fmats-08-733813-g002.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>Effect of Feature Engineering on Model&#x2019;s Performance</title>
<p>In the first step, the Pearson correlation analysis method was used to reduce features. Specifically, the original 13 material features, eight solution features, four environmental features and immersion time were input into the RF model, and the calculation results of feature importance were shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. To ensure the generalization ability of the model, we only selected the features with importance values above 0.02 (i.e., CO<sub>2</sub>, T, CO<sub>2</sub>/H<sub>2</sub>S, H<sub>2</sub>S of environmental features; Cl<sup>&#x2212;</sup>, Sal., Na<sup>&#x2b;</sup>, Ca<sup>2&#x2b;</sup>, Mg<sup>2&#x2b;</sup>, HCO<sub>3</sub>
<sup>&#x2212;</sup> of solution features; t). The combined importance of the selected 11 features exceeds 0.85, and they contain most of the information related to pitting.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The feature importance sequence for 26 features based on RF model.</p>
</caption>
<graphic xlink:href="fmats-08-733813-g003.tif"/>
</fig>
<p>In terms of environmental features, CO<sub>2</sub> is usually present in corrosive solution in the form of a dissolved gas. HCO<sub>3</sub>
<sup>&#x2212;</sup> and H<sub>2</sub>CO<sub>3</sub> are formed when CO<sub>2</sub> reacts with water, and the H<sup>&#x2b;</sup> produced by their ionization can result in local acidification and pitting corrosion (<xref ref-type="bibr" rid="B5">Chen et al., 2021b</xref>). The solubility of H<sub>2</sub>S in water is higher than that of CO<sub>2</sub>. With the increase of the concentration of H<sub>2</sub>S, H<sub>2</sub>S decomposes into more H<sup>&#x2b;</sup> and HS<sup>&#x2212;</sup>, which can change the local acidity of steel surface and promote the anodic dissolution process, thus affecting the pitting susceptibility of steel (<xref ref-type="bibr" rid="B36">Zhao et al., 2020</xref>). In addition, whether the corrosion process is dominated by CO<sub>2</sub> or H<sub>2</sub>S, the non-dense or non-uniform corrosion products formed on the surface of the steel can accelerate the development of pitting corrosion (<xref ref-type="bibr" rid="B23">Liu et al., 2017</xref>). Temperature is also a key factor affecting pitting, as many materials do not pit below a certain temperature (critical pitting temperature), which has been demonstrated to exist (<xref ref-type="bibr" rid="B25">Mendibide and Duret-Thual, 2018</xref>).</p>
<p>In terms of solution features, it is generally believed that Cl<sup>&#x2212;</sup> has a great influence on the pitting susceptibility of steel. In other words, the higher the content of Cl<sup>&#x2212;</sup>, the looser the corrosion product scale formed on the steel surface and the more serious the cracking is. The Cl<sup>&#x2212;</sup> reaching the steel surface through the corrosion product scale can accelerate the local anode reaction, produce pitting pits and develop rapidly along the longitudinal direction (<xref ref-type="bibr" rid="B22">Liu et al., 2014b</xref>). Ca<sup>2&#x2b;</sup> and Mg<sup>2&#x2b;</sup> also have the ability to influence pitting susceptibility of steel significantly given that the presence of divalent salts can reduce CO<sub>2</sub> solubility (i.e., CaCO<sub>3</sub> in the case of Ca<sup>2&#x2b;</sup> presence and MgCO<sub>3</sub> in the case of Mg<sup>2&#x2b;</sup> presence) (<xref ref-type="bibr" rid="B14">Hua et al., 2018</xref>). Salinity refers to the total ion content in the solution, and the increase of its content can also change the solubility of CO<sub>2</sub> and H<sub>2</sub>S, thus affecting the development of pitting corrosion (<xref ref-type="bibr" rid="B11">Han et al., 2011</xref>).</p>
<p>Then, we calculated the Pearson correlation coefficient based on our dataset of solution features and environment features. As shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, the color (blue or red) indicates the direction of the relationship (positive or negative), and the intensity of the color indicates how strong the relationship is (white for completely unrelated and dark blue or red for perfectly correlated). Strong correlations occur between Sal., Na<sup>&#x2b;</sup>, and Cl<sup>&#x2212;</sup>, mainly because Cl<sup>&#x2212;</sup> and Na<sup>&#x2b;</sup> were usually very high in the solution being counted, and the salinity was almost composed of these two ions. Sufficient information could be obtained by selecting only one feature from a combination of features with strong correlation, and the importance of feature was usually proportional to the effective information contained in it (<xref ref-type="bibr" rid="B33">Wang et al., 2020</xref>). Thus, Cl<sup>&#x2212;</sup> was retained and Sal. and Na<sup>&#x2b;</sup> were discarded. Another feature combination with strong correlation was Ca<sup>2&#x2b;</sup> and Mg<sup>2&#x2b;</sup>, which had a similar effect on the pitting susceptibility of steel. Ca<sup>2&#x2b;</sup> was also retained according to the above idea. The feature combination &#x2160; (i.e., CO<sub>2</sub>, T, CO<sub>2</sub>/H<sub>2</sub>S, H<sub>2</sub>S of environmental features; Cl<sup>&#x2212;</sup>, Ca<sup>2&#x2b;</sup>, HCO<sub>3</sub>
<sup>&#x2212;</sup> of solution features; t) was determined.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Pearson correlation matrix for 12 features.</p>
</caption>
<graphic xlink:href="fmats-08-733813-g004.tif"/>
</fig>
<p>Two feature parameters, PREN and pH<sub>IS</sub>, were added by feature combination, and using feature creation methods &#x2160; and &#x2161;, two new feature parameters were obtained, namely <italic>Y</italic>
<sub>
<italic>a</italic>
</sub> and <inline-formula id="inf3">
<mml:math id="m9">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>. The four newly generated feature parameters (i.e., PREN, pH<sub>IS</sub>, <italic>Y</italic>
<sub>
<italic>a</italic>
</sub>, <inline-formula id="inf4">
<mml:math id="m10">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>) were combined with feature combination &#x2160;, and the feature importance was calculated and sorted (<xref ref-type="fig" rid="F5">Figure 5</xref>). pH<sub>IS</sub> and <inline-formula id="inf5">
<mml:math id="m11">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> had great influence on the pitting judgment model, especially pH<sub>IS</sub>, while the importance values of PREN and <italic>Y</italic>
<sub>
<italic>a</italic>
</sub> were relatively low. <xref ref-type="bibr" rid="B27">Pourbaix (2009)</xref> has shown that the pitting potential of carbon steel becomes negative with the decrease of pH, which increases the susceptibility of pitting induction. To sum up, the two feature parameters (i.e., PREN and <italic>Y</italic>
<sub>
<italic>a</italic>
</sub>) were removed, and the feature combination &#x2161; including CO<sub>2</sub>, T, CO<sub>2</sub>/H<sub>2</sub>S, Cl<sup>&#x2212;</sup>, Ca<sup>2&#x2b;</sup>, HCO<sub>3</sub>
<sup>&#x2212;</sup>, t, pH<sub>IS</sub> and <inline-formula id="inf6">
<mml:math id="m12">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> was selected as the input features of the pitting judgment model.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The feature importance sequence for 12 features based on RF model.</p>
</caption>
<graphic xlink:href="fmats-08-733813-g005.tif"/>
</fig>
<p>Based on the above two different groups of input features (feature combinations &#x2160; and &#x2161;), pitting judgment models &#x2160; and &#x2161; were individually established by RF model. <xref ref-type="table" rid="T3">Table 3</xref> lists the predictive performance of each model. Each prediction process was repeated 100&#xa0;times. By comparison, pitting judgment model &#x2161; with the added pH<sub>IS</sub> and <inline-formula id="inf7">
<mml:math id="m13">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> had a stronger performance, and the average F1 scores for the training set and test set reached 0.996 and 0.987, respectively. As shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, the performance improvement of Model &#x2161; was mainly due to the two added features, especially the pH<sub>IS</sub>, which contributed greatly to judging whether pitting occurs. Therefore, we employed this model as the preferred model of pitting judgment.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>The predictive accuracy of the pitting prediction model using feature combinations &#x2160; and &#x2161;, respectively.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Methods</th>
<th colspan="3" align="center">Training set - F1 score</th>
<th colspan="3" align="center">Test set - F1 score</th>
</tr>
<tr>
<th align="center">Max</th>
<th align="center">Min</th>
<th align="center">Mean</th>
<th align="center">Max</th>
<th align="center">Min</th>
<th align="center">Mean</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Feature combination &#x2160;</td>
<td align="char" char=".">0.975</td>
<td align="char" char=".">0.949</td>
<td align="char" char=".">0.969</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.813</td>
<td align="char" char=".">0.938</td>
</tr>
<tr>
<td align="left">Feature combination &#x2161;</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.974</td>
<td align="char" char=".">
<bold>0.996</bold>
</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.886</td>
<td align="char" char=".">
<bold>0.987</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="fig" rid="F3">Figure 3</xref>, the two most important feature parameters are CO<sub>2</sub> content and T for judging the occurrence of pitting. We tried to explore the law of pitting occurrence only through these two feature parameters. The relationship between T and CO<sub>2</sub> content with the occurrence of pitting is displayed in <xref ref-type="fig" rid="F6">Figure 6</xref>. Surprisingly, both the 3D scatter plot and the projection drawing of T and CO<sub>2</sub> content are unable to classify the occurrence of pitting. Pitting and non-pitting overlap each other, suggesting that the parameters of T and CO<sub>2</sub> content are not enough to distinguish the occurrence of pitting. Some other features also affect the pitting process. As we know, the development of pitting is an extremely complex process, and the influence of many factors must be considered comprehensively, which is exactly the advantage of machine learning model compared with traditional theoretical model.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Influence of temperature and CO<sub>2</sub> content on the prediction of pitting.</p>
</caption>
<graphic xlink:href="fmats-08-733813-g006.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>Generalization Capabilities of Machine Learning Model</title>
<p>25 new rows of immersion test corrosion data (all parameters within the range) were collected (from our lab) as the validation set to verify the generalization ability of the model. The methods of feature reduction, combination, and creation were used to transform it into a feature set of the same type as feature combination &#x2161;, and then the pitting corrosion of each sample was predicted by the optimized model. As shown in <xref ref-type="table" rid="T4">Table 4</xref>, the pitting judgment model still shows a high prediction accuracy.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>The results of model generalization performance.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">N &#x3d; 25</th>
<th align="center">Predicted pitting</th>
<th align="center">Predicted no-pitting</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Actual pitting (17)</td>
<td align="char" char=".">16</td>
<td align="char" char=".">1</td>
</tr>
<tr>
<td align="left">Actual no-pitting (8)</td>
<td align="char" char=".">0</td>
<td align="char" char=".">8</td>
</tr>
<tr>
<td align="left">F1 score</td>
<td colspan="2" align="center">0.97</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>Conclusion</title>
<p>In this study, we proposed a machine learning model based on experimental data to judge the occurrence of pitting for pipeline steel. Machine learning algorithm and feature engineering correlation method are used to analyze the relationship between the occurrence of pitting and input features such as material factors, solution factors and environmental factors. For this kind of material, CO<sub>2</sub>, T, CO<sub>2</sub>/H<sub>2</sub>S, Cl<sup>&#x2212;</sup>, Ca<sup>2&#x2b;</sup>, HCO<sub>3</sub>
<sup>&#x2212;</sup>, t, pH<sub>IS</sub> and <inline-formula id="inf8">
<mml:math id="m14">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> are considered to be the key factors to judge whether pitting happens or not. The generalization ability of the model is enhanced by replacing alloying element content with specific input parameters. Finally, the F1 scores of the optimized models were all greater than 0.97. Based on these results, machine learning method provides an effective means for processing high-dimensional and complex corrosion data, and can be a useful tool for further exploration of material corrosion problems.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>ZQ, XL, and YL assembled the corrosion dataset and the feature set used in learning. ZW performed the machine learning. All authors analyzed the results and contributed to writing the manuscript.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was supported by National Key R&#x0026;D Program of China (No. 2020YFB0704501).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmats.2021.733813/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmats.2021.733813/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table1.XLSX" id="SM1" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Balekelayi</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Tesfamariam</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>External Corrosion Pitting Depth Prediction Using Bayesian Spectral Analysis on Bare Oil and Gas Pipelines</article-title>. <source>Int. J. Press. Vessels Piping.</source> <volume>188</volume> (<issue>12</issue>), <fpage>104224</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijpvp.2020.104224</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhandari</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Abbassi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Garaniya</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Ojeda</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Modelling of Pitting Corrosion in Marine and Offshore Steel Structures - A Technical Review</article-title>. <source>J. Loss Prev. Process Industries.</source> <volume>37</volume>, <fpage>39</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1016/j.jlp.2015.06.008</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Toku-Gyamerah</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Cluster Analysis of Acoustic Emission Signals in Pitting Corrosion of Low Carbon Steel</article-title>. <source>Mat.-wiss. U. Werkstofftech.</source> <volume>46</volume> (<issue>7</issue>), <fpage>736</fpage>&#x2013;<lpage>746</lpage>. <pub-id pub-id-type="doi">10.1002/mawe.201500347</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>Revealing the Inner Rules of PREN from Electronic Aspect by First-Principles Calculations</article-title>. <source>Corrosion Sci.</source> <volume>189</volume>, <fpage>109561</fpage>. <pub-id pub-id-type="doi">10.1016/j.corsci.2021.109561</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>Ming</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>Effects of Temperature on the Corrosion Behaviour of X70 Steel in CO<sub>2</sub>-Containing Formation Water</article-title>. <source>J. Nat. Gas Sci. Eng.</source> <volume>88</volume>, <fpage>103815</fpage>. <pub-id pub-id-type="doi">10.1016/j.jngse.2021.103815</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Choi</surname>
<given-names>Y.-S.</given-names>
</name>
<name>
<surname>Shim</surname>
<given-names>J.-J.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J.-G.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Effects of Cr, Cu, Ni and Ca on the Corrosion Behavior of Low Carbon Steel in Synthetic Tap Water</article-title>. <source>J. Alloys Comp.</source> <volume>391</volume>, <fpage>162</fpage>&#x2013;<lpage>169</lpage>. <pub-id pub-id-type="doi">10.1016/j.jallcom.2004.07.081</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>B.-C.</given-names>
</name>
<name>
<surname>Yun</surname>
<given-names>Y.-H.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y.-Z.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D.-S.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Q.-S.</given-names>
</name>
<name>
<surname>Yi</surname>
<given-names>L.-Z.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>A New Strategy to Prevent Over-fitting in Partial Least Squares Models Based on Model Population Analysis</article-title>. <source>Analytica Chim. Acta.</source> <volume>880</volume>, <fpage>32</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1016/j.aca.2015.04.045</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Diao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Improvement of the Machine Learning-Based Corrosion Rate Prediction Model through the Optimization of Input Features</article-title>. <source>Mater. Des.</source> <volume>198</volume>, <fpage>109326</fpage>. <pub-id pub-id-type="doi">10.1016/j.matdes.2020.109326</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghidini</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dalle Donne</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Fatigue Life Predictions Using Fracture Mechanics Methods</article-title>. <source>Eng. Fracture Mech.</source> <volume>76</volume> (<issue>1</issue>), <fpage>134</fpage>&#x2013;<lpage>148</lpage>. <pub-id pub-id-type="doi">10.1016/j.engfracmech.2008.07.008</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hadi Jasim</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Evaluation the Effect of Velocity and Temperature on the Corrosion Rate of Crude Oil Pipeline in the Presence of CO2/H2S Dissolved Gases</article-title>. <source>Ijcpe</source> <volume>20</volume> (<issue>2</issue>), <fpage>41</fpage>&#x2013;<lpage>50</lpage>. <pub-id pub-id-type="doi">10.31699/IJCPE.2019.2.6</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Carey</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Effect of Sodium Chloride on Corrosion of Mild Steel in CO2-saturated Brines</article-title>. <source>J. Appl. Electrochem.</source> <volume>41</volume>, <fpage>741</fpage>&#x2013;<lpage>749</lpage>. <pub-id pub-id-type="doi">10.1007/s10800-011-0290-3</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Machine Learning Identified Materials Descriptors for Ferroelectricity</article-title>. <source>Acta Materialia.</source> <volume>209</volume>, <fpage>116815</fpage>. <pub-id pub-id-type="doi">10.1016/j.actamat.2021.116815</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Teng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The Probabilistic Life Time Prediction Model of Oil Pipeline Due to Local Corrosion Crack</article-title>. <source>Theor. Appl. Fracture Mech.</source> <volume>70</volume>, <fpage>10</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1016/j.tafmec.2014.04.002</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hua</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shamsa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Barker</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Neville</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Protectiveness, Morphology and Composition of Corrosion Products Formed on Carbon Steel in the Presence of Cl&#x2212;, Ca2&#x2b; and Mg2&#x2b; in High Pressure CO2 Environments</article-title>. <source>Appl. Surf. Sci.</source> <volume>455</volume>, <fpage>667</fpage>&#x2013;<lpage>682</lpage>. <pub-id pub-id-type="doi">10.1016/j.apsusc.2018.05.140</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jimenez-Come</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Munoz</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Garcia</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Matres</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Trujillo</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). &#x201c;<article-title>Pitting Corrosion Detection of Austenitic Stainless Steel EN 1.4404 in MgCl2 Solutions Using a Machine Learning Approach</article-title>,&#x201d; in <conf-name>AIP Conference Proceedings</conf-name>. (<publisher-name>American Institute of Physics</publisher-name>). <pub-id pub-id-type="doi">10.1063/1.4707652</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kasai</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mori</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tamura</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sekine</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Tsuchida</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Serizawa</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Predicting Maximum Depth of Corrosion Using Extreme Value Analysis and Bayesian Inference</article-title>. <source>Int. J. Press. Vessels Piping.</source> <volume>146</volume>, <fpage>129</fpage>&#x2013;<lpage>134</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijpvp.2016.08.002</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolawole</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Kolawole</surname>
<given-names>F. O.</given-names>
</name>
<name>
<surname>Enegela</surname>
<given-names>O. P.</given-names>
</name>
<name>
<surname>Adewoye</surname>
<given-names>O. O.</given-names>
</name>
<name>
<surname>Soboyejo</surname>
<given-names>A. B. O.</given-names>
</name>
<name>
<surname>Soboyejo</surname>
<given-names>W. O.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Pitting Corrosion of a Low Carbon Steel in Corrosive Environments: Experiments and Models</article-title>. <source>Adv. Mater. Res.</source> <volume>1132</volume>, <fpage>349</fpage>&#x2013;<lpage>365</lpage>. <pub-id pub-id-type="doi">10.4028/www.scientific.net/AMR.1132.349</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>W.-f.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.-j.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Corrosion Behavior of 110S Tube Steel in Environments of High H2S and CO2 Content</article-title>. <source>J. Iron Steel Res. Int.</source> <volume>19</volume>, <fpage>59</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/S1006-706X(13)60033-3</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Recognition Algorithm of Acoustic Emission Signals Based on Conditional Random Field Model in Storage Tank Floor Inspection Using Inner Detector</article-title>. <source>Shock Vib.</source> <volume>2015</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1155/2015/173470</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Damage Prediction on Bridge Decks Considering Environmental Effects with the Application of Deep Neural Networks</article-title>. <source>KSCE J. Civ. Eng.</source> <volume>25</volume> (<issue>4</issue>), <fpage>371</fpage>&#x2013;<lpage>385</lpage>. <pub-id pub-id-type="doi">10.1007/s12205-020-5669-4</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ke</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>E.-H.</given-names>
</name>
</person-group> (<year>2014a</year>). <article-title>Corrosion Behavior of X52 Anti-H2S Pipeline Steel Exposed to High H2S Concentration Solutions at 90 &#xb0;C</article-title>. <source>J. Mater. Sci. Tech.</source> <volume>30</volume> (<issue>05</issue>), <fpage>504</fpage>&#x2013;<lpage>510</lpage>. <pub-id pub-id-type="doi">10.1016/j.jmst.2013.10.018</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Q. Y.</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>S. W.</given-names>
</name>
</person-group> (<year>2014b</year>). <article-title>Effects of Chloride Content on CO2 Corrosion of Carbon Steel in Simulated Oil and Gas Well Environments</article-title>. <source>Corrosion Sci.</source> <volume>84</volume>, <fpage>165</fpage>&#x2013;<lpage>171</lpage>. <pub-id pub-id-type="doi">10.1016/j.corsci.2014.03.025</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Comparison of Corrosion Behaviour of Low-alloy Pipeline Steel Exposed to H2S/CO2-saturated Brine and Vapour-Saturated H2S/CO2 Environments</article-title>. <source>Electrochimica Acta</source> <volume>232</volume>, <fpage>528</fpage>&#x2013;<lpage>541</lpage>. <pub-id pub-id-type="doi">10.1016/j.electacta.2017.02.114</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Melchers</surname>
<given-names>R. E.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Extreme Value Statistics and Long-Term Marine Pitting Corrosion of Steel</article-title>. <source>Probabilistic Eng. Mech.</source> <volume>23</volume> (<issue>4</issue>), <fpage>482</fpage>&#x2013;<lpage>488</lpage>. <pub-id pub-id-type="doi">10.1016/j.probengmech.2007.09.003</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mendibide</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Duret-Thual</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Determination of the Critical Pitting Temperature of Corrosion Resistant Alloys in H2S Containing Environments</article-title>. <source>Corrosion Sci.</source> <volume>142</volume>, <fpage>56</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/j.corsci.2018.07.003</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohammad</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Hammadi</surname>
<given-names>N. J.</given-names>
</name>
<name>
<surname>Lafta</surname>
<given-names>R. M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Prediction of Pitting Corrosion Characteristics Using Artificial Neural Networks</article-title>. <source>Int. J. Comput. Appl.</source> <volume>60</volume> (<issue>4</issue>), <fpage>4</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.5120/9678-4105</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pourbaix</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1970</year>). <article-title>Significance of Protection Potential in Pitting and Intergranular Corrosion</article-title>. <source>Corrosion</source> <volume>26</volume>, <fpage>431</fpage>&#x2013;<lpage>438</lpage>. <pub-id pub-id-type="doi">10.5006/0010-9312-26.10.431</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santos</surname>
<given-names>B. A. F.</given-names>
</name>
<name>
<surname>Serenario</surname>
<given-names>M. E. D.</given-names>
</name>
<name>
<surname>Souza</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Oliveira</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Vaz</surname>
<given-names>G. L.</given-names>
</name>
<name>
<surname>Gomes</surname>
<given-names>J. A. C. P.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The Electrolyte Renewal Effect on the Corrosion Mechanisms of API X65 Carbon Steel under Sweet and Sour Environments</article-title>. <source>J. Pet. Sci. Eng.</source> <volume>199</volume>, <fpage>108347</fpage>. <pub-id pub-id-type="doi">10.1016/j.petrol.2021.108347</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soares</surname>
<given-names>C. G.</given-names>
</name>
<name>
<surname>Garbatov</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zayed</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Influence of Environmental Factors on Corrosion of Ship Structures in Marine Atmosphere</article-title>. <source>Corrosion Sci.</source> <volume>51</volume> (<issue>9</issue>), <fpage>2014</fpage>&#x2013;<lpage>2026</lpage>. <pub-id pub-id-type="doi">10.1016/j.corsci.2009.05.028</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vajo</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Phelps</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Reiner</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Herrera</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Cervantes</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2003</year>). <article-title>Application of Extreme Value Analysis to Crevice Corrosion</article-title>. <source>Corrosion Sci.</source> <volume>45</volume> (<issue>3</issue>), <fpage>497</fpage>&#x2013;<lpage>509</lpage>. <pub-id pub-id-type="doi">10.1016/S0010-938X(02)00129-4</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Valor</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Caleyo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rivas</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hallen</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Stochastic Approach to Pitting-Corrosion-Extreme Modelling in Low-Carbon Steel</article-title>. <source>Corrosion Sci.</source> <volume>52</volume> (<issue>3</issue>), <fpage>910</fpage>&#x2013;<lpage>915</lpage>. <pub-id pub-id-type="doi">10.1016/j.corsci.2009.11.011</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waldmann</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>On the Use of the Pearson Correlation Coefficient for Model Evaluation in Genome-Wide Prediction</article-title>. <source>Front. Genet.</source> <volume>10</volume>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00899</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kirk</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Laris</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Ross</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Noebe</surname>
<given-names>R. D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Accelerated Design of Fe-Based Soft Magnetic Materials Using Machine Learning and Stochastic Optimization</article-title>. <source>Acta Materialia</source> <volume>194</volume>, <fpage>144</fpage>&#x2013;<lpage>155</lpage>. <pub-id pub-id-type="doi">10.1016/j.actamat.2020.05.006</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yin</surname>
<given-names>Z. F.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>W. Z.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>Z. Q.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Y. R.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>W. J.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Corrosion Behavior of SM 80SS Tube Steel in Stimulant Solution Containing H<sub>2</sub>S and CO<sub>2</sub>
</article-title>. <source>Electrochimica Acta</source> <volume>53</volume>, <fpage>3690</fpage>&#x2013;<lpage>3700</lpage>. <pub-id pub-id-type="doi">10.1016/j.electacta.2007.12.039</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ouyang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Machine Learning Reveals the Importance of the Formation Enthalpy and Atom-Size Difference in Forming Phases of High Entropy Alloys</article-title>. <source>Mater. Des.</source> <volume>193</volume>, <fpage>108835</fpage>. <pub-id pub-id-type="doi">10.1016/j.matdes.2020.108835</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Effect of CO2/H2S and Applied Stress on Corrosion Behavior of 15Cr Tubing in Oil Field Environment</article-title>. <source>Metals</source> <volume>10</volume>, <fpage>409</fpage>. <pub-id pub-id-type="doi">10.3390/met10030409</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Prediction and Knowledge Mining of Outdoor Atmospheric Corrosion Rates of Low Alloy Steels Based on the Random Forests Approach</article-title>. <source>Metals</source> <volume>9</volume>, <fpage>383</fpage>. <pub-id pub-id-type="doi">10.3390/met9030383</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>