<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Phys.</journal-id>
<journal-title>Frontiers in Physics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Phys.</abbrev-journal-title>
<issn pub-type="epub">2296-424X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">873848</article-id>
<article-id pub-id-type="doi">10.3389/fphy.2022.873848</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Physics</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Anomaly Detection Based on Convex Analysis: A Survey</article-title>
<alt-title alt-title-type="left-running-head">Wang et al.</alt-title>
<alt-title alt-title-type="right-running-head">Convex Analysis-Based Anomaly Detection</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Tong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cai</surname>
<given-names>Mengsi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/935472/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ouyang</surname>
<given-names>Xiao</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cao</surname>
<given-names>Ziqiang</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cai</surname>
<given-names>Tie</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Tan</surname>
<given-names>Xu</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Lu</surname>
<given-names>Xin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1014726/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Systems Engineering</institution>, <institution>National University of Defense Technology</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>College of Economy and Management</institution>, <institution>Changsha University</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Liberal Arts and Sciences</institution>, <institution>National University of Defense Technology</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Power China Zhongnan Engineering Corporation Limited</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>School of Software Engineering</institution>, <institution>Shenzhen Institute of Information Technology</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Department of Global Public Health</institution>, <institution>Karolinska Institutet</institution>, <addr-line>Stockholm</addr-line>, <country>Sweden</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/73426/overview">Jos&#xe9; Tadeu Lunardi</ext-link>, Universidade Estadual de Ponta Grossa, Brazil</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1695755/overview">Peng Li</ext-link>, Institute industrial IT, Germany</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/654727/overview">Zhiwei Ji</ext-link>, Nanjing Agricultural University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Xu Tan, <email>tanxu_nudt@yahoo.com</email>; Xin Lu, <email>xin_lyu@sina.com</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Statistical and Computational Physics, a section of the journal Frontiers in Physics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>27</day>
<month>04</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>10</volume>
<elocation-id>873848</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>02</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>03</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Wang, Cai, Ouyang, Cao, Cai, Tan and Lu.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Wang, Cai, Ouyang, Cao, Cai, Tan and Lu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>As a crucial technique for identifying irregular samples or outlier patterns, anomaly detection has broad applications in many fields. Convex analysis (CA) is one of the fundamental methods used in anomaly detection, which contributes to the robust approximation of algebra and geometry, efficient computation to a unique global solution, and mathematical optimization for modeling. Despite the essential role of CA and the ever-growing research on CA-based anomaly detection algorithms, little work has provided a comprehensive survey of them. To fill this gap, we summarize the CA techniques used in anomaly detection and classify them into four categories: density estimation methods, matrix factorization methods, machine learning methods, and others. The theoretical background, sub-categories of methods, typical applications as well as strengths and limitations for each category are introduced. This paper sheds light on a succinct and structured framework and provides researchers with new insights into both anomaly detection and CA. With the remarkable progress made in the techniques of big data and machine learning, CA-based anomaly detection holds great promise for more expeditious, accurate and intelligent detection capacities.</p>
</abstract>
<kwd-group>
<kwd>anomaly detection</kwd>
<kwd>convex analysis</kwd>
<kwd>density estimation</kwd>
<kwd>matrix factorization</kwd>
<kwd>machine learning</kwd>
</kwd-group>
<contract-num rid="cn001">72025405 72088101 91846301 71790615 71774168</contract-num>
<contract-num rid="cn002">2020TP1013 2020JJ4673</contract-num>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">Science and Technology Program of Hunan Province<named-content content-type="fundref-id">10.13039/501100019081</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Anomalies are irregular items, events, or observations that differ significantly from the majority of the data and can translate into critical actionable information in various application domains [<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B3">3</xref>]. For example, anomalous readings from the sensor of a large mechanical system could signify a fault in some components of the system.</p>
<p>The problem of anomaly detection was raised as early as the 19th century [<xref ref-type="bibr" rid="B4">4</xref>], and has been extensively studied in various fields, such as network intrusion detection [<xref ref-type="bibr" rid="B5">5</xref>,<xref ref-type="bibr" rid="B6">6</xref>], process fault monitoring [<xref ref-type="bibr" rid="B7">7</xref>,<xref ref-type="bibr" rid="B8">8</xref>], image outlier detection [<xref ref-type="bibr" rid="B9">9</xref>,<xref ref-type="bibr" rid="B10">10</xref>], and other significant fields. Existing basic methods for anomaly detection can be generally classified into two categories [<xref ref-type="bibr" rid="B11">11</xref>], i.e., distance-based anomaly detection, such as K-nearest neighbor (KNN) [<xref ref-type="bibr" rid="B12">12</xref>], K-means [<xref ref-type="bibr" rid="B13">13</xref>] and DBSCAN [<xref ref-type="bibr" rid="B14">14</xref>], and model-based anomaly detection, such as rough set theory [<xref ref-type="bibr" rid="B15">15</xref>], Bayesian networks [<xref ref-type="bibr" rid="B16">16</xref>], Markov models [<xref ref-type="bibr" rid="B17">17</xref>], neural networks [<xref ref-type="bibr" rid="B18">18</xref>] and generative adversarial network [<xref ref-type="bibr" rid="B19">19</xref>]. To facilitate the settlement of the challenging problem that anomalies are low frequency, convex analysis (abbr.: CA in this paper), a branch of mathematics that studies convex sets and convex functions [<xref ref-type="bibr" rid="B20">20</xref>], has been widely applied to anomaly detection approaches, including linear-based, probabilistic-based, proximity-based, ensemble-based, and learning-based models [<xref ref-type="bibr" rid="B21">21</xref>,<xref ref-type="bibr" rid="B22">22</xref>].</p>
<p>With a wealth of practical techniques, CA is known as one of the fundamental techniques used to support solution and optimization in anomaly detection models. The superiority of the CA-based strategy can be summarized from the theoretical and practical perspectives. On the theoretical side, CA blends the advantages of providing efficient solutions with less complicated models. As to applications, the CA-based strategy has been widely applied in aviation, advertising, finance and other fields. Specifically, compared with other kinds of strategies, CA plays a crucial role in anomaly detection for its robust approximation in algebra and geometry, efficient computation to a unique global solution, as well as mathematical optimization for modeling [<xref ref-type="bibr" rid="B23">23</xref>,<xref ref-type="bibr" rid="B24">24</xref>]. In addition, regarding the complex non-convex shape of the collected data in the real world, local convexity (a branch of CA) also shows outstanding performance in anomaly detection [<xref ref-type="bibr" rid="B25">25</xref>], and this paper can be equally practical as guidance for local convexity.</p>
<p>CA-based anomaly detection was first proposed for studying the convex geometric approximation of subsurfaces and anomalies (i.e., seismic records) in 1966 [<xref ref-type="bibr" rid="B26">26</xref>], after which great efforts have been made to improve its accuracy and effectiveness. To date, convex analysis plays an essential role in anomaly detection, based on which a large number of anomaly detection algorithms have been developed. For example, density estimation is an indispensable method used for outlier detection, and matrix factorization is used to detect anomalies in matrix data. Although CA plays an essential role in anomaly detection and ever-growing research has been conducted on CA-based anomaly detection algorithms (as described in <xref ref-type="sec" rid="s2-2">Section 2.2</xref>), to the best of our knowledge, there is no survey paper which has addressed the anomaly detection methods based on CA, and little work has provided a comprehensive classification of them. In addition, the essential relationship between anomaly detection and CA has been rarely investigated [<xref ref-type="bibr" rid="B27">27</xref>,<xref ref-type="bibr" rid="B28">28</xref>].</p>
<p>Therefore, in this paper, we aim to conduct an in-depth survey on the framework, principle, characteristics and applications of the CA-based anomaly detection methods, and to point out possible future research directions. Based on the function of CA in anomaly detection, we classify the CA-based anomaly detection methods into four categories: 1) <italic>Density estimation</italic>, a classic anomaly detection technique including direct density estimation and indirect density estimation, with CA optimizing or substituting the density estimation of samples; 2) <italic>Matrix factorization</italic>, a crucial branch of anomaly detection method by using CA to factorize the matrix data, which has received frequent usage in machine fault diagnosis and image outlier detection [<xref ref-type="bibr" rid="B10">10</xref>]; 3) <italic>Machine learning</italic>, a widely used technique for anomaly detection based on the functions of CA, including support vector domain method utilizing the solution and geometric approximation of CA, convex hull method utilizing the geometric approximation of CA, online convex programming method utilizing the quick optimization of CA, and neural network method utilizing the steepest descent of CA; and 4) <italic>Other CA-based anomaly detection methods</italic>. For each of the first three categories, the core CA-based anomaly detection techniques and their variants are both introduced. It should be emphasized that the function and contribution of CA in each algorithm are described, which demonstrates the multidisciplinary property of CA-based anomaly detection and provides new insights for understanding the association between anomaly detection and CA.</p>
<p>The rest of this paper is organized as follows: <xref ref-type="sec" rid="s2">Section 2</xref> introduces the fundamentals of CA-based anomaly detection; <xref ref-type="sec" rid="s3">Section 3</xref> reports the direct and indirect density estimation methods and presents the latest development trends; <xref ref-type="sec" rid="s4">Section 4</xref> reviews the techniques of matrix factorization used in anomaly detection and their applications; the machine learning-based anomaly detection algorithms in CA can be found in <xref ref-type="sec" rid="s5">Section 5</xref>, composed of four sub-categories; <xref ref-type="sec" rid="s6">Section 6</xref> presents other CA-based anomaly detection methods not involved in the three mainstream categories; <xref ref-type="sec" rid="s7">Section 7</xref> summarizes this work and discusses the open challenges and future technological trends of anomaly detection based on CA.</p>
</sec>
<sec id="s2">
<title>2 Research Methodology and Statistics</title>
<sec id="s2-1">
<title>2.1 Research Methodology</title>
<p>To collect the theory and applications of anomaly detection algorithms based on CA, existing literature is collected from eight authoritative library databases including Google Scholar, Web of Science, Elsevier, Springer, IEEE Xplore, Wiley, Annual Reviews and ProQuest Dissertations &#x26; Theses (PQDT). In order to guarantee the accuracy of the retrieval, search terms are divided into two parts: technique terms and application terms. Technique terms concern CA-based anomaly detection methods, in which &#x201c;convex analysis&#x201d; AND &#x201c;anomaly detection&#x201d; OR &#x201c;outlier detection&#x201d; is our primary candidate. Then the application terms are joined, e.g., &#x201c;convex hull&#x201d; AND &#x201c;visual surveillance,&#x201d; to construct a more comprehensive search string for their specific applications. Full text search is adopted and no restriction on publication type is set. Besides, considering that some cornerstone and classic methods were delivered earlier, there is also no limitation on publication time. However, we spotlight the latest research progress of CA-based anomaly detection methods since 2000 [<xref ref-type="bibr" rid="B29">29</xref>].</p>
<p>After executing each search operation, the filtering process of papers is implemented by reviewing each paper manually in our group. During the review, relevant cross-references are also searched by Google Scholar. If one paper satisfies CA-based anomaly detection algorithms, it is selected in this review for further introduction. Based on this kind of search strategy and criteria, appropriate publications are recorded and reviewed.</p>
</sec>
<sec id="s2-2">
<title>2.2 Statistical Analysis</title>
<p>According to our search results, the number of published papers and applications of CA-based anomaly detection algorithms are statistically analyzed. As shown in <xref ref-type="fig" rid="F1">Figure 1</xref>, the development of the four CA-based anomaly detection categories presents a rapid growth trend in the past two&#xa0;decades. As a general and classic technique, density estimation has been employed with a steady upward trend, except for a sharp rise in 2007 when the indirect density estimation method was produced. The curve of matrix factorization methods is flat until the emergence of its first model&#x2014;robust principal component analysis (RPCA)&#x2014;in 2011. After that, the growth of matrix factorization was steep initially and then slowed down, since this method is only appropriate for matrix data. In addition to the emergence of new sub-categories in 2003 and 2004, there was another rapid increase in the publication number of machine learning methods in 2012, probably because ImageNet&#x2019;s victory [<xref ref-type="bibr" rid="B30">30</xref>] triggered the excitement of experts and scholars in deep learning and machine learning in that year. In recent decades, machine learning methods have been of essential importance in anomaly detection as a modern and advanced technique for managing big data generated from sophisticated realities. Besides the above three types of methods, there are many other CA-based anomaly detection methods, such as the convex combination of anomaly detectors [<xref ref-type="bibr" rid="B31">31</xref>], CM<sub>
<italic>T</italic>
</sub>MSOM algorithm [<xref ref-type="bibr" rid="B32">32</xref>], and archetypal analysis [<xref ref-type="bibr" rid="B33">33</xref>], and the number of corresponding studies is increasing every year.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Number of published papers about CA-based anomaly detection methods from 2000 to 2020.</p>
</caption>
<graphic xlink:href="fphy-10-873848-g001.tif"/>
</fig>
<p>Following [<xref ref-type="bibr" rid="B34">34</xref>], we report the various real-world applications for CA-based anomaly detection methods in different fields, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. Among them, the arrow illustrates that the type of CA-based anomaly detection methods can resolve the corresponding problem of that application, and the line thickness is derived from the number of studies found in the literature search. We can see that the most widespread applications of CA-based anomaly detection methods are network intrusion detection, extreme events detection, and process fault monitoring. The goal of the network intrusion detection is to identify unauthorized use, misuse, and abuse of computer systems by both system insiders and external penetrators [<xref ref-type="bibr" rid="B35">35</xref>,<xref ref-type="bibr" rid="B36">36</xref>]. The objectives of extreme events detection include nuclear explosion, extreme climate and epidemic [<xref ref-type="bibr" rid="B37">37</xref>]. And by early warning, manufacturing process-oriented process fault monitoring is conducive to the prevention and control of dangerous malfunction and to the reduction of productivity loss [<xref ref-type="bibr" rid="B38">38</xref>].</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Various applications of CA-based anomaly detection methods in different fields.</p>
</caption>
<graphic xlink:href="fphy-10-873848-g002.tif"/>
</fig>
</sec>
</sec>
<sec id="s3">
<title>3 Fundamentals of Convex Analysis</title>
<sec id="s3-1">
<title>3.1 Theoretical Framework of Convex Analysis</title>
<p>Convex analysis (CA) is a branch of mathematics that studies the properties of convex sets and convex functions, often with applications in convex minimization, a subdomain of optimization theory [<xref ref-type="bibr" rid="B39">39</xref>]. We proceed to give a few vital and succinct foundations of CA that we use extensively in this review. In addition, we discuss the advantages of CA compared with other mathematical methods, which are the key to the algorithmic success.</p>
<sec id="s3-1-1">
<title>3.1.1 Convex Sets</title>
<p>A set <italic>C</italic> is convex if the line segment between any two points in <italic>C</italic> lies in <italic>C</italic>, i.e., if <italic>&#x2200;x</italic>
<sub>1</sub>, <italic>x</italic>
<sub>2</sub> &#x2208; <italic>C</italic> and <italic>&#x2200;&#x3b8;</italic> &#x2208; [0, 1], we have<disp-formula id="e1">
<mml:math id="m1">
<mml:mi>&#x3b8;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>.</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Convex Functions</title>
<p>A function <inline-formula id="inf1">
<mml:math id="m2">
<mml:mi>f</mml:mi>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula> is convex if the domain of function <italic>f</italic> (<bold>dom</bold> <italic>f</italic>) is a convex set, and if for all <italic>x</italic>, <italic>y</italic> &#x2208; <bold>dom</bold> <italic>f</italic>, and <italic>&#x2200;&#x3b8;</italic> &#x2208; [0, 1], we have<disp-formula id="e2">
<mml:math id="m3">
<mml:mi>f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mi>f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>As a valuable property of convex functions, strong convexity can significantly speed up the convergence of first-order methods. We say that <inline-formula id="inf2">
<mml:math id="m4">
<mml:mi>f</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula> is <italic>&#x3b1;</italic>-strongly convex if it satisfies the improved subgradient inequality <xref ref-type="disp-formula" rid="e3">Eq. 3</xref>:<disp-formula id="e3">
<mml:math id="m5">
<mml:mi>f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2264;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>f</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>A large value of <italic>&#x3b1;</italic> would lead to a faster convergence rate, since a point far from the optimum will have a large gradient, and thus gradient descent will produce large steps in this case.</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Convex Optimization</title>
<p>As a significant subfield of CA, convex optimization studies the problem of minimizing convex functions over convex sets for mathematical optimization. A convex optimization problem in standard form is written as [<xref ref-type="bibr" rid="B40">40</xref>]:<disp-formula id="e4">
<mml:math id="m6">
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">z</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="aligned">
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(4)</label>
</disp-formula>where the optimization variable is <inline-formula id="inf3">
<mml:math id="m7">
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, the objective function <inline-formula id="inf4">
<mml:math id="m8">
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula> is convex, inequality constraint functions <inline-formula id="inf5">
<mml:math id="m9">
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are convex, and equality constraint functions <inline-formula id="inf6">
<mml:math id="m10">
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are affine [<xref ref-type="bibr" rid="B41">41</xref>].</p>
<p>Convex optimization problems show many beneficial properties. For example, every local minimum is a global minimum, and if the objective function is strictly convex, then the problem has at most one optimal point. Therefore, if a task can be formulated as a convex optimization problem, then it can be solved efficiently and reliably with effective and rapid optimization and solution, using interior-point methods or other special methods for convex optimization. General convex optimization focuses on problem formulation and modeling; more specifically, it is applied to find bounds on the optimal value, as well as approximate solutions. These solution methods are dependable enough to be embedded in computer-aided design or analysis tools, or even real-time automatic or reactive control systems.</p>
</sec>
<sec id="s3-1-4">
<title>3.1.4 Duality</title>
<p>The core design of the Lagrangian duality (or just duality) is to consider the constraints in the convex optimization problem <xref ref-type="disp-formula" rid="e4">Eq. 4</xref> by constructing an objective function with a weighted sum of the constraint functions [<xref ref-type="bibr" rid="B42">42</xref>]. Then the Lagrangian <inline-formula id="inf7">
<mml:math id="m11">
<mml:mi>L</mml:mi>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula> for the problem <xref ref-type="disp-formula" rid="e4">Eq. 4</xref> is<disp-formula id="e5">
<mml:math id="m12">
<mml:mi>L</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(5)</label>
</disp-formula>with <bold>dom</bold> <inline-formula id="inf8">
<mml:math id="m13">
<mml:mi>L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, where <italic>&#x3bb;</italic>
<sub>
<italic>i</italic>
</sub> is the Lagrange multiplier associated with the <italic>i</italic>th inequality constraint <italic>f</italic>
<sub>
<italic>i</italic>
</sub>(<italic>x</italic>) &#x2264; 0, and <italic>v</italic>
<sub>
<italic>i</italic>
</sub> is the Lagrange multiplier associated with the <italic>i</italic>th equality constraint <italic>h</italic>
<sub>
<italic>i</italic>
</sub>(<italic>x</italic>) &#x3d; 0. In addition, the vectors <italic>&#x3bb;</italic> and <italic>v</italic> are referred to as the dual variables or Lagrange multiplier vectors of the problem <xref ref-type="disp-formula" rid="e4">Eq. 4</xref> [<xref ref-type="bibr" rid="B39">39</xref>]. Therefore, the Lagrange dual function (or just dual function) <inline-formula id="inf9">
<mml:math id="m14">
<mml:mi>g</mml:mi>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula> is defined as the minimum value of the Lagrangian over <italic>x</italic>: for <inline-formula id="inf10">
<mml:math id="m15">
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, <inline-formula id="inf11">
<mml:math id="m16">
<mml:mi>v</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>,<disp-formula id="e6">
<mml:math id="m17">
<mml:mi>g</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>inf</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>L</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>inf</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>The associated dual problem of convex optimization problems could often produce an interesting interpretation regarding the original problem and lead to an efficient or distributed method for solving it. Therefore, it also reflects theoretical or conceptual advantages of convex optimization and CA [<xref ref-type="bibr" rid="B43">43</xref>].</p>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Association Between Anomaly Detection and Convex Analysis</title>
<p>CA has substantial geometrical and computational advantages. Common techniques, such as the Karush-Kuhn-Tucker (KKT) optimality conditions [<xref ref-type="bibr" rid="B44">44</xref>], gradient descent method [<xref ref-type="bibr" rid="B45">45</xref>] and Jensen&#x2019;s inequality [<xref ref-type="bibr" rid="B46">46</xref>], and common applications, such as norm approximation [<xref ref-type="bibr" rid="B40">40</xref>], geometric projection and maximum likelihood estimation in CA, are all devoted to typical anomaly detection algorithms. Such anomaly detection algorithms could benefit from CA in robust approximation in algebra and geometry, efficient computation to global unique solutions, and mathematical optimization. Therefore, CA is a valuable and intrinsic part and motivation for anomaly detection.</p>
<p>We review several architectures and methods of existing anomaly detection techniques based on CA and group them into four categories according to the underlying approach adopted by each technique. These include 1) <italic>density estimation methods</italic> based on whether the density is estimated directly or indirectly, 2) <italic>matrix factorization methods</italic>, 3) <italic>machine learning methods</italic> based on the support vector domain algorithm, convex hull algorithm, online convex programming algorithm and neural network algorithm, and 4) <italic>other methods</italic>. The anatomy of CA-based anomaly detection methods and their cornerstone algorithms are illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Anatomy of CA-based anomaly detection methods and their cornerstone algorithms.</p>
</caption>
<graphic xlink:href="fphy-10-873848-g003.tif"/>
</fig>
</sec>
</sec>
<sec id="s4">
<title>4 Density Estimation</title>
<p>Density estimation is an indispensable method used for outlier detection, one of the most elementary issues of anomaly detection. There are two typical algorithms based on CA for which the density estimation is directly or indirectly used. In these methods, &#x201c;density&#x201d; describes the probability that the value of a random variable is generated by a certain distribution. Thresholds are set up for density estimation methods, and samples with a density below the threshold are outliers.</p>
<sec id="s4-1">
<title>4.1 Direct Density Estimation</title>
<sec id="s4-1-1">
<title>4.1.1 Model Description</title>
<p>In direct density estimation, abnormal data are defined as samples with a density less than the preset threshold. A probability density function for a continuous random variable is a non-negative Lebesgue-integrable function [<xref ref-type="bibr" rid="B47">47</xref>], and satisfies<disp-formula id="e7">
<mml:math id="m18">
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x221e;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mi>d</mml:mi>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
<label>(7)</label>
</disp-formula>where <italic>F</italic>
<sub>
<italic>X</italic>
</sub>(<italic>x</italic>) is the cumulative distribution function of <italic>X</italic>.</p>
<p>Since the multivariate Gaussian distribution model (see <xref ref-type="fig" rid="F4">Figure 4A</xref>) [<xref ref-type="bibr" rid="B48">48</xref>] is not capable of describing the situation where the data in the same set conform to multiple different distributions, the mixture of Gaussian (MoG) (see <xref ref-type="fig" rid="F4">Figure 4B</xref> for instance, which is a linear combination of Gaussian distributions) was used to model the general data distribution [<xref ref-type="bibr" rid="B49">49</xref>]. Each Gaussian distribution in the MoG is defined as a component, and then the probability density of the target variable <bold>x</bold>, <italic>p</italic>(<bold>x</bold>), is defined in the MoG as:<disp-formula id="e8">
<mml:math id="m19">
<mml:mi>p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mi>det</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mi>exp</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(8)</label>
</disp-formula>where <italic>d</italic> represents the sample dimension, <italic>&#x3b1;</italic>
<sub>
<italic>k</italic>
</sub> is a mixed coefficient, <bold>
<italic>&#x3bc;</italic>
</bold>
<sub>
<italic>k</italic>
</sub> and <italic>&#x3a3;</italic>
<sub>
<italic>k</italic>
</sub> are the mean and covariance matrix of the <italic>k</italic>th component, and <italic>&#x3b3;</italic> is the number of mixed components. The Expectation-Maximization (EM) algorithm [<xref ref-type="bibr" rid="B50">50</xref>], a typical algorithm using CA, is adopted to optimize the parameters <italic>&#x3b1;</italic>
<sub>
<italic>k</italic>
</sub>, <bold>
<italic>&#x3bc;</italic>
</bold>
<sub>
<italic>k</italic>
</sub> and <italic>&#x3a3;</italic>
<sub>
<italic>k</italic>
</sub>. The EM algorithm searches for the maximum likelihood estimation of parameters in a probability model that depends on unobservable hidden variables. For samples <italic>x</italic>
<sub>1</sub>, <italic>x</italic>
<sub>2</sub>, ..., <italic>x</italic>
<sub>
<italic>n</italic>
</sub>, the hidden variables of each sample are assumed to be <italic>z</italic>
<sup>(<italic>j</italic>)</sup>, <italic>j</italic> &#x2208; [1, <italic>m</italic>]. Then, the algorithm finds the lower bound of the likelihood function through Jensen&#x2019;s inequality [<xref ref-type="bibr" rid="B46">46</xref>].<disp-formula id="e9">
<mml:math id="m20">
<mml:mi>ln</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>L</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mi>ln</mml:mi>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2265;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mi>ln</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Gaussian model and the mixture of Gaussian model: <bold>(A)</bold> The Gaussian model imposes the ellipsoidal density model on the two-dimensional data; <bold>(B)</bold> the MoG model is a linear combination of Gaussian distributions. Apertures of diverse sizes are their transformations with different outlier detection errors.</p>
</caption>
<graphic xlink:href="fphy-10-873848-g004.tif"/>
</fig>
<p>In <xref ref-type="disp-formula" rid="e9">Eq. 9</xref>, <italic>L</italic>(<italic>&#x3b8;</italic>) is the likelihood function, and <italic>Q</italic>
<sub>
<italic>i</italic>
</sub>(<italic>z</italic>
<sup>(<italic>j</italic>)</sup>) denotes the probability of <italic>x</italic>
<sub>
<italic>i</italic>
</sub> belonging to class <italic>z</italic>
<sup>(<italic>j</italic>)</sup>. After parameter optimization through CA, samples with a density from the MoG model less than the preset threshold are outliers.</p>
</sec>
<sec id="s4-1-2">
<title>4.1.2 Systems and Applications</title>
<p>Inspired by the MoG model with the EM algorithm, Woodward and Sain [<xref ref-type="bibr" rid="B51">51</xref>] used the EM algorithm to identify outliers from a mixture of normal distributions when there is missing data. They confirmed through simulations and examples that using the EM algorithm on the entire dataset resulted in higher detection probabilities than using only the complete data vectors, which is the subset of the entire dataset that includes only data vectors for which all of the variables were observed. The MoG model with the EM algorithm can detect nuclear explosions from a large number of background signals (such as earthquakes and mining explosions) using seismic signals (or any other discriminant) [<xref ref-type="bibr" rid="B52">52</xref>]. Carrying out outlier detection to recognize heart disease, biological virus invasion, and electrical power grid faults has also been explored [<xref ref-type="bibr" rid="B53">53</xref>&#x2013;<xref ref-type="bibr" rid="B55">55</xref>].</p>
</sec>
<sec id="s4-1-3">
<title>4.1.3 Strengths and Limitations</title>
<p>By Jensen&#x2019;s inequality of the <italic>logarithmic function</italic> in the <italic>expectation</italic> format, the lower bound of the likelihood function <italic>L</italic>(<italic>&#x3b8;</italic>) of parameters in direct density estimation was discovered rapidly, precisely, and effectively. However, the principal drawback of this method is that the number of mixed components is data-dependent due to requirements such as the weight constraint <italic>&#x2211;</italic>
<sup>
<italic>&#x3b3;</italic>
</sup>
<italic>&#x3b1;</italic>
<sub>
<italic>k</italic>
</sub> &#x3d; 1, so it is a tough choice, and the mixture of multiple Gaussian models requires more samples to overcome the curse of dimensionality [<xref ref-type="bibr" rid="B56">56</xref>].</p>
</sec>
</sec>
<sec id="s4-2">
<title>4.2 Indirect Density Estimation</title>
<sec id="s4-2-1">
<title>4.2.1 Model Description</title>
<p>Although the direct density estimation method is adaptable to the estimation of multiple different distributions and allows efficient parametric optimization, it cannot correctly reflect the pattern&#x2019;s characteristics for most high-dimensional conditions, as multivariate functions are intrinsically difficult to estimate [<xref ref-type="bibr" rid="B57">57</xref>]. To solve this problem, indirect density estimation methods have been developed. The main reason for the name &#x201c;indirect density estimation&#x201d; is that it does not require density estimation. The goal of this method is to estimate the density ratio <italic>w</italic>(<italic>x</italic>), called <italic>importance</italic>, of the independent and identically distributed (i.i.d.) training samples <inline-formula id="inf12">
<mml:math id="m21">
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and i.i.d. test samples <inline-formula id="inf13">
<mml:math id="m22">
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>:<disp-formula id="e10">
<mml:math id="m23">
<mml:mi>w</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(10)</label>
</disp-formula>where <italic>p</italic>
<sub>
<italic>te</italic>
</sub>(<italic>x</italic>) and <italic>p</italic>
<sub>
<italic>tr</italic>
</sub>(<italic>x</italic>) are the probability density functions [<xref ref-type="bibr" rid="B58">58</xref>] for the test data and training data, respectively. <italic>w</italic>(<italic>x</italic>) is non-negative because <italic>p</italic>
<sub>
<italic>te</italic>
</sub>(<italic>x</italic>) &#x2265; 0 and <italic>p</italic>
<sub>
<italic>tr</italic>
</sub>(<italic>x</italic>) &#x3e; 0 for all <italic>x</italic> belonging to the data domain <inline-formula id="inf14">
<mml:math id="m24">
<mml:mi mathvariant="script">D</mml:mi>
<mml:mo>&#x2282;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. <italic>w</italic>(<italic>x</italic>) for regular samples is close to one, while those for outliers tend to deviate substantially from one (i.e., close to 0) because the training data only contains regular samples, and <italic>p</italic>
<sub>
<italic>te</italic>
</sub>(<italic>x</italic>) would be close to 0 where outliers exist.</p>
<p>With the key constraint of avoiding estimating densities <italic>p</italic>
<sub>
<italic>te</italic>
</sub>(<italic>x</italic>) and <italic>p</italic>
<sub>
<italic>tr</italic>
</sub>(<italic>x</italic>), ad hoc studies have estimated the <italic>w</italic>(<italic>x</italic>) to detect the outlier by convex techniques, in which kernel mean matching (KMM) [<xref ref-type="bibr" rid="B59">59</xref>], logistic regression (LogReg) [<xref ref-type="bibr" rid="B60">60</xref>], the Kullback-Leibler importance estimation procedure (KLIEP) [<xref ref-type="bibr" rid="B61">61</xref>,<xref ref-type="bibr" rid="B62">62</xref>], least squares importance fitting (LSIF) [<xref ref-type="bibr" rid="B63">63</xref>], and unconstrained least squares importance fitting (uLSIF) [<xref ref-type="bibr" rid="B64">64</xref>] are popular. For the above-listed methods, the convex expressions for the estimation and their appraisal are summarized in <xref ref-type="table" rid="T1">Table 1</xref>; we describe the last two methods in detail.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Indirect density estimation methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="center">Convex expression</th>
<th align="center">Strengths and limitations</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KMM</td>
<td align="left">Convex quadratic programming</td>
<td align="left">Dependent and hard parameter tuning, demanding computation</td>
</tr>
<tr>
<td align="left">LogReg</td>
<td align="left">Convex nonlinear</td>
<td align="left">Easy model selection, rather expensive computation</td>
</tr>
<tr>
<td align="left">KLIEP</td>
<td align="left">Convex nonlinear</td>
<td align="left">Easy model selection, rather expensive computation</td>
</tr>
<tr>
<td align="left">LSIF</td>
<td align="left">Convex quadratic programming</td>
<td align="left">More efficient computation, numerically unreliable regularization path tracking</td>
</tr>
<tr>
<td align="left">uLSIF</td>
<td align="left">Unconstrained convex quadratic programming</td>
<td align="left">Efficient and numerically stable computation, easy model selection</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s4-2-1-1">
<title>4.2.1.1 LSIF model</title>
<p>Through convex quadratic programming, Kanamori et al. estimated the <italic>w</italic>(<italic>x</italic>) of the sample, which did not involve density estimation by LSIF, and applied it to outlier detection in a toy dataset by considering <italic>w</italic>(<italic>x</italic>) as the index of abnormal degree [<xref ref-type="bibr" rid="B63">63</xref>]. The LSIF model hypothesizes that <italic>w</italic>(<italic>x</italic>) can be estimated by a linear model <inline-formula id="inf15">
<mml:math id="m25">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, where <bold>
<italic>&#x3b1;</italic>
</bold> &#x3d; (<italic>&#x3b1;</italic>
<sub>1</sub>, <italic>&#x3b1;</italic>
<sub>2</sub>, ..., <italic>&#x3b1;</italic>
<sub>
<italic>b</italic>
</sub>) is the coefficient vector, <italic>b</italic> is the number of parameters, and <inline-formula id="inf16">
<mml:math id="m26">
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">D</mml:mi>
</mml:math>
</inline-formula> represents the basis functions. The least squares method [<xref ref-type="bibr" rid="B65">65</xref>] was employed to minimize the squared error between the estimation <inline-formula id="inf17">
<mml:math id="m27">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and the actual value <italic>w</italic>(<italic>x</italic>) on the training dataset. With the help of empirical estimation, the density estimation problem of interest can be transformed into explicit convex quadratic programming in <xref ref-type="disp-formula" rid="e11">Eq. 11</xref> and then the global optimal solution can be obtained:<disp-formula id="e11">
<mml:math id="m28">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">H</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mspace width="0.3333em"/>
<mml:mspace width="0.3333em"/>
<mml:mspace width="0.3333em"/>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>In <xref ref-type="disp-formula" rid="e11">Eq. 11</xref>, <inline-formula id="inf18">
<mml:math id="m29">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">H</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munderover>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, <inline-formula id="inf19">
<mml:math id="m30">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munderover>
<mml:mi mathvariant="bold-italic">&#x3c6;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf20">
<mml:math id="m31">
<mml:mi>&#x3bb;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3b1;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the regularization term to prevent overfitting.</p>
</sec>
<sec id="s4-2-1-2">
<title>4.2.1.2 uLSIF Model</title>
<p>With convex quadratic programming, a unique global solution can be obtained using the LSIF method. However, it tends to suffer from a numerical problem, since the numerical errors tend to accumulate when tracking the regularization path; consequently, it is not practically reliable. Therefore, uLSIF, a practical alternative to LSIF, was developed to provide an approximate solution to LSIF in a computationally efficient and reliable way [<xref ref-type="bibr" rid="B64">64</xref>]. By ignoring the non-negativity constraint in the optimization problem in <xref ref-type="disp-formula" rid="e11">Eq. 11</xref>, Kanamori et al. derived the following unconstrained optimization problem:<disp-formula id="e12">
<mml:math id="m32">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">H</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>In <xref ref-type="disp-formula" rid="e12">Eq. 12</xref>, a quadratic regularization term <inline-formula id="inf21">
<mml:math id="m33">
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:math>
</inline-formula> is added, instead of the linear one <inline-formula id="inf22">
<mml:math id="m34">
<mml:mi>&#x3bb;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:math>
</inline-formula>, since the linear penalty term cannot work as a regularizer without the non-negativity constraint. <xref ref-type="disp-formula" rid="e12">Equation 12</xref> is an unconstrained convex quadratic program, so the solution can be analytically computed as<disp-formula id="e13">
<mml:math id="m35">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">H</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:math>
<label>(13)</label>
</disp-formula>in which <bold>I</bold>
<sub>
<italic>b</italic>
</sub> is the <italic>b</italic>-dimensional identity matrix. Due to the discarding of the non-negativity constraint, some of the learned parameters could be negative. To compensate for this approximation error, the solution was modified by <xref ref-type="disp-formula" rid="e14">Eq. 14</xref> in an element-wise manner:<disp-formula id="e14">
<mml:math id="m36">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
<p>One advantage of the above unconstrained formulation is that the closed-form solution can be computed simply by solving a system of linear equations. Consequently, its calculation can be stable when <italic>&#x3bb;</italic> is not too small.</p>
<p>Afterward, several variants of the basic uLSIF model were developed, such as KuLSIF as a kernelized variant [<xref ref-type="bibr" rid="B66">66</xref>], and RuLSIF as an <italic>&#x3b1;</italic>-relative variant [<xref ref-type="bibr" rid="B67">67</xref>]. In addition, machine learning models like convolutional neural networks (CNN) [<xref ref-type="bibr" rid="B68">68</xref>], gradient boosting over decision trees (GBDT), and a one-layer neural network [<xref ref-type="bibr" rid="B69">69</xref>], can be trained with the uLSIF loss function to detect anomalies.</p>
</sec>
</sec>
<sec id="s4-2-2">
<title>4.2.2 Systems and Applications</title>
<p>Since it was proposed, the uLSIF method has been widely used in outlier detection. For instance, based on the experimental results of 12 datasets available from R&#xe4;tsch&#x2019;s benchmark repository [<xref ref-type="bibr" rid="B70">70</xref>], the SMART disk-failure dataset, and the in-house financial dataset, Hido et al. concluded that the uLSIF-based method is a reliable and computationally efficient alternative to existing outlier detection methods [<xref ref-type="bibr" rid="B71">71</xref>]. Umer et al. also illustrated its superiority in the detection of malicious poisoning attacks in 2019 [<xref ref-type="bibr" rid="B6">6</xref>]. In addition, change-point detection in time series data such as smart home time series data [<xref ref-type="bibr" rid="B72">72</xref>,<xref ref-type="bibr" rid="B73">73</xref>], outlying image detection in hand-written digit image and face image data [<xref ref-type="bibr" rid="B68">68</xref>], outlier detection in both synthetic and benchmark datasets [<xref ref-type="bibr" rid="B74">74</xref>], and computer game cheat detection in game-traffic data [<xref ref-type="bibr" rid="B75">75</xref>], all proved its excellence.</p>
</sec>
<sec id="s4-2-3">
<title>4.2.3 Strengths and Limitations</title>
<p>Since estimating density is complex (especially in high-dimensional space), a convex heuristic enables indirect density estimation methods to counter the curse of dimensionality without going through density estimation. The outliers tend to have smaller importance values (close to zero), so they can be identified with a suitable threshold. Optimization methods, such as Newton&#x2019;s method, the conjugate gradient method, the Broyden&#x2013;Fletcher&#x2013;Goldfarb&#x2013;Shanno algorithm for a convex nonlinear problem, the gradient descent method, the KKT conditions method [<xref ref-type="bibr" rid="B44">44</xref>], and the inexact augmented Lagrange multiplier (IALM) method for unconstrained and constrained quadratic programs, are not only efficient but can also find the global minima. The uLSIF-based method is highly scalable to large datasets, which is of critical importance in practical applications.</p>
<p>The indirect density estimation method, however, has a well-documented vulnerability to poisoning attacks, even with a modest number of attack points inserted into the training data [<xref ref-type="bibr" rid="B6">6</xref>]. When an intelligent adversary (one with full or partial access to the training data) injects well-crafted malicious samples into the training data, an incorrect estimation of <italic>w</italic>(<italic>x</italic>) can occur.</p>
</sec>
</sec>
</sec>
<sec id="s5">
<title>5 Matrix Factorization</title>
<p>Matrix factorization is a series of methods used for anomaly detection when the data can be represented as a matrix, i.e., a significant representation of data in which columns generally represent linearly independent features and rows represent samples. The dominant mechanism of these methods is that convex programming is employed to factorize the matrix data. Among matrix factorization methods, robust principal component analysis (RPCA) methods, consisting of RPCA and its related extensions and improvements, are mainstream and emerging. RPCA has the advantage of tolerance to high-amplitude sharp noise instead of the Gaussian distributed noise of its baseline PCA (or Singular Value Decomposition, SVD) [<xref ref-type="bibr" rid="B76">76</xref>]. In this method, a background dictionary is used to represent each pixel linearly, and the residual is taken as the pixel&#x2019;s abnormal level.</p>
<p>A notable feature of the RPCA series is that there are different definitions and detection methods for anomalies in different applications, but all are based on matrix factorization. Therefore, the matrix factorization models, together with their systems and applications, strengths, and drawbacks, are provided in this section.</p>
<sec id="s5-1">
<title>5.1 Model Description</title>
<p>The RPCA model and its related extensions and improvements have been widely applied in anomaly detection [<xref ref-type="bibr" rid="B77">77</xref>&#x2013;<xref ref-type="bibr" rid="B79">79</xref>] after Cand&#xe8;s et al. recovered a low-rank component and a sparse component from the original data matrix by a convenient convex program, which achieved RPCA via principal component pursuit [<xref ref-type="bibr" rid="B80">80</xref>].</p>
<sec id="s5-1-1">
<title>5.1.1 RPCA method</title>
<p>A data matrix <inline-formula id="inf23">
<mml:math id="m37">
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> with <italic>n</italic> samples and <italic>m</italic> variables can be factorized by RPCA as:<disp-formula id="e15">
<mml:math id="m38">
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">L</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
<label>(15)</label>
</disp-formula>where <inline-formula id="inf24">
<mml:math id="m39">
<mml:mi mathvariant="bold">L</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is a low-rank component, <inline-formula id="inf25">
<mml:math id="m40">
<mml:mi mathvariant="bold">E</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is a sparse matrix containing outliers and process faults, and <inline-formula id="inf26">
<mml:math id="m41">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:math>
</inline-formula> denotes that the <italic>j</italic>th variable in the <italic>i</italic>th sample is noise-free. The essence of the RPCA algorithm is to address the convex optimization programming demonstrated in <xref ref-type="disp-formula" rid="e16">Eq. 16</xref>:<disp-formula id="e16">
<mml:math id="m42">
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">L</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">L</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:mo>.</mml:mo>
</mml:math>
<label>(16)</label>
</disp-formula>In <xref ref-type="disp-formula" rid="e16">Eq. 16</xref>, &#x2225;<bold>L</bold> &#x2225;<sub>&#x2a;</sub> is the nuclear norm of the matrix <bold>L</bold>, obtained as the sum of the singular values of <bold>L</bold>. &#x2225;<bold>E</bold> &#x2225;<sub>1</sub> is the <italic>&#x2113;</italic><sub>1</sub> norm of matrix <bold>E</bold>, i.e., the sum of absolute values of all elements in <bold>E</bold>. Also, the parameter <italic>&#x3bb;</italic> provides the trade-off between the norm terms &#x2225;<bold>L</bold> &#x2225;<sub>&#x2a;</sub> and &#x2225;<bold>E</bold> &#x2225;<sub>1</sub>, and can be calculated according to the standard <xref ref-type="disp-formula" rid="e17">Eq. 17</xref>
<disp-formula id="e17">
<mml:math id="m43">
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(17)</label>
</disp-formula>and then adjusted slightly according to prior knowledge of the solution.</p>
<p>The optimization problem in <xref ref-type="disp-formula" rid="e16">Eq. 16</xref> is convex and linearly constrained, and several efficient algorithms are available, including the alternating direction method of multipliers (ADMM) [<xref ref-type="bibr" rid="B81">81</xref>], IALM [<xref ref-type="bibr" rid="B82">82</xref>], and singular value thresholding (SVT) [<xref ref-type="bibr" rid="B83">83</xref>]. Key steps of the RPCA problem solved by IALM are demonstrated in <xref ref-type="other" rid="alg1">Algorithm 1</xref>.</p>
<p>
<statement content-type="algorithm" id="alg1">
<label>Algorithm 1</label>
<p>RPCA problem using IALM</p>
<p>
<inline-graphic xlink:href="fphy-10-873848-fx1.tif"/>
</p>
</statement>
</p>
</sec>
<sec id="s5-1-2">
<title>5.1.2 Stable principal component pursuit method</title>
<p>After Isom and LaBarre [<xref ref-type="bibr" rid="B84">84</xref>] first applied RPCA in the monitoring of fuel cell power plants&#x2019; process fault detection, Zhang et al. [<xref ref-type="bibr" rid="B85">85</xref>] proposed an LRaSMD-based Mahalanobis distance (LSMAD) method for hyperspectral outlier detection. This algorithm dates back to the SPCP model proposed by Zhou et al. [<xref ref-type="bibr" rid="B86">86</xref>], in which a noise term <bold>N</bold> (i.e., i.i.d. noise on each entry of the matrix) was introduced and the convex programming was<disp-formula id="e18">
<mml:math id="m44">
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">L</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold">L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
<label>(18)</label>
</disp-formula>where &#x2225;&#x22c5;&#x2225;<sub>
<italic>F</italic>
</sub> denotes the <italic>Frobenius</italic> norm and &#x2225;<bold>N</bold> &#x2225;<sub>
<italic>F</italic>
</sub> &#x2264; <italic>&#x3b4;</italic> for some <italic>&#x3b4;</italic> &#x3e; 0, thus <bold>L</bold>&#x2a; and <bold>E</bold>&#x2a; can be estimated more stably.</p>
</sec>
<sec id="s5-1-3">
<title>5.1.3 Low-Rank representation method</title>
<p>Xu et al. [<xref ref-type="bibr" rid="B87">87</xref>] suggested leveraging LRR [<xref ref-type="bibr" rid="B88">88</xref>] for anomaly detection in hyperspectral images (HSIs). The LRR model introduced a dictionary matrix <bold>D</bold> in the linear decomposition of the background matrix, and the convex optimization problem in <xref ref-type="disp-formula" rid="e19">Eq. 19</xref> is solved for matrix factorization:<disp-formula id="e19">
<mml:math id="m45">
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2,1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">D</mml:mi>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:mo>.</mml:mo>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>In <xref ref-type="disp-formula" rid="e19">Eq. 19</xref>, &#x2225;&#x22c5;&#x2225;<sub>2,1</sub> is defined to be the sum of <italic>&#x2113;</italic>
<sub>2</sub> norms of column vectors of a matrix, and &#x2225;<bold>E</bold> &#x2225;<sub>2,1</sub> represents the <italic>&#x2113;</italic>
<sub>2,1</sub>-norm to characterize the error term <bold>E</bold>. LRR could handle the data collected from multiple subspaces well.</p>
<p>In 2020, Su et al. [<xref ref-type="bibr" rid="B10">10</xref>] proposed an LRCRD method, and this model is primarily suitable for hyperspectral data. They employed another <italic>&#x2113;</italic>
<sub>2</sub> norm to collaborate the global background and anomaly feature as local representation process attribute on the foundation of LRR; thus, a functional outlier detection model with strong representation ability was built:<disp-formula id="e20">
<mml:math id="m46">
<mml:mi>min</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2,1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">D</mml:mi>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">E</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
<label>(20)</label>
</disp-formula>where &#x2225;<bold>Z</bold> &#x2225;<sub>&#x2a;</sub> is still the nuclear norm of <bold>Z</bold>, convexly approximating the rank of <bold>Z</bold>, <italic>N</italic> is the number of pixels, <italic>&#x3b2;</italic> &#x3e; 0 and <italic>&#x3bb;</italic> &#x3e; 0 are both regularization coefficients.</p>
</sec>
<sec id="s5-1-4">
<title>5.1.4 RPCA-OP method</title>
<p>When it comes to strongly corrupted data, such that all entries of some columns are corrupted, the RPCA <italic>via</italic> outlier pursuit (RPCA-OP) method, an efficient convex optimization-based algorithm, should be employed for outlier detection [<xref ref-type="bibr" rid="B89">89</xref>]. Experiments have confirmed that the RPCA-OP method can even endure column-sparse or row-sparse errors. It recovers the correct column space of the uncorrupted matrix rather than the exact matrix itself, as RPCA does. Its convex optimization program is shown in <xref ref-type="disp-formula" rid="e21">Eq. 21</xref>:<disp-formula id="e21">
<mml:math id="m47">
<mml:mi>min</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">R</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="bold">C</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2,1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">R</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">C</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
<label>(21)</label>
</disp-formula>where <bold>C</bold> is still a sparse matrix in which all elements of some columns are zero, and &#x2225;<bold>C</bold> &#x2225;<sub>2,1</sub> promotes column-wise sparsity. To ensure success, we could tune the parameter <italic>&#x3bb;</italic> to <inline-formula id="inf27">
<mml:math id="m48">
<mml:mfrac>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>7</mml:mn>
<mml:msqrt>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula> with <italic>&#x3b3;</italic> being the fraction of corrupted points. Outliers exist in the set of nonzero columns of <inline-formula id="inf28">
<mml:math id="m49">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">C</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> (i.e., <inline-formula id="inf29">
<mml:math id="m50">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">I</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2260;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>).</p>
</sec>
</sec>
<sec id="s5-2">
<title>5.2 Systems and Applications</title>
<p>Anomaly detection models with matrix factorization are primarily applied in image outlier detection and process fault monitoring. Nevertheless, there are different definitions and detection methods for anomalies in these two applicable scenarios.</p>
<p>In image detection, the sparse matrix, one component, indicates outliers. Outlier detection can be simply done by finding the nonzero columns of <bold>E</bold>
<sup>&#x2217;</sup>, when all or a fraction of the data samples are clean. For the cases where <bold>E</bold>
<sup>&#x2217;</sup> only approximately has sparse column supports, we can use a thresholding strategy (threshold <italic>&#x3c4;</italic> &#x3e; 0); that is, the <italic>i</italic>th data vector of <bold>X</bold> is identified as an outlier if and only if<disp-formula id="e22">
<mml:math id="m51">
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>.</mml:mo>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
<p>In process fault monitoring, the data may contain persistent process noise that only weakly affects production. The noise may be caused by sensor errors, the subjective control by operators with different experience, or the instability of the data transmission network. However, the faults, such as sudden changes in system behavior, should be paid more attention to and identified as anomalies.</p>
<p>In 2011, Isom and Labarre used the RPCA method for process monitoring for the first time by straightforward observation of the sparse matrix obtained [<xref ref-type="bibr" rid="B84">84</xref>]. Afterwards, powerful multivariate statistics were built for fault detection based on either component matrix. For example, the statistics <bold>L</bold>
<sup>2</sup> &#x3d; <italic>x</italic>
<sup>
<italic>T</italic>
</sup>
<bold>Z</bold> (<italic>x</italic> is an online testing sample) [<xref ref-type="bibr" rid="B83">83</xref>] and <italic>Hotelling</italic>&#x2032;<italic>s T</italic>
<sup>2</sup> [<xref ref-type="bibr" rid="B90">90</xref>] were built. If their value is greater than the threshold under a certain normal condition, a fault occurs.</p>
<p>Matrix factorization-based methods are extensively used in many applications of interest, including image outlier detection, especially in hyperspectral scenarios, video surveillance, and mechanical fault detection. <xref ref-type="table" rid="T2">Table 2</xref> lists the applicable scenarios, models, and the improvement and application of the four sets of methods mentioned above, in which <bold>L</bold> denotes the low-rank component, <bold>E</bold> denotes the sparse component, <bold>N</bold> is the additional small dense noise, and <bold>Z</bold> is the (low-rank) coefficient matrix.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Matrix factorization method.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Methods</th>
<th align="center">Applicable scenarios</th>
<th align="center">Model</th>
<th align="center">Improvement and application</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">RPCA [<xref ref-type="bibr" rid="B80">80</xref>]</td>
<td align="left">Basic scenarios</td>
<td align="left">
<bold>S</bold> &#x3d; <bold>L</bold> &#x2b; <bold>E</bold>
</td>
<td align="left">For fuel cell power plants process fault detection [<xref ref-type="bibr" rid="B84">84</xref>], FRPCALG model with RPCA and Laplacian manifold graph combined [<xref ref-type="bibr" rid="B91">91</xref>], fault detection in a blast furnace process [<xref ref-type="bibr" rid="B90">90</xref>], RVAE model for unsupervised cell outlier detection [<xref ref-type="bibr" rid="B8">8</xref>]</td>
</tr>
<tr>
<td align="left">SPCP [<xref ref-type="bibr" rid="B86">86</xref>]</td>
<td align="left">Data with small entry-wise perturbations</td>
<td align="left">
<bold>X</bold> &#x3d; <bold>L</bold> &#x2b; <bold>E</bold> &#x2b; <bold>N</bold>
</td>
<td align="left">LRaSMD model [<xref ref-type="bibr" rid="B92">92</xref>] and LSMAD model for HSI anomaly detection [<xref ref-type="bibr" rid="B85">85</xref>], a joint low-rank sparse modeling algorithm for CFRP composites defects detection [<xref ref-type="bibr" rid="B7">7</xref>]</td>
</tr>
<tr>
<td align="left">LRR [<xref ref-type="bibr" rid="B88">88</xref>]</td>
<td align="left">Data from multiple subspaces</td>
<td align="left">
<bold>X</bold> &#x3d; <bold>DZ</bold> &#x2b; <bold>E</bold>
</td>
<td align="left">LRASR model [<xref ref-type="bibr" rid="B87">87</xref>], abundance- and dictionary-based low-rank decomposition (ADLR) model [<xref ref-type="bibr" rid="B93">93</xref>], and LRCRD model [<xref ref-type="bibr" rid="B10">10</xref>] for HSI anomaly detection</td>
</tr>
<tr>
<td align="left">RPCA-OP [<xref ref-type="bibr" rid="B89">89</xref>]</td>
<td align="left">Strongly corrupted data</td>
<td align="left">
<bold>S</bold> &#x3d; <bold>R</bold> &#x2b; <bold>C</bold>
</td>
<td align="left">Robust Deep Autoencoder (RDA) model [<xref ref-type="bibr" rid="B94">94</xref>], OC-NN [<xref ref-type="bibr" rid="B95">95</xref>], a new factorization-based RPCA model [<xref ref-type="bibr" rid="B9">9</xref>] for e.g., image anomaly detection and video surveillance</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s5-3">
<title>5.3 Strengths and Limitations</title>
<p>In the matrix factorization-based anomaly detection method, CA is of great significance in the fundamental linear factorization of the matrix. CA generally illuminates this method by norm approximation; nuclear norm minimization, as a convex surrogate, replaces the rank function, solves the original NP-hard problem, and makes it successful and efficiently computable. However, this method is only applicable when the sample can be represented as a matrix.</p>
</sec>
</sec>
<sec id="s6">
<title>6 Machine Learning</title>
<p>CA has been adopted in many machine learning technologies, including logistic regression, support vector machines, and artificial neural networks. Therefore, these machine learning methods have inevitably and selectively been applied to anomaly detection [<xref ref-type="bibr" rid="B96">96</xref>]. In this review, we classified them into four sub-categories, i.e., support vector domain method, convex hull method, online convex optimization method, and neural network method, in conformity with the role of CA in anomaly detection in the machine learning field.</p>
<sec id="s6-1">
<title>6.1 Support Vector Domain Method</title>
<sec id="s6-1-1">
<title>6.1.1 Model Description</title>
<p>This method aims to discover a data description with a presupposed shape from the training dataset. A good description covers all target data but includes no superfluous space. Points outside the description in the test set will be detected as outliers.</p>
<p>Among the support vector domain methods, the support vector machine (SVM) [<xref ref-type="bibr" rid="B97">97</xref>] is a mainstream two-class classification method for fields such as text detection, human body recognition, and freight transportation [<xref ref-type="bibr" rid="B98">98</xref>]. The SVM separates two types of samples with a maximal margin by a hyperplane. For outlier detection, since there is often only the target sample in the training set due to the lack of negative examples, the original SVM is no longer applicable, and support vector data description (SVDD) was developed by Tax and Duin [<xref ref-type="bibr" rid="B43">43</xref>] for one-class classification. It looks for a spherical description as implicit mapping, as shown in <xref ref-type="fig" rid="F5">Figure 5A</xref>. This description encloses most training samples <bold>x</bold>
<sub>
<italic>i</italic>
</sub> and minimizes the volume (i.e., minimizes <italic>R</italic>) of the hypersphere (<italic>R</italic>, <bold>a</bold>), where <italic>R</italic> is the radius and <bold>a</bold> is its center.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Support vector spherical description based on the training set: <bold>(A)</bold> SVDD and <bold>(B)</bold> NSVDD. The dashed circles represent samples in the description and the solid circles on and outside the description. The circles on the description are SVs (i.e., a solid circle crossed by a curve) containing both target samples and outliers in <bold>(B)</bold> NSVDD. The support vector domain method penalizes observations not in the correct position, i.e., red circles in <bold>(A)</bold> SVDD and <bold>(B)</bold> NSVDD because normal samples should be inside the description, and blue dashed circles in <bold>(B)</bold> NSVDD because outliers should fall outside the description.</p>
</caption>
<graphic xlink:href="fphy-10-873848-g005.tif"/>
</fig>
<p>SVDD adopts the soft-margin criterion [<xref ref-type="bibr" rid="B99">99</xref>], and a slack variable <italic>&#x3be;</italic> is introduced to penalize training samples outside the sphere (i.e., the red point in <xref ref-type="fig" rid="F5">Figure 5A</xref>, whose squared distance to the center of the sphere is greater than <italic>R</italic>
<sup>2</sup>). It operates as Step 1 in the following SVDD algorithm to find the hypersphere with the penalty of <italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub>, where <italic>C</italic> is the regularization factor (i.e., the trade-off between the volume and the errors) for tighter description and higher accuracy. The detailed algorithm flow (SVDD by the Lagrange multiplier method) is presented as follows:<list list-type="simple">
<list-item>
<p>Step 1) min <italic>R</italic>
<sup>2</sup> &#x2b; <italic>C&#x2211;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub> <italic>s</italic>.<italic>t</italic>. &#x2225;<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x2212; <bold>a</bold> &#x2225;<sup>2</sup> &#x2264; <italic>R</italic>
<sup>2</sup> &#x2b; <italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub>, <italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub> &#x2265; 0, <italic>&#x2200;i</italic>;</p>
</list-item>
<list-item>
<p>Step 2) <italic>L</italic>(<italic>R</italic>, <bold>a</bold>, <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>, <italic>&#x3b3;</italic>
<sub>
<italic>i</italic>
</sub>, <italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub>) &#x3d; <italic>R</italic>
<sup>2</sup> &#x2b; <italic>C&#x2211;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub> &#x2212; <italic>&#x2211;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>{<italic>R</italic>
<sup>2</sup> &#x2b; <italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub> &#x2212; (&#x2225;<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x2225;<sup>2</sup> &#x2212; 2<bold>a</bold>&#x22c5;<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x2b; &#x2225;<bold>a</bold> &#x2225;<sup>2</sup>)} &#x2212; <italic>&#x2211;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b3;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3be;</italic>
<sub>
<italic>i</italic>
</sub> is the Lagrangian with the Lagrange multipliers <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub> &#x2265; 0 and <italic>&#x3b3;</italic>
<sub>
<italic>i</italic>
</sub> &#x2265; 0;</p>
</list-item>
<list-item>
<p>Step 3) Setting partial derivatives to zero provides the following constraints: <inline-formula id="inf30">
<mml:math id="m52">
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi mathvariant="bold">a</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:math>
</inline-formula>: <inline-formula id="inf31">
<mml:math id="m53">
<mml:mi mathvariant="bold">a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>:</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:math>
</inline-formula>;</p>
</list-item>
<list-item>
<p>Step 4) Deduce 0 &#x2264; <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub> &#x2264; <italic>C</italic> according to the last equation in Step 3) <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub> &#x3d; <italic>C</italic> &#x2212; <italic>&#x3b3;</italic>
<sub>
<italic>i</italic>
</sub> and <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub> &#x2265; 0, <italic>&#x3b3;</italic>
<sub>
<italic>i</italic>
</sub> &#x2265; 0;</p>
</list-item>
<list-item>
<p>Step 5) Resubstitute Step 3) into Step 2): <italic>L</italic> &#x3d; <italic>&#x2211;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>(<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x22c5;<bold>x</bold>
<sub>
<italic>i</italic>
</sub>) &#x2212; <italic>&#x2211;</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>j</italic>
</sub>(<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x22c5;<bold>x</bold>
<sub>
<italic>j</italic>
</sub>) <italic>s</italic>.<italic>t</italic>. 0 &#x2264; <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub> &#x2264; <italic>C</italic>;</p>
</list-item>
<list-item>
<p>Step 6) <italic>R</italic>
<sup>2</sup> &#x3d; (<bold>x</bold>
<sub>
<italic>k</italic>
</sub> &#x22c5;<bold>x</bold>
<sub>
<italic>k</italic>
</sub>) &#x2212; 2<italic>&#x2211;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>(<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x22c5;<bold>x</bold>
<sub>
<italic>k</italic>
</sub>) &#x2b; <italic>&#x2211;</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>j</italic>
</sub>(<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x22c5;<bold>x</bold>
<sub>
<italic>j</italic>
</sub>), <bold>x</bold>
<sub>
<italic>k</italic>
</sub> is any support vector with 0 &#x3c; <italic>&#x3b1;</italic>
<sub>
<italic>k</italic>
</sub> &#x3c; <italic>C</italic>;</p>
</list-item>
<list-item>
<p>Step 7) Test a new object <bold>z</bold> by the distance to the center of the sphere &#x2225;<bold>z</bold> &#x2212; <bold>a</bold> &#x2225;<sup>2</sup> &#x3d; (<bold>z</bold> &#x22c5;<bold>z</bold>) &#x2212; 2<italic>&#x2211;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>(<bold>z</bold> &#x22c5;<bold>x</bold>
<sub>
<italic>i</italic>
</sub>) &#x2b; <italic>&#x2211;</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>
<italic>&#x3b1;</italic>
<sub>
<italic>j</italic>
</sub>(<bold>x</bold>
<sub>
<italic>i</italic>
</sub> &#x22c5;<bold>x</bold>
<sub>
<italic>j</italic>
</sub>); if this squared distance is larger than <italic>R</italic>
<sup>2</sup>, then the object <bold>z</bold> is flagged as an anomalous object.</p>
</list-item>
</list>
</p>
<p>Applying the Lagrange multiplier method [<xref ref-type="bibr" rid="B44">44</xref>], the dual problem can be obtained by the KKT conditions, and the problem that both minimum volume and maximum samples are expected to be fulfilled can be transformed into the above convex quadratic programming problem in Step 1 of the SVDD algorithm. Besides, the dual representation <bold>a</bold> &#x3d; <italic>&#x2211;</italic>
<sup>
<italic>n</italic>
</sup>
<italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub>
<bold>x</bold>
<sub>
<italic>i</italic>
</sub> could generate the sparse center of the sphere, which improves its test performance.</p>
<p>
<xref ref-type="fig" rid="F5">Figure 5A</xref> is a visual representation of SVDD, and the points on the surface with 0 &#x3c; <italic>&#x3b1;</italic>
<sub>
<italic>i</italic>
</sub> &#x3c; <italic>C</italic> are support vectors (SVs). The red circles are like the black ones (i.e., normal samples in the training set). However, the red circles are outside the hypersphere, so they are penalized.</p>
<p>To further enhance the flexibility of SVDD when negative examples are available, the following SVDD with negative samples (NSVDD) was also proposed by Tax and Duin [<xref ref-type="bibr" rid="B43">43</xref>]. NSVDD assumes that the target samples are in the hypersphere as much as possible (i.e., the black circle in <xref ref-type="fig" rid="F5">Figure 5B</xref>), but the outliers are outside (i.e., the green circle in <xref ref-type="fig" rid="F5">Figure 5B</xref>). Then, the normal points (i.e., the red circle) and the outliers (i.e., the blue dashed circle) should be penalized because they are not in the correct position. <xref ref-type="disp-formula" rid="e23">Eq. 23</xref> describes how NSVDD works:<disp-formula id="e23">
<mml:math id="m54">
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:msup>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>C</mml:mi>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold">a</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3be;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
<label>(23)</label>
</disp-formula>in which <italic>y</italic>
<sub>
<italic>i</italic>
</sub> &#x2208; {&#x2212;1, 1} is the label of the training sample, with &#x201c;&#x2212;1&#x201d; denoting an outlier. NSVDD is identical to the normal SVDD when new variables <inline-formula id="inf32">
<mml:math id="m55">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> are defined, and both are convex representations.</p>
<p>By employing two slack variables, NSVDD has shown higher classification accuracy with a varying radius of the hypersphere [<xref ref-type="bibr" rid="B100">100</xref>]. However, the outlier placed on the boundary of the description (i.e., the blue solid circle crossed by a curve in <xref ref-type="fig" rid="F5">Figure 5B</xref>) cannot be distinguished from the SVs in the target class (i.e., the black solid circle crossed by a curve) based on Step 7 in the SVDD algorithm. By applying kernel techniques, both SVDD and NSVDD can obtain a rigid hypersphere for nonlinear problems with greater flexibility and malleability.</p>
<p>SVDD is an unsupervised machine learning method for anomaly detection, while NSVDD is supervised. The related semi-supervised method [<xref ref-type="bibr" rid="B101">101</xref>] was developed in 2020 for fault detection in rolling element bearings by combining SVDD and cyclic spectral coherence (CSCoh) as domain indicators [<xref ref-type="bibr" rid="B102">102</xref>].</p>
</sec>
<sec id="s6-1-2">
<title>6.1.2 Systems and Applications</title>
<p>Although affected by noise and limited to hypersphere data, standard SVDD can be rated as a cornerstone in the field of anomaly detection. With its improvement, it has been explored for anomaly detection with high-dimensional and large-scale data [<xref ref-type="bibr" rid="B103">103</xref>], adversarial examples [<xref ref-type="bibr" rid="B104">104</xref>], contaminated data [<xref ref-type="bibr" rid="B105">105</xref>], and other anomaly detection situations. Furthermore, in 2020, Yuan et al. [<xref ref-type="bibr" rid="B106">106</xref>] demonstrated that this method can undertake robust process monitoring in over 20 real-life datasets, including vehicle evaluation, breast cancer, and process engineering.</p>
</sec>
<sec id="s6-1-3">
<title>6.1.3 Strengths and Limitations</title>
<p>By transforming the mini-volume and most-points problem into convex quadratic programming, convexity makes KKT conditions necessary and sufficient. The optimality of the convex program is adequate for solving the data description of the support vector domain method, which results in accuracy and efficiency for global outlier detection. This method ensures the accuracy of normal samples by minimizing the volume of the description and the error of outlier detection. Compared with other outlier detection methods, this method shows comparable or improved performance for sparse and complex datasets. However, for minuscule target error rates, the SVDD could break down, and this method is not preferred for high-dimensional samples.</p>
</sec>
</sec>
<sec id="s6-2">
<title>6.2 Convex Hull Method</title>
<sec id="s6-2-1">
<title>6.2.1 Model Description</title>
<p>The support vector domain method is a fundamental and special case of the convex hull method, in which the hypersphere is a convex hull (CH), and solutions are mostly provided by convex programming. The CH for a set of points <inline-formula id="inf33">
<mml:math id="m56">
<mml:mi>S</mml:mi>
<mml:mo>&#x2286;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> in a real vector space <italic>V</italic> is the minimal convex set containing <italic>S</italic> [<xref ref-type="bibr" rid="B107">107</xref>]. The CH classifier, belonging to the one-class classifier, builds the CH border according to the training set (comprising the points of the normal class), and samples outside the border in the test set are outliers. An example is illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>An example of the convex hull for less overfitting.</p>
</caption>
<graphic xlink:href="fphy-10-873848-g006.tif"/>
</fig>
<p>The convex hull <italic>CH</italic>(<italic>S</italic>) can be calculated according to <xref ref-type="disp-formula" rid="e24">Eq. 24</xref>:<disp-formula id="e24">
<mml:math id="m57">
<mml:mi>C</mml:mi>
<mml:mi>H</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2229;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(24)</label>
</disp-formula>
</p>
<p>Since the existence of outliers in the training set may lead to an overfitted decision model, the CH can be corrected by a parameter <italic>&#x3bb;</italic> &#x2208; [0, &#x2b;<italic>&#x221e;</italic>), according to <xref ref-type="disp-formula" rid="e25">Eq. 25</xref> [<xref ref-type="bibr" rid="B108">108</xref>]:<disp-formula id="e25">
<mml:math id="m58">
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>v</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mi>c</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>H</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(25)</label>
</disp-formula>
</p>
<p>In <xref ref-type="disp-formula" rid="e25">Eq. 25</xref>, <italic>v</italic> incorporates the vertices of the original convex hull in <xref ref-type="disp-formula" rid="e24">Eq. 24</xref> regarding their center <inline-formula id="inf34">
<mml:math id="m59">
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:math>
</inline-formula>; thus, <italic>v</italic>
<sub>
<italic>&#x3bb;</italic>
</sub> contains the modified vertices of <italic>CH</italic>(<italic>S</italic>). From this equation, it can be concluded that the CH would be expanded or contracted when <italic>&#x3bb;</italic> is greater than 1 or lower than 1, respectively.</p>
<p>However, this approach shows two major drawbacks. First, the computation cost is high. And second, the training data&#x2019;s boundary may not be well-modeled by a convex polytope. Calculating the CH of a high-dimensional dataset requires a tremendous computational cost. If a dataset comprises <italic>N</italic> samples in <inline-formula id="inf35">
<mml:math id="m60">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, the cost of computing the CH is estimated as <italic>O</italic>(<italic>N</italic>
<sup>(<italic>n</italic>/2)&#x2b;1</sup>) [<xref ref-type="bibr" rid="B109">109</xref>]. This problem can be solved with the approximate polytope ensemble (APE) technique [<xref ref-type="bibr" rid="B110">110</xref>], which first constructs <italic>p</italic> random 2D projections of the original dataset and then models the CH for each 2D projection. Outliers are then identified as those points that lie outside the CH of at least one of these projections. The main idea of this approach is illustrated in <xref ref-type="fig" rid="F7">Figure 7</xref>, where a dataset in <inline-formula id="inf36">
<mml:math id="m61">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is projected onto two 2D planes, and the red dot outside the CH of projection &#x23;2 represents an outlier. Despite the good performance of the APE approach, inaccurate classification can occur for non-convex sets. Hence, non-convex APE (NAPE), an extension for managing non-convex boundaries, was proposed. The underlying idea of this extension is to divide the non-convex boundary into a set of convex problems. Then, each convex problem can be solved using the APE algorithm.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>APE technique using the approximate 2D convex hull.</p>
</caption>
<graphic xlink:href="fphy-10-873848-g007.tif"/>
</fig>
</sec>
<sec id="s6-2-2">
<title>6.2.2 Systems and Applications</title>
<p>Outlier detection performance was investigated using this method on over 200 datasets [<xref ref-type="bibr" rid="B111">111</xref>&#x2013;<xref ref-type="bibr" rid="B113">113</xref>], even in multi-modal distributions of automated visual surveillance detection [<xref ref-type="bibr" rid="B114">114</xref>]. All exhibited a trade-off between the detection rate (true positive rate) and false alarm rate (false positive rate), and an AUC greater than 0.9. In practice, CHs are usually adopted in industrial intelligent fault diagnosis, multiaxial high-cycle fatigue recognition, and other anomaly detection applications, some of which are described in the studies of He et al. [<xref ref-type="bibr" rid="B115">115</xref>] and Scalet [<xref ref-type="bibr" rid="B116">116</xref>].</p>
</sec>
<sec id="s6-2-3">
<title>6.2.3 Strengths and Limitations</title>
<p>As a flexible geometric model, a CH is typically a substantial approximation of the target region. It can approximate a polytope without overfitting, even in high-dimensional settings. The low computational and memory storage requirements allow the APE method to be used under limited resources. Using the vertices of the CH of the training set, outliers can be recognized relatively easily. However, the boundaries of the training data may not be well modeled by APE in more general non-convex scenarios. Furthermore, due to its ability to manage strongly non-convex distributions, NAPE, a more general extension of the APE algorithm, outperforms the rest of the outlier detection methods, including APE, in many cases [<xref ref-type="bibr" rid="B109">109</xref>,<xref ref-type="bibr" rid="B110">110</xref>]. Nevertheless, further efforts are needed to reduce the computational costs of building NAPE.</p>
</sec>
</sec>
<sec id="s6-3">
<title>6.3 Online Convex Programming Method</title>
<sec id="s6-3-1">
<title>6.3.1 Model Description</title>
<p>Unlike the CH method, which is largely an offline algorithm, the online convex programming (OCP) method can be used for online anomaly detection on data streams. OCP, such as the online gradient descent (OGD) algorithm [<xref ref-type="bibr" rid="B117">117</xref>], as defined by Zinkevich [<xref ref-type="bibr" rid="B118">118</xref>], features a sequence of convex programs whose feasible sets are identical but whose cost functions differ. Based on what has been learned so far, the algorithm must choose a point that keeps the cumulative cost as low as possible before observing the cost function. Whenever the anomaly score (i.e., probability, density, or other custom metrics) efficiently and simply calculated by the OCP for the current state falls below the dynamic threshold, we declare an anomaly.</p>
<p>OCP can be broadly viewed as a game between two opponents: the <italic>Forecaster</italic> and the <italic>Environment</italic> [<xref ref-type="bibr" rid="B74">74</xref>,<xref ref-type="bibr" rid="B119">119</xref>]. The <italic>Forecaster</italic> constantly predicts changes in a dynamic <italic>Environment</italic>, where the influence of the <italic>Environment</italic> is depicted by a sequence of convex cost functions with arbitrary variations over a given feasible set, and the <italic>Forecaster</italic> attempts to pick the next feasible point in such a way to reduce the cumulative cost as much as possible.</p>
<p>An OCP problem with horizon <italic>T</italic> can be specified by a convex feasible set <inline-formula id="inf37">
<mml:math id="m62">
<mml:mi>U</mml:mi>
<mml:mo>&#x2286;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> and a family of convex functions <inline-formula id="inf38">
<mml:math id="m63">
<mml:mi mathvariant="script">F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mtext mathvariant="italic">f</mml:mtext>
<mml:mo>:</mml:mo>
<mml:mi>U</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>. The algorithm of OCP is described in the following section.</p>
<p>
<statement content-type="algorithm" id="alg2">
<label>Algorithm 2</label>
<p>Online convex programming</p>
<p>
<inline-graphic xlink:href="fphy-10-873848-fx2.tif"/>
</p>
<p>The <italic>Forecaster</italic> aims to minimize the difference between the actual cost incurred after <italic>T</italic> rounds of the game and the smallest cumulative cost that could be achieved in hindsight using a single feasible point. Given a strategy <italic>&#x3bc;</italic>
<sup>
<italic>T</italic>
</sup> and a cost function tuple <italic>f</italic><sup>
<italic>T</italic>
</sup>, the <italic>regret</italic> w.r.t. <italic>u</italic>
<sup>
<italic>T</italic>
</sup> is defined as <xref ref-type="disp-formula" rid="e26">Eq. 26</xref>
<disp-formula id="e26">
<mml:math id="m64">
<mml:msub>
<mml:mrow>
<mml:mtext mathvariant="italic">R</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mtext mathvariant="italic">f</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mtext mathvariant="italic">u</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x225c;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(26)</label>
</disp-formula>where <italic>u</italic>
<sup>
<italic>T</italic>
</sup> &#x3d; (<italic>u</italic>
<sub>1</sub>, ..., <italic>u</italic>
<sub>
<italic>T</italic>
</sub>) &#x2208; <italic>U</italic><sup>
<italic>T</italic>
</sup>, a time-varying tuple, is a comparison strategy distinct from the <italic>Forecaster</italic>&#x2019;s observation-driven strategy <italic>&#x3bc;</italic>
<sup>
<italic>T</italic>
</sup> and it does not depend on the previous points or cost functions but only on the time index <italic>t</italic>.</p>
<p>Then the goal would be to select a suitably restricted subset <inline-formula id="inf39">
<mml:math id="m65">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2282;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>U</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> and employ the <italic>Forecaster</italic>&#x2019;s tactic <italic>&#x3bc;</italic>
<sup>
<italic>T</italic>
</sup> to ensure that the worst-case regret<disp-formula id="e27">
<mml:math id="m66">
<mml:munder>
<mml:mrow>
<mml:mi>sup</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
<mml:munder>
<mml:mrow>
<mml:mi>sup</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mtext mathvariant="italic">R</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mtext mathvariant="italic">f</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mtext mathvariant="italic">u</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2261;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>sup</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>inf</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(27)</label>
</disp-formula>is sublinear in <italic>T</italic>. Whenever the anomaly score (i.e., probability, density, or other custom metrics) efficiently and simply calculated by the OCP for the current state falls below the dynamic threshold, we declare an anomaly.</p>
</statement>
</p>
</sec>
<sec id="s6-3-2">
<title>6.3.2 Systems and Applications</title>
<p>Online anomaly detection based on OCP fulfills the needs of some fields, such as industrial production and network routing, where decisions should be made before the comprehension of true costs.</p>
<p>Inspired by recent developments in OCP, Raginsky et al. [<xref ref-type="bibr" rid="B120">120</xref>] designed and analyzed a so-called FHTAGN method consisting of assigning a belief (probability) and flagging potential anomalies according to the belief, exploring online anomaly detection methods with dynamic thresholding built on limited feedback. Nevertheless, classic statistical change point detection studies, such as this work [<xref ref-type="bibr" rid="B120">120</xref>], surveyed the transient outlier instead of the persistent change. Therefore, persistent change was considered for anomaly detection based on OCP. Further improvements have been made to achieve lower computational complexity [<xref ref-type="bibr" rid="B121">121</xref>] or higher anomaly detection accuracy [<xref ref-type="bibr" rid="B122">122</xref>].</p>
</sec>
<sec id="s6-3-3">
<title>6.3.3 Strengths and Limitations</title>
<p>Convex optimization provides a more versatile approach to tackling complex situations, especially sequential change point detection. Its efficiency and simplicity make it possible to perform computations in real-time. By the convex cost function of the <italic>Environment</italic>, schemes such as mirror descent for the OCP method are possible. It allows us to remarkably predict the extrinsic anomalous behavior for the next observation concerning the best model based on what we have seen in the past. However, this work has not been extended to any arbitrary anomaly detection method.</p>
</sec>
</sec>
<sec id="s6-4">
<title>6.4 Neural Network Method</title>
<sec id="s6-4-1">
<title>6.4.1 Model Description</title>
<p>In machine learning, especially deep learning, the neural network (NN) is another essential algorithm through which CA contributes to anomaly detection, with its core gradient descent method being the most significant technique in CA [<xref ref-type="bibr" rid="B123">123</xref>]. For anomaly detection, a NN extracts the characteristics of abnormal behavior by adaptive learning and learns the normal behavior pattern from the training set. Then, samples with anomaly-related labels in the test set are identified as anomalies [<xref ref-type="bibr" rid="B124">124</xref>].</p>
<p>The loss function to be minimized in a NN is:<disp-formula id="e28">
<mml:math id="m67">
<mml:mi>L</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo stretchy="false">&#x2223;</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>l</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(28)</label>
</disp-formula>where <italic>w</italic> is the weight of the network, <italic>X</italic> is the training set with labels and <italic>l</italic>(<italic>x</italic>, <italic>w</italic>) denotes the loss calculated from the sample <italic>x</italic> &#x2208; <italic>X</italic> and its label.</p>
<p>The gradient descent method is a first-order optimization algorithm usually applied to find the minima of a function. An iterative search is performed to the point with the specified step size from the current point along the opposite direction of the gradient (or approximate gradient), which is the direction of steepest descent. As the most common gradient descent method in NN, minibatch stochastic gradient descent [<xref ref-type="bibr" rid="B45">45</xref>] is usually called simply stochastic gradient descent (SGD) in recent literature even though it operates on mini-batches. It performs the following parameter update:<disp-formula id="e29">
<mml:math id="m68">
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b7;</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">B</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>l</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(29)</label>
</disp-formula>where <inline-formula id="inf40">
<mml:math id="m69">
<mml:mi mathvariant="script">B</mml:mi>
</mml:math>
</inline-formula> is the <italic>minibatch</italic> sampled from <italic>X</italic> and <inline-formula id="inf41">
<mml:math id="m70">
<mml:mi>N</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">B</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:math>
</inline-formula> is the minibatch size, <italic>&#x3b7;</italic> denotes the learning rate, <italic>t</italic> represents the iteration index, and &#x2207;<italic>l</italic>(<italic>x</italic>, <italic>w</italic>
<sub>
<italic>t</italic>
</sub>) represents the gradient of the loss <italic>l</italic>(<italic>x</italic>, <italic>w</italic>). Therefore, the parameter update is a back-propagation process along the gradient, as demonstrated in <xref ref-type="disp-formula" rid="e30">Eq. 30</xref>:<disp-formula id="e30">
<mml:math id="m71">
<mml:mo>&#x2207;</mml:mo>
<mml:mi>l</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi>l</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x2202;</mml:mi>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:math>
<label>(30)</label>
</disp-formula>
</p>
</sec>
<sec id="s6-4-2">
<title>6.4.2 Systems and Applications</title>
<p>Many neural networks have been applied to specific fields of anomaly detection and have been investigated with appealing results. For example, Zenati et al. [<xref ref-type="bibr" rid="B125">125</xref>] leveraged bidirectional generative adversarial networks (BiGAN) for image and network intrusion detection, Gao et al. [<xref ref-type="bibr" rid="B126">126</xref>] applied CNN for time series anomaly detection in 367 public benchmark datasets from Yahoo, and Xu et al. [<xref ref-type="bibr" rid="B127">127</xref>] proposed a cluster-based deep adaptation network (CDAN) model that is adaptable for the spinning power consumption anomaly detection problem in the real-environment yarn spinning workshop. These studies have achieved a desirable performance and high speed.</p>
</sec>
<sec id="s6-4-3">
<title>6.4.3 Strengths and Limitations</title>
<p>As an unconstrained optimization in convex optimization theory, the gradient descent method achieves a rapid decline in the loss function by the convex path, contributing considerably to the behavior learning of normal samples and anomalies. NN is a non-parametric method that typically employs gradient descent. With the best architecture and an efficient training procedure, anomaly detection by a NN exhibits higher AUC and F1 scores than other state-of-the-art methods, such as LRR [<xref ref-type="bibr" rid="B88">88</xref>] and isolation forests (IF) [<xref ref-type="bibr" rid="B128">128</xref>]. Nevertheless, a NN generally requires adequate training data for convergence. Another critical drawback of this method may be that it cannot provide the analyst with clear interpretability of why the system believes an entity is potentially anomalous.</p>
</sec>
</sec>
</sec>
<sec id="s7">
<title>7 Other Convex Analysis-Based Anomaly Detection Methods</title>
<p>In addition to the density estimation method, matrix factorization method, and machine learning method, there are also a number of other CA-based anomaly detection methods which still benefit from the geometrical and computational advantages of CA. Robust approximation, efficient computation, and mathematical optimization of CA make these techniques effective and reliable, which is a critical feature for deployment in practice.</p>
<p>In [<xref ref-type="bibr" rid="B31">31</xref>], a novel technique of finding a convex combination of outputs from anomaly detectors to maximize the number of true alarms in the <italic>&#x3c4;</italic>-fraction of most anomalous samples was proposed for the security domain. In the experimental evaluation of attack detection on NetFlow and HTTP network data, this technique outperformed prior work and was also more robust to noise in the labels of the training data.</p>
<p>In [<xref ref-type="bibr" rid="B129">129</xref>], an anomaly detector for control systems based on CUSUM was improved by breaking down the original nonlinearity into several convex optimization problems. In a simple example, it is shown that this anomaly detector could better diminish the attack impact and detect attacks.</p>
<p>In [<xref ref-type="bibr" rid="B32">32</xref>], CM<sub>
<italic>T</italic>
</sub>MSOM was proposed with the contributions of the powerful convex and continuous optimization techniques to diagnose Parkinson&#x2019;s disease. Results on the Parkinson telemonitoring dataset indicate that this method performs better than current parametric models.</p>
<p>In [<xref ref-type="bibr" rid="B130">130</xref>], CRO-FADALARA was proposed with a cleaning procedure and RO-FADALARA (Robust Outlier FADA for LARge Applications) to detect functional anomalies. This approach can not only return archetypoids but also output a set of outliers together with the importance that each variable had in the outlier detection. In [<xref ref-type="bibr" rid="B131">131</xref>], anomalous events during gameplay were detected through archetypal analysis (AA) with the reconstruction error distribution. In addition, archetypal analysis was explored to detect hyperspectral anomalies [<xref ref-type="bibr" rid="B132">132</xref>], anomalous flows in urban water networks [<xref ref-type="bibr" rid="B133">133</xref>], and so on.</p>
</sec>
<sec id="s8">
<title>8 Benchmark and Comparison</title>
<p>Based on the experiments reported in several representative studies on CA-based anomaly detection, we summarize the performance of CA-based methods and other baseline methods on some gold-standard datasets. A comprehensive comparison is given in <xref ref-type="table" rid="T3">Table 3</xref>, in which the model with the best performance in the respective dataset is presented, and the CA-based methods are shown in bold. We then discuss the support vector domain method and the online convex programming method in detail.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Anomaly detection performance of CA-based methods compared with other baseline methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Categories</th>
<th align="center">Dataset</th>
<th align="center">Metrics</th>
<th colspan="2" align="center">Methods and their performance</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Indirect density estimation [<xref ref-type="bibr" rid="B71">71</xref>]</td>
<td align="left">SMART dataset [<xref ref-type="bibr" rid="B70">70</xref>]</td>
<td align="left">AUC</td>
<td align="left">
<bold>uLSIF (0.881) KLIEP (0.836) LogReg (0.856) KMM (0.861)</bold>
</td>
<td align="left">OSVM (0.843) LOF (0.847) KDE (0.736)</td>
</tr>
<tr>
<td align="left">Matrix factorization [<xref ref-type="bibr" rid="B88">88</xref>]</td>
<td align="left">Yale-Caltech [<xref ref-type="bibr" rid="B138">138</xref>]</td>
<td align="left">AUC</td>
<td align="left">
<bold>LRR (0.9927) RPCA (0.9863)</bold>
</td>
<td align="left">SR (0.9239) PCA (0.9653)</td>
</tr>
<tr>
<td align="left">Support vector domain [<xref ref-type="bibr" rid="B43">43</xref>]</td>
<td align="left">Water pump dataset [<xref ref-type="bibr" rid="B134">134</xref>]</td>
<td align="left">
<italic>&#x3f5;</italic>
<sub>
<italic>M</italic>
</sub> (%)</td>
<td align="left">Normal density (16.6) Parzen density (42.0)</td>
<td align="left">MoG (14.4) KNN (22.5) <bold>SVDD (9.9)</bold>
</td>
</tr>
<tr>
<td align="left">Convex hull [<xref ref-type="bibr" rid="B110">110</xref>]</td>
<td align="left">User verification dataset [<xref ref-type="bibr" rid="B139">139</xref>]</td>
<td align="left">AUC</td>
<td align="left">Normal density (0.87) Minimum Spanning Trees (0.92) K-means (0.93)</td>
<td align="left">MoG (0.92) <bold>APE (0.93) NAPE (0.98)</bold>
</td>
</tr>
<tr>
<td align="left">Online convex programming [<xref ref-type="bibr" rid="B122">122</xref>]</td>
<td align="left">Occupancy dataset [<xref ref-type="bibr" rid="B136">136</xref>]</td>
<td align="left">AUC</td>
<td align="left">
<bold>AD-HKDE (0.9907)</bold> K-D Tree (0.9854) FOGD (0.9490)</td>
<td align="left">KNN (0.9854) KDE (0.9368)</td>
</tr>
<tr>
<td align="left">Neural network [<xref ref-type="bibr" rid="B126">126</xref>]</td>
<td align="left">Yahoo benchmark datasets [<xref ref-type="bibr" rid="B140">140</xref>]</td>
<td align="left">F1 Score</td>
<td align="left">
<bold>RobustTAD (0.693)</bold> ARIMA (0.225)</td>
<td align="left">SHESD (0.494) Donut (0.029)</td>
</tr>
<tr>
<td align="left">Other methods [<xref ref-type="bibr" rid="B33">33</xref>]</td>
<td align="left">Breast Cancer Wisconsin [<xref ref-type="bibr" rid="B141">141</xref>]</td>
<td align="left">AUC</td>
<td align="left">
<bold>AA &#x2b; k-NN (0.9851)</bold> LOF (0.9816)</td>
<td align="left">RPCA (0.9664) HBOS (0.9827) KNN (0.9791)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The CA-based methods are shown in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>To investigate how the SVDD works in a real outlier detection problem, Tax and Duin focused on a machine diagnostics problem: distinguishing pumps with faulty operating conditions [<xref ref-type="bibr" rid="B43">43</xref>]. In the dataset of the submersible water pump [<xref ref-type="bibr" rid="B134">134</xref>], the outlier data contains pumping situations with loose foundation, imbalance and failure in loads and speeds of the pump. To see how well the SVDD performs, they compared it with a number of other methods, including the normal density, the MoG (optimized using EM), the Parzen density, and KNN. To make a more quantitative comparison, an error measure (<italic>&#x3f5;</italic>
<sub>
<italic>M</italic>
</sub>) is derived from the ROC curves, as demonstrated in <xref ref-type="disp-formula" rid="e31">Eq. 31</xref>,<disp-formula id="e31">
<mml:math id="m72">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a0;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:math>
<label>(31)</label>
</disp-formula>where <italic>&#x3f5;</italic>
<sub>1</sub> is the error of the first kind and <italic>&#x3f5;</italic>
<sub>&#x3a0;</sub> is the error of the second kind over the investigated interval (A, B) [<xref ref-type="bibr" rid="B135">135</xref>]. The methods were applied to a number of features ranging from 3 up to 64, and <xref ref-type="table" rid="T3">Table 3</xref> shows their overall best performances with 30 features. The results indicate that in almost all cases, the SVDD, which focuses on modeling the boundary, obtains better performance than the other methods, especially for higher dimensionalities.</p>
<p>In addition, with the latest progress of the online convex programming method, the anomaly detector with hierarchical kernel density estimators (AD-HKDE) method was applied to the Occupancy dataset [<xref ref-type="bibr" rid="B136">136</xref>], which consists of 10,808 data points whose labels correspond to occupied (normal) and unoccupied (anomalous) room states [<xref ref-type="bibr" rid="B122">122</xref>], and seven other real-world datasets. Using ROC and AUC, the performance of AD-HKDE was compared with that of KNN, K-D tree nearest neighbor search (K-D Tree), Fourier online gradient descent (FOGD) [<xref ref-type="bibr" rid="B137">137</xref>], and kernel density estimation (KDE). As seen in <xref ref-type="table" rid="T3">Table 3</xref>, the AD-HKDE method achieves the highest AUC score, indicating that it has a stronger guarantee in relatively smaller false alarm regions (except a few cases). However, when the data size is small, AD-HKDE cannot perfectly learn the bandwidths in all regions across time, thus yielding relatively unsatisfactory anomaly detection results.</p>
</sec>
<sec id="s9">
<title>9 Conclusion and Discussion</title>
<p>Anomaly detection is a crucial technique used to identify abnormal samples with behavior or patterns conveying critical (usually harmful or even fatal) information. CA has been widely used in anomaly detection because of its ability to provide robust algebraic and geometric approximations, efficiently compute globally unique solutions, and support rigorous mathematical optimization. However, little work has provided a comprehensive classification of CA-based anomaly detection. In this paper, we classify the existing CA-based anomaly detection techniques into four categories: density estimation, matrix factorization, machine learning, and other methods, according to the underlying principle of CA in anomaly detection. Models covering a wide range of application domains and data types, from the general to the particular (such as matrices and time series), have been intensively investigated. The main methods discussed in this review are summarized in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Summary of convex theory and its application in this paper.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th colspan="2" align="left">Category</th>
<th align="center">Theoretical basis of convex analysis</th>
<th align="center">Strengths and limitations</th>
<th align="center">Typical applications</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">Density estimation</td>
<td align="left">direct</td>
<td align="left">Jensen&#x2019;s inequality</td>
<td align="left">Wide application, quick computation; the difficulty of choosing the number of mixed components, sensitivity to the curse of dimensionality</td>
<td align="left">Nuclear explosion detection, biological virus invasion recognition</td>
</tr>
<tr>
<td align="left">indirect</td>
<td align="left">Least squares approximation</td>
<td align="left">High scalability to large data sets; vulnerability to a poisoning attack</td>
<td align="left">Network intrusion detection, computer game cheats detection</td>
</tr>
<tr>
<td align="left">Matrix factorization</td>
<td align="left">\</td>
<td align="left">Norm approximation</td>
<td align="left">Efficient computation; limitation to matrix data</td>
<td align="left">Image outlier detection, process fault monitoring</td>
</tr>
<tr>
<td rowspan="4" align="left">Machine learning</td>
<td align="left">support vector domain</td>
<td align="left">Convex quadratic programming and convex polytope</td>
<td align="left">Appealing performance on sparse and complex data sets; poor suitability for data with small target error rates and high dimensionality</td>
<td align="left">Machine diagnostics, disease detection</td>
</tr>
<tr>
<td align="left">convex hull</td>
<td align="left">Convex polytope</td>
<td align="left">No overfitting even in the high-dimension situation; incomplete advantages of every model</td>
<td align="left">Industrial fault intelligent diagnosis, multiaxial high-cycle fatigue recognition</td>
</tr>
<tr>
<td align="left">online convex programming</td>
<td align="left">Online convex programming</td>
<td align="left">Real-time computation in online anomaly detection; no extension to arbitrary application</td>
<td align="left">Stream data detection in industry production, network routing, and other fields</td>
</tr>
<tr>
<td align="left">neural network</td>
<td align="left">Steepest descent</td>
<td align="left">Strong performance; requires adequate data, no clear explanations about the mechanism of the anomaly detection</td>
<td align="left">Image outlier detection, network intrusion detection</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In summary, this paper presents an in-depth literature review of the CA-based anomaly detection techniques, including their latest progress, systems and applications, as well as strengths and limitations. Functions and contributions of CA in anomaly detection are underlined, demonstrating the multidisciplinary property of CA-based anomaly detection and providing a new and succinct understanding of the association between anomaly detection and CA.</p>
<p>With the remarkable progress made in the techniques of big data and machine learning, CA-based anomaly detection shows great promise for more expeditious, accurate and intelligent detection capacities. In this field, further research should be conducted on the following open challenges to explore this promising domain:<list list-type="simple">
<list-item>
<p>1) The density estimation and matrix factorization techniques mentioned in this paper are popular and effective CA-based anomaly detection strategies that declare observations anomalous if their values fall below or exceed some threshold. However, how to set this threshold with high efficiency remains in doubt, and this notoriously difficult problem should be resolved.</p>
</list-item>
<list-item>
<p>2) At present, the data streams generated in many industrial scenarios put forward higher requirements for anomaly detection algorithms, and real-time results should be generated without waiting for all inputs. Consequently, taking the support vector domain method as an example, future studies should explore how to utilize an online process to learn the hypersphere boundary of SVDD in streaming environments.</p>
</list-item>
<list-item>
<p>3) Incorporating prior rules into convex theory-based anomaly detection models, especially machine learning methods, could be investigated intensively to enhance their performance. For instance, the structural information of the data itself could be mined using norms, such as the <italic>&#x2113;</italic>
<sub>2,1</sub> norm and <italic>&#x2113;</italic>
<sub>2,0</sub> norm.</p>
</list-item>
<list-item>
<p>4) Considering the data characteristics of the anomaly detection domain, where anomalies are few and the two classes are extremely imbalanced, the generalization ability of machine learning methods, especially gradient descent-based models, should be strengthened to be more suitable and applicable.</p>
</list-item>
</list>
</p>
</sec>
</body>
<back>
<sec id="s10">
<title>Author Contributions</title>
<p>Conceptualization: TW and XL; Methodology: TW and XO; Validation: MC and ZC; Formal analysis: MC and ZC; Investigation: MC and TC; Resources: XO and ZC; Writing&#x2014;original draft: TW and XO; Writing&#x2014;review and editing: TW and XL; Visualization: ZC; Supervision: XT; Project administration: XL; Funding acquisition: TC and XT. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s11">
<title>Funding</title>
<p>This research was funded by the National Natural Science Foundation of China (72025405, 72088101, 91846301, 71790615, and 71774168), the Shenzhen Basic Research Project for Development of Science and Technology (JCYJ20200109141218676 and 202008291726500001) and the Hunan Science and Technology Plan Project (2020TP1013 and 2020JJ4673).</p>
</sec>
<sec sec-type="COI-statement" id="s12">
<title>Conflict of Interest</title>
<p>ZC was employed by the Power China Zhongnan Engineering Corporation Limited.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chandola</surname>
<given-names>V</given-names>
</name>
<name>
<surname>Banerjee</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>V</given-names>
</name>
</person-group>. <article-title>Anomaly Detection</article-title>. <source>ACM Comput Surv</source> (<year>2009</year>) <volume>41</volume>:<fpage>1</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1145/1541880.1541882</pub-id> </citation>
</ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Harrou</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Kadri</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Chaabane</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Tahon</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Improved Principal Component Analysis for Anomaly Detection: Application to an Emergency Department</article-title>. <source>Comput Ind Eng</source> (<year>2015</year>) <volume>88</volume>:<fpage>63</fpage>&#x2013;<lpage>77</lpage>. <pub-id pub-id-type="doi">10.1016/j.cie.2015.06.020</pub-id> </citation>
</ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aryal</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Santosh</surname>
<given-names>KC</given-names>
</name>
<name>
<surname>Dazeley</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>usfAD: A Robust Anomaly Detector Based on Unsupervised Stochastic Forest</article-title>. <source>Int J Mach Learn Cyber</source> (<year>2021</year>) <volume>12</volume>:<fpage>1137</fpage>&#x2013;<lpage>50</lpage>. <pub-id pub-id-type="doi">10.1007/s13042-020-01225-0</pub-id> </citation>
</ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Edgeworth</surname>
<given-names>FY</given-names>
</name>
</person-group>. <article-title>XLI. On Discordant Observations</article-title>. <source>The Lond Edinb Dublin Philosophical Mag J Sci</source> (<year>1887</year>) <volume>23</volume>:<fpage>364</fpage>&#x2013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1080/14786448708628471</pub-id> </citation>
</ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Almiani</surname>
<given-names>M</given-names>
</name>
<name>
<surname>AbuGhazleh</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Jararweh</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Razaque</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>DDoS Detection in 5G-Enabled IoT Networks Using Deep Kalman Backpropagation Neural Network</article-title>. <source>Int J Mach Learn Cyber</source> (<year>2021</year>) <volume>12</volume>:<fpage>3337</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1007/s13042-021-01323-7</pub-id> </citation>
</ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Umer</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Frederickson</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Polikar</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>Vulnerability of Covariate Shift Adaptation against Malicious Poisoning Attacks</article-title>. In: <conf-name>2019 International Joint Conference on Neural Networks (IJCNN)</conf-name> (<year>2019</year>). p. <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/IJCNN.2019.8851748</pub-id> </citation>
</ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmed</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Woo</surname>
<given-names>WL</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Ensemble Joint Sparse Low-Rank Matrix Decomposition for Thermography Diagnosis System</article-title>. <source>IEEE Trans Ind Electron</source> (<year>2021</year>) <volume>68</volume>:<fpage>2648</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1109/TIE.2020.2975484</pub-id> </citation>
</ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Eduardo</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Nazabal</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>CKI</given-names>
</name>
<name>
<surname>Sutton</surname>
<given-names>C</given-names>
</name>
</person-group>. <article-title>Robust Variational Autoencoders for Outlier Detection and Repair of Mixed-type Data</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Chiappa</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Calandra</surname>
<given-names>R</given-names>
</name>
</person-group>, editors. <source>Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics</source>. <source>Vol. 108 of Proceedings of Machine Learning Research</source>. <publisher-loc>New York City, NY, USA</publisher-loc>: <publisher-name>PMLR</publisher-name> (<year>2020</year>). p. <fpage>4056</fpage>&#x2013;<lpage>66</lpage>. </citation>
</ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>Q</given-names>
</name>
</person-group>. <article-title>Robust Principal Component Analysis: A Factorization-Based Approach with Linear Complexity</article-title>. <source>Inf Sci</source> (<year>2020</year>) <volume>513</volume>:<fpage>581</fpage>&#x2013;<lpage>99</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2019.09.074</pub-id> </citation>
</ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>A-X</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>Q</given-names>
</name>
</person-group>. <article-title>Low Rank and Collaborative Representation for Hyperspectral Anomaly Detection via Robust Dictionary Construction</article-title>. <source>ISPRS J Photogrammetry Remote Sensing</source> (<year>2020</year>) <volume>169</volume>:<fpage>195</fpage>&#x2013;<lpage>211</lpage>. <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2020.09.008</pub-id> </citation>
</ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>A Novel Computational Approach for Discord Search with Local Recurrence Rates in Multivariate Time Series</article-title>. <source>Inf Sci</source> (<year>2019</year>) <volume>477</volume>:<fpage>220</fpage>&#x2013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2018.10.047</pub-id> </citation>
</ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname>
<given-names>M-Y</given-names>
</name>
</person-group>. <article-title>Using Clustering to Improve the KNN-Based Classifiers for Online Anomaly Network Traffic Identification</article-title>. <source>J Netw Computer Appl</source> (<year>2011</year>) <volume>34</volume>:<fpage>722</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1016/j.jnca.2010.10.009</pub-id> </citation>
</ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Muniyandi</surname>
<given-names>AP</given-names>
</name>
<name>
<surname>Rajeswari</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Rajaram</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>Network Anomaly Detection by Cascading K-Means Clustering and C4.5 Decision Tree Algorithm</article-title>. <source>Proced Eng</source> (<year>2012</year>) <volume>30</volume>:<fpage>174</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1016/j.proeng.2012.01.849</pub-id> </citation>
</ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>YF</given-names>
</name>
</person-group>. <article-title>Anomaly Detection Based on Enhanced DBSCAN Algorithm</article-title>. <source>Proced Eng</source> (<year>2011</year>) <volume>15</volume>:<fpage>178</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1016/j.proeng.2011.08.036</pub-id> </citation>
</ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>X-H</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>J-Z</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z-C</given-names>
</name>
</person-group>. <article-title>Intelligent Fault Diagnosis Using Rough Set Method and Evidence Theory for NC Machine Tools</article-title>. <source>Int J Computer Integrated Manufacturing</source> (<year>2009</year>) <volume>22</volume>:<fpage>472</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1080/09511920802537995</pub-id> </citation>
</ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mascaro</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Nicholson</surname>
<given-names>AE</given-names>
</name>
<name>
<surname>Korb</surname>
<given-names>KB</given-names>
</name>
</person-group>. <article-title>Anomaly Detection in Vessel Tracks Using Bayesian Networks</article-title>. <source>Int J Approximate Reasoning</source> (<year>2014</year>) <volume>55</volume>:<fpage>84</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijar.2013.03.012</pub-id> </citation>
</ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z</given-names>
</name>
</person-group>. <article-title>Anomaly Detection Based on a Dynamic Markov Model</article-title>. <source>Inf Sci</source> (<year>2017</year>) <volume>411</volume>:<fpage>52</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2017.05.021</pub-id> </citation>
</ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nagpal</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Brar</surname>
<given-names>YS</given-names>
</name>
</person-group>. <article-title>Artificial Neural Network Approaches for Fault Classification: Comparison and Performance</article-title>. <source>Neural Comput Applic</source> (<year>2014</year>) <volume>25</volume>:<fpage>1863</fpage>&#x2013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-014-1677-y</pub-id> </citation>
</ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>Z</given-names>
</name>
</person-group>. <article-title>Unsupervised Learning for Fault Detection and Diagnosis of Air Handling Units</article-title>. <source>Energy and Buildings</source> (<year>2020</year>) <volume>210</volume>:<fpage>109689</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2019.109689</pub-id> </citation>
</ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rockafellar</surname>
<given-names>RT</given-names>
</name>
</person-group>. <source>Convex Analysis</source>. <publisher-loc>Princeton, NJ, USA</publisher-loc>: <publisher-name>Princeton University Press</publisher-name> (<year>1970</year>). <pub-id pub-id-type="doi">10.1515/9781400873173</pub-id> </citation>
</ref>
<ref id="B21">
<label>21.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Bah</surname>
<given-names>MJ</given-names>
</name>
<name>
<surname>Hammad</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Progress in Outlier Detection Techniques: A Survey</article-title>. <source>IEEE Access</source> (<year>2019</year>) <volume>7</volume>:<fpage>107964</fpage>&#x2013;<lpage>8000</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2932769</pub-id> </citation>
</ref>
<ref id="B22">
<label>22.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nachman</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Shih</surname>
<given-names>D</given-names>
</name>
</person-group>. <article-title>Anomaly Detection with Density Estimation</article-title>. <source>Phys Rev D</source> (<year>2020</year>) <volume>101</volume>:<fpage>075042</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevD.101.075042</pub-id> </citation>
</ref>
<ref id="B23">
<label>23.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mordukhovich</surname>
<given-names>BS</given-names>
</name>
<name>
<surname>Nam</surname>
<given-names>NM</given-names>
</name>
</person-group>. <article-title>An Easy Path to Convex Analysis and Applications</article-title>. <source>Synth Lectures Mathematics Stat</source> (<year>2013</year>) <volume>6</volume>:<fpage>1</fpage>&#x2013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.2200/S00554ED1V01Y201312MAS014</pub-id> </citation>
</ref>
<ref id="B24">
<label>24.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X</given-names>
</name>
</person-group>. <article-title>Similarity Constrained Convex Nonnegative Matrix Factorization for Hyperspectral Anomaly Detection</article-title>. <source>IEEE Trans Geosci Remote Sensing</source> (<year>2019</year>) <volume>57</volume>:<fpage>4810</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1109/TGRS.2019.2893116</pub-id> </citation>
</ref>
<ref id="B25">
<label>25.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Niggemann</surname>
<given-names>O</given-names>
</name>
</person-group>. <article-title>Non-Convex Hull Based Anomaly Detection in CPPS</article-title>. <source>Eng Appl Artif Intelligence</source> (<year>2020</year>) <volume>87</volume>:<fpage>103301</fpage>. <pub-id pub-id-type="doi">10.1016/j.engappai.2019.103301</pub-id> </citation>
</ref>
<ref id="B26">
<label>26.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pachman</surname>
<given-names>JM</given-names>
</name>
</person-group>. <article-title>Optimization of Seismic Reconnaissance Surveys in Petroleum Exploration</article-title>. <source>Management Sci</source> (<year>1966</year>) <volume>12</volume>:<fpage>B-312</fpage>. <pub-id pub-id-type="doi">10.1287/mnsc.12.8.b312</pub-id> </citation>
</ref>
<ref id="B27">
<label>27.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goernitz</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Kloft</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Rieck</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Brefeld</surname>
<given-names>U</given-names>
</name>
</person-group>. <article-title>Toward Supervised Anomaly Detection</article-title>. <source>J Artif Intell Res</source> (<year>2013</year>) <volume>46</volume>:<fpage>235</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1613/jair.3623</pub-id> </citation>
</ref>
<ref id="B28">
<label>28.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Turchini</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Seidenari</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Del Bimbo</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Convex Polytope Ensembles for Spatio-Temporal Anomaly Detection</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Battiato</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Gallo</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Schettini</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Stanco</surname>
<given-names>F</given-names>
</name>
</person-group>, editors. <source>Image Analysis and Processing - ICIAP 2017</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name> (<year>2017</year>). p. <fpage>174</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-68560-1_16</pub-id> </citation>
</ref>
<ref id="B29">
<label>29.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>A Review of Applications of Artificial Intelligent Algorithms in Wind Farms</article-title>. <source>Artif Intell Rev</source> (<year>2020</year>) <volume>53</volume>:<fpage>3447</fpage>&#x2013;<lpage>500</lpage>. <pub-id pub-id-type="doi">10.1007/s10462-019-09768-7</pub-id> </citation>
</ref>
<ref id="B30">
<label>30.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krizhevsky</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Sutskever</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>GE</given-names>
</name>
</person-group>. <article-title>ImageNet Classification with Deep Convolutional Neural Networks</article-title>. <source>Adv Neural Inf Process Syst</source> (<year>2012</year>) <volume>25</volume>:<fpage>1097</fpage>&#x2013;<lpage>105</lpage>. </citation>
</ref>
<ref id="B31">
<label>31.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grill</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Pevn&#xfd;</surname>
<given-names>T</given-names>
</name>
</person-group>. <article-title>Learning Combination of Anomaly Detectors for Security Domain</article-title>. <source>Computer Networks</source> (<year>2016</year>) <volume>107</volume>:<fpage>55</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1016/j.comnet.2016.05.021</pub-id> </citation>
</ref>
<ref id="B32">
<label>32.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taylan</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Yerlikaya-&#xd6;zkurt</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Bilgi&#xe7; U&#xe7;ak</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Weber</surname>
<given-names>G-W</given-names>
</name>
</person-group>. <article-title>A New Outlier Detection Method Based on Convex Optimization: Application to Diagnosis of Parkinson&#x27;s Disease</article-title>. <source>J Appl Stat</source> (<year>2021</year>) <volume>48</volume>:<fpage>2421</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1080/02664763.2020.1864815</pub-id> </citation>
</ref>
<ref id="B33">
<label>33.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cabero</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Epifanio</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Pi&#xe9;rola</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Ballester</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Archetype Analysis: A New Subspace Outlier Detection Approach</article-title>. <source>Knowledge-Based Syst</source> (<year>2021</year>) <volume>217</volume>:<fpage>106830</fpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2021.106830</pub-id> </citation>
</ref>
<ref id="B34">
<label>34.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Q</given-names>
</name>
</person-group>. <article-title>A Review on Representative Swarm Intelligence Algorithms for Solving Optimization Problems: Applications and Trends</article-title>. <source>IEEE/CAA J Autom Sinica</source> (<year>2021</year>) <volume>8</volume>:<fpage>1627</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1109/JAS.2021.1004129</pub-id> </citation>
</ref>
<ref id="B35">
<label>35.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mukherjee</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Heberlein</surname>
<given-names>LT</given-names>
</name>
<name>
<surname>Levitt</surname>
<given-names>KN</given-names>
</name>
</person-group>. <article-title>Network Intrusion Detection</article-title>. <source>IEEE Netw</source> (<year>1994</year>) <volume>8</volume>:<fpage>26</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1109/65.283931</pub-id> </citation>
</ref>
<ref id="B36">
<label>36.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Guizani</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>A Multiple-Kernel Clustering Based Intrusion Detection Scheme for 5G and IoT Networks</article-title>. <source>Int J Mach Learn Cyber</source> (<year>2021</year>) <volume>12</volume>:<fpage>3129</fpage>&#x2013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1007/s13042-020-01253-w</pub-id> </citation>
</ref>
<ref id="B37">
<label>37.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peterson</surname>
<given-names>TC</given-names>
</name>
<name>
<surname>Stott</surname>
<given-names>PA</given-names>
</name>
<name>
<surname>Herring</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Explaining Extreme Events of 2011 from a Climate Perspective</article-title>. <source>Bull Amer Meteorol Soc</source> (<year>2012</year>) <volume>93</volume>:<fpage>1041</fpage>&#x2013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1175/BAMS-D-12-00021.1</pub-id> </citation>
</ref>
<ref id="B38">
<label>38.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saraeian</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Shirazi</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Process Mining-Based Anomaly Detection of Additive Manufacturing Process Activities Using a Game Theory Modeling Approach</article-title>. <source>Comput Ind Eng</source> (<year>2020</year>) <volume>146</volume>:<fpage>106584</fpage>. <pub-id pub-id-type="doi">10.1016/j.cie.2020.106584</pub-id> </citation>
</ref>
<ref id="B39">
<label>39.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bubeck</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Convex Optimization: Algorithms and Complexity</article-title>. <source>FNT Machine Learn</source> (<year>2015</year>) <volume>8</volume>:<fpage>231</fpage>&#x2013;<lpage>357</lpage>. <pub-id pub-id-type="doi">10.1561/2200000050</pub-id> </citation>
</ref>
<ref id="B40">
<label>40.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Boyd</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Vandenberghe</surname>
<given-names>L</given-names>
</name>
</person-group>. <source>Convex Optimization</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name> (<year>2004</year>). <pub-id pub-id-type="doi">10.1017/CBO9780511804441</pub-id> </citation>
</ref>
<ref id="B41">
<label>41.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>A Neurodynamic Approach to Nonlinear Optimization Problems with Affine Equality and Convex Inequality Constraints</article-title>. <source>Neural Networks</source> (<year>2019</year>) <volume>109</volume>:<fpage>147</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2018.10.010</pub-id> </citation>
</ref>
<ref id="B42">
<label>42.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bo&#x163;</surname>
<given-names>RI</given-names>
</name>
<name>
<surname>Grad</surname>
<given-names>S-M</given-names>
</name>
<name>
<surname>Wanka</surname>
<given-names>G</given-names>
</name>
</person-group>. <article-title>On Strong and Total Lagrange Duality for Convex Optimization Problems</article-title>. <source>J Math Anal Appl</source> (<year>2008</year>) <volume>337</volume>:<fpage>1315</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1016/j.jmaa.2007.04.071</pub-id> </citation>
</ref>
<ref id="B43">
<label>43.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tax</surname>
<given-names>DMJ</given-names>
</name>
<name>
<surname>Duin</surname>
<given-names>RPW</given-names>
</name>
</person-group>. <article-title>Support Vector Data Description</article-title>. <source>Mach Learn</source> (<year>2004</year>) <volume>54</volume>:<fpage>45</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1023/B:MACH.0000008084.60811.49</pub-id> </citation>
</ref>
<ref id="B44">
<label>44.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Generalized Lagrange Multiplier Method and KKT Conditions with an Application to Distributed Optimization</article-title>. <source>IEEE Trans Circuits Syst II</source> (<year>2019</year>) <volume>66</volume>:<fpage>252</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1109/TCSII.2018.2842085</pub-id> </citation>
</ref>
<ref id="B45">
<label>45.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Vaswani</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Mishkin</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Laradji</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Gidel</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Lacoste-Julien</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Painless Stochastic Gradient: Interpolation, Line-Search, and Convergence Rates</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Wallach</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Larochelle</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Beygelzimer</surname>
<given-names>A</given-names>
</name>
<name>
<surname>d&#x2019;Alch&#xe9;-Buc</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Fox</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Garnett</surname>
<given-names>R</given-names>
</name>
</person-group>, editors. <source>Advances in Neural Information Processing Systems</source>, <volume>Vol. 32</volume>. <publisher-loc>Vancouver, Canada</publisher-loc>: <publisher-name>Curran Associates, Inc.</publisher-name> (<year>2019</year>). </citation>
</ref>
<ref id="B46">
<label>46.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abramovich</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Jameson</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Sinnamon</surname>
<given-names>G</given-names>
</name>
</person-group>. <article-title>Refining Jensen&#x2019;s Inequality</article-title>. <source>Bull Math Soc Sci Math Phys R&#xe9;pub Pop Roum</source> (<year>2004</year>) <volume>47</volume>(<issue>95</issue>):<fpage>3</fpage>&#x2013;<lpage>14</lpage>. </citation>
</ref>
<ref id="B47">
<label>47.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sayed</surname>
<given-names>WA</given-names>
</name>
<name>
<surname>Darwish</surname>
<given-names>MA</given-names>
</name>
</person-group>. <article-title>On the Existence of Solutions of a Perturbed Functional Integral Equation in the Space of Lebesgue Integrable Functions on &#x211d;&#x2b;</article-title>. <source>ZN PRz Mechanika</source> (<year>2018</year>) <volume>41</volume>:<fpage>19</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.7862/rf.2018.2</pub-id> </citation>
</ref>
<ref id="B48">
<label>48.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ahrendt</surname>
<given-names>P</given-names>
</name>
</person-group>. <source>The Multivariate Gaussian Probability Distribution</source>. <publisher-loc>Kongens Lyngby, Denmark</publisher-loc>: <publisher-name>Tech. rep., Technical University of Denmark</publisher-name> (<year>2005</year>). </citation>
</ref>
<ref id="B49">
<label>49.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sain</surname>
<given-names>SR</given-names>
</name>
<name>
<surname>Gray</surname>
<given-names>HL</given-names>
</name>
<name>
<surname>Woodward</surname>
<given-names>WA</given-names>
</name>
<name>
<surname>Fisk</surname>
<given-names>MD</given-names>
</name>
</person-group>. <article-title>Outlier Detection from a Mixture Distribution when Training Data Are Unlabeled</article-title>. <source>Bull Seismol Soc Am</source> (<year>1999</year>) <volume>89</volume>:<fpage>294</fpage>&#x2013;<lpage>304</lpage>. <pub-id pub-id-type="doi">10.1785/BSSA0890010294</pub-id> </citation>
</ref>
<ref id="B50">
<label>50.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sammaknejad</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>A Review of the Expectation Maximization Algorithm in Data-Driven Process Identification</article-title>. <source>J Process Control</source> (<year>2019</year>) <volume>73</volume>:<fpage>123</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1016/j.jprocont.2018.12.010</pub-id> </citation>
</ref>
<ref id="B51">
<label>51.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Woodward</surname>
<given-names>WA</given-names>
</name>
<name>
<surname>Sain</surname>
<given-names>SR</given-names>
</name>
</person-group>. <article-title>Testing for Outliers from a Mixture Distribution when Some Data Are Missing</article-title>. <source>Comput Stat Data Anal</source> (<year>2003</year>) <volume>44</volume>:<fpage>193</fpage>&#x2013;<lpage>210</lpage>. <pub-id pub-id-type="doi">10.1016/S0167-9473(03)00008-2</pub-id> </citation>
</ref>
<ref id="B52">
<label>52.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Scott</surname>
<given-names>DW</given-names>
</name>
<name>
<surname>Sain</surname>
<given-names>SR</given-names>
</name>
</person-group>. <article-title>Multidimensional Density Estimation</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Rao</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Wegman</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Solka</surname>
<given-names>J</given-names>
</name>
</person-group>, editors. <source>Data Mining and Data Visualization</source>. <source>Vol. 24 of Handbook of Statistics</source>. <publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>Elsevier</publisher-name> (<year>2005</year>). p. <fpage>229</fpage>&#x2013;<lpage>61</lpage>. <pub-id pub-id-type="doi">10.1016/S0169-7161(04)24009-3</pub-id> </citation>
</ref>
<ref id="B53">
<label>53.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Miao</surname>
<given-names>Q</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Quan</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Fast Structural Ensemble for One-Class Classification</article-title>. <source>Pattern Recognition Lett</source> (<year>2016</year>) <volume>80</volume>:<fpage>179</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1016/j.patrec.2016.06.028</pub-id> </citation>
</ref>
<ref id="B54">
<label>54.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>G</given-names>
</name>
</person-group>. <article-title>An Innovative One-Class Least Squares Support Vector Machine Model Based on Continuous Cognition</article-title>. <source>Knowledge-Based Syst</source> (<year>2017</year>) <volume>123</volume>:<fpage>217</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2017.02.024</pub-id> </citation>
</ref>
<ref id="B55">
<label>55.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Santis</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Livi</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Sadeghian</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Rizzi</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Modeling and Recognition of Smart Grid Faults by a Combined Approach of Dissimilarity Learning and One-Class Classification</article-title>. <source>Neurocomputing</source> (<year>2015</year>) <volume>170</volume>:<fpage>368</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2015.05.112</pub-id> </citation>
</ref>
<ref id="B56">
<label>56.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bach</surname>
<given-names>F</given-names>
</name>
</person-group>. <article-title>Breaking the Curse of Dimensionality with Convex Neural Networks</article-title>. <source>J Mach Learn Res</source> (<year>2017</year>) <volume>18</volume>:<fpage>629</fpage>&#x2013;<lpage>81</lpage>. </citation>
</ref>
<ref id="B57">
<label>57.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>van der Walt</surname>
<given-names>CM</given-names>
</name>
<name>
<surname>Barnard</surname>
<given-names>E</given-names>
</name>
</person-group>. <article-title>Variable Kernel Density Estimation in High-Dimensional Feature Spaces</article-title>. In: <conf-name>Thirty-First AAAI Conference on Artificial Intelligence</conf-name> (<year>2017</year>). p. <fpage>2674</fpage>&#x2013;<lpage>80</lpage>. </citation>
</ref>
<ref id="B58">
<label>58.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dobronets</surname>
<given-names>BS</given-names>
</name>
<name>
<surname>Popova</surname>
<given-names>OA</given-names>
</name>
</person-group>. <article-title>Improving the Accuracy of the Probability Density Function Estimation</article-title>. <source>J Sib Fed Univ Math Phys</source> (<year>2017</year>) <volume>10</volume>:<fpage>16</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.17516/1997-1397-2017-10-1-16-21</pub-id> </citation>
</ref>
<ref id="B59">
<label>59.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Gretton</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Borgwardt</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Sch&#xf6;lkopf</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Smola</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Correcting Sample Selection Bias by Unlabeled Data</article-title>. <source>Adv Neural Inf Process Syst</source> (<year>2006</year>) <volume>19</volume>:<fpage>601</fpage>&#x2013;<lpage>8</lpage>. </citation>
</ref>
<ref id="B60">
<label>60.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bickel</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Br&#xfc;ckner</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Scheffer</surname>
<given-names>T</given-names>
</name>
</person-group>. <article-title>Discriminative Learning for Differing Training and Test Distributions</article-title>. In: <conf-name>ICML &#x2019;07: Proceedings of the 24th International Conference on Machine Learning</conf-name>. <publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name> (<year>2007</year>). p. <fpage>81</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1145/1273496.1273507</pub-id> </citation>
</ref>
<ref id="B61">
<label>61.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Nakajima</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kashima</surname>
<given-names>H</given-names>
</name>
<name>
<surname>von B&#xfc;nau</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Kawanabe</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Direct Importance Estimation for Covariate Shift Adaptation</article-title>. <source>Ann Inst Stat Math</source> (<year>2008</year>) <volume>60</volume>:<fpage>699</fpage>&#x2013;<lpage>746</lpage>. <pub-id pub-id-type="doi">10.1007/s10463-008-0197-x</pub-id> </citation>
</ref>
<ref id="B62">
<label>62.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Nakajima</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kashima</surname>
<given-names>H</given-names>
</name>
<name>
<surname>von B&#xfc;nau</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Kawanabe</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Direct Importance Estimation with Model Selection and its Application to Covariate Shift Adaptation</article-title>. In: <source>NIPS</source>, <volume>Vol. 7</volume>. <publisher-loc>Princeton, NJ, USA</publisher-loc>: <publisher-name>Citeseer</publisher-name> (<year>2007</year>). p. <fpage>1433</fpage>&#x2013;<lpage>40</lpage>. </citation>
</ref>
<ref id="B63">
<label>63.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kanamori</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Hido</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Efficient Direct Density Ratio Estimation for Non-stationarity Adaptation and Outlier Detection</article-title>. In: <source>Advances in Neural Information Processing Systems</source>. <publisher-loc>Princeton, NJ, USA</publisher-loc>: <publisher-name>Citeseer</publisher-name> (<year>2008</year>). p. <fpage>809</fpage>&#x2013;<lpage>16</lpage>. </citation>
</ref>
<ref id="B64">
<label>64.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kanamori</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Hido</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>A Least-Squares Approach to Direct Importance Estimation</article-title>. <source>J Mach Learn Res</source> (<year>2009</year>) <volume>10</volume>:<fpage>1391</fpage>&#x2013;<lpage>445</lpage>. </citation>
</ref>
<ref id="B65">
<label>65.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Souza</surname>
<given-names>SVC</given-names>
</name>
<name>
<surname>Junqueira</surname>
<given-names>RG</given-names>
</name>
</person-group>. <article-title>A Procedure to Assess Linearity by Ordinary Least Squares Method</article-title>. <source>Analytica Chim Acta</source> (<year>2005</year>) <volume>552</volume>:<fpage>25</fpage>&#x2013;<lpage>35</lpage>. <pub-id pub-id-type="doi">10.1016/j.aca.2005.07.043</pub-id> </citation>
</ref>
<ref id="B66">
<label>66.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kanamori</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Statistical Analysis of Kernel-Based Least-Squares Density-Ratio Estimation</article-title>. <source>Mach Learn</source> (<year>2012</year>) <volume>86</volume>:<fpage>335</fpage>&#x2013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1007/s10994-011-5266-3</pub-id> </citation>
</ref>
<ref id="B67">
<label>67.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yamada</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Kanamori</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Hachiya</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Relative Density-Ratio Estimation for Robust Distribution Comparison</article-title>. <source>Neural Comput</source> (<year>2013</year>) <volume>25</volume>:<fpage>1324</fpage>&#x2013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1162/NECO_a_00442</pub-id> </citation>
</ref>
<ref id="B68">
<label>68.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nam</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Direct Density Ratio Estimation with Convolutional Neural Networks with Application in Outlier Detection</article-title>. <source>IEICE Trans Inf Syst</source> (<year>2015</year>) <volume>E98.D</volume>:<fpage>1073</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1587/transinf.2014EDP7335</pub-id> </citation>
</ref>
<ref id="B69">
<label>69.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hushchyn</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Ustyuzhanin</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Generalization of Change-point Detection in Time Series Data Based on Direct Density Ratio Estimation</article-title>. <source>J Comput Sci</source> (<year>2021</year>) <volume>53</volume>:<fpage>101385</fpage>. <pub-id pub-id-type="doi">10.1016/j.jocs.2021.101385</pub-id> </citation>
</ref>
<ref id="B70">
<label>70.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>R&#xe4;tsch</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Onoda</surname>
<given-names>T</given-names>
</name>
<name>
<surname>M&#xfc;ller</surname>
<given-names>K-R</given-names>
</name>
</person-group>. <article-title>Soft Margins for Adaboost</article-title>. <source>Mach Learn</source> (<year>2001</year>) <volume>42</volume>:<fpage>287</fpage>&#x2013;<lpage>320</lpage>. <pub-id pub-id-type="doi">10.1023/A:1007618119488</pub-id> </citation>
</ref>
<ref id="B71">
<label>71.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hido</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Tsuboi</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Kashima</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Kanamori</surname>
<given-names>T</given-names>
</name>
</person-group>. <article-title>Statistical Outlier Detection Using Direct Density Ratio Estimation</article-title>. <source>Knowl Inf Syst</source> (<year>2011</year>) <volume>26</volume>:<fpage>309</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1007/s10115-010-0283-2</pub-id> </citation>
</ref>
<ref id="B72">
<label>72.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Yamada</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Collier</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Change-point Detection in Time-Series Data by Relative Density-Ratio Estimation</article-title>. <source>Neural Networks</source> (<year>2013</year>) <volume>43</volume>:<fpage>72</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2013.01.012</pub-id> </citation>
</ref>
<ref id="B73">
<label>73.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aminikhanghahi</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Cook</surname>
<given-names>DJ</given-names>
</name>
</person-group>. <article-title>Real-time Change Point Detection with Application to Smart Home Time Series Data</article-title>. <source>IEEE Trans Knowl Data Eng</source> (<year>2019</year>) <volume>31</volume>:<fpage>1010</fpage>&#x2013;<lpage>23</lpage>. <pub-id pub-id-type="doi">10.1109/TKDE.2018.2850347</pub-id> </citation>
</ref>
<ref id="B74">
<label>74.</label>
<citation citation-type="journal">
<comment>[Dataset]</comment> <person-group person-group-type="author">
<name>
<surname>Yamada</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kaski</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Interpreting Outliers: Localized Logistic Regression for Density Ratio Estimation</article-title>. <source>arXiv preprint</source> (<year>2017</year>). <comment>Available from: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/1702.06354">http://arxiv.org/abs/1702.06354</ext-link> (Accessed December 5, 2021)</comment>. </citation>
</ref>
<ref id="B75">
<label>75.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Islam</surname>
<given-names>MS</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Chandra</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Thuraisingham</surname>
<given-names>BM</given-names>
</name>
</person-group>. <article-title>GCI: A GPU Based Transfer Learning Approach for Detecting Cheats of Computer Game</article-title>. <source>IEEE Trans Dependable Secure Comput</source> (<year>2020</year>) <volume>2020</volume>:<fpage>1</fpage>. <pub-id pub-id-type="doi">10.1109/TDSC.2020.3013817</pub-id> </citation>
</ref>
<ref id="B76">
<label>76.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>Optimal Sparse Singular Value Decomposition for High-Dimensional High-Order Data</article-title>. <source>J Am Stat Assoc</source> (<year>2019</year>) <volume>114</volume>:<fpage>1708</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.2018.1527227</pub-id> </citation>
</ref>
<ref id="B77">
<label>77.</label>
<citation citation-type="journal">
<comment>[Dataset]</comment> <person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>D</given-names>
</name>
</person-group>. <article-title>Targeted Backdoor Attacks on Deep Learning Systems Using Data Poisoning</article-title>. <source>arXiv preprint</source> (<year>2017</year>). <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1712.05526">https://arxiv.org/abs/1712.05526</ext-link> (Accessed December 5, 2021)</comment>. </citation>
</ref>
<ref id="B78">
<label>78.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bouwmans</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Javed</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Otazo</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>On the Applications of Robust PCA in Image and Video Processing</article-title>. <source>Proc IEEE</source> (<year>2018</year>) <volume>106</volume>:<fpage>1427</fpage>&#x2013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1109/JPROC.2018.2853589</pub-id> </citation>
</ref>
<ref id="B79">
<label>79.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruff</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Kauffmann</surname>
<given-names>JR</given-names>
</name>
<name>
<surname>Vandermeulen</surname>
<given-names>RA</given-names>
</name>
<name>
<surname>Montavon</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Samek</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Kloft</surname>
<given-names>M</given-names>
</name>
<etal/>
</person-group> <article-title>A Unifying Review of Deep and Shallow Anomaly Detection</article-title>. <source>Proc IEEE</source> (<year>2021</year>) <volume>109</volume>:<fpage>756</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1109/JPROC.2021.3052449</pub-id> </citation>
</ref>
<ref id="B80">
<label>80.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cand&#xe8;s</surname>
<given-names>EJ</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Robust Principal Component Analysis?</article-title> <source>J ACM</source> (<year>2011</year>) <volume>58</volume>:<fpage>1</fpage>&#x2013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1145/1970392.1970395</pub-id> </citation>
</ref>
<ref id="B81">
<label>81.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cand&#xe8;s</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Robust Principal Component Analysis?: Recovering Low-Rank Matrices from Sparse Errors</article-title>. In: <conf-name>2010 IEEE Sensor Array and Multichannel Signal Processing Workshop</conf-name> (<year>2010</year>). p. <fpage>201</fpage>&#x2013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.1109/SAM.2010.5606734</pub-id> </citation>
</ref>
<ref id="B82">
<label>82.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Z</given-names>
</name>
</person-group>. <article-title>Linearized Alternating Direction Method with Adaptive Penalty for Low-Rank Representation</article-title>. In: <conf-name>NIPS&#x2019;11: Proceedings of the 24th International Conference on Neural Information Processing Systems</conf-name>. <publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates Inc.</publisher-name> (<year>2011</year>). p. <fpage>612</fpage>&#x2013;<lpage>20</lpage>. </citation>
</ref>
<ref id="B83">
<label>83.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>An</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L</given-names>
</name>
</person-group>. <article-title>Fault Detection with Principal Component Pursuit Method</article-title>. <source>J Phys Conf Ser</source> (<year>2015</year>) <volume>659</volume>:<fpage>012035</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/659/1/012035</pub-id> </citation>
</ref>
<ref id="B84">
<label>84.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Isom</surname>
<given-names>JD</given-names>
</name>
<name>
<surname>LaBarre</surname>
<given-names>RE</given-names>
</name>
</person-group>. <article-title>Process Fault Detection, Isolation, and Reconstruction by Principal Component Pursuit</article-title>. In: <conf-name>Proceedings of the 2011 American Control Conference</conf-name> (<year>2011</year>). p. <fpage>238</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1109/ACC.2011.5990849</pub-id> </citation>
</ref>
<ref id="B85">
<label>85.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>A Low-Rank and Sparse Matrix Decomposition-Based Mahalanobis Distance Method for Hyperspectral Anomaly Detection</article-title>. <source>IEEE Trans Geosci Remote Sensing</source> (<year>2016</year>) <volume>54</volume>:<fpage>1376</fpage>&#x2013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1109/TGRS.2015.2479299</pub-id> </citation>
</ref>
<ref id="B86">
<label>86.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Cand&#xe8;s</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Stable Principal Component Pursuit</article-title>. In: <conf-name>2010 IEEE International Symposium on Information Theory</conf-name> (<year>2010</year>). p. <fpage>1518</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1109/ISIT.2010.5513535</pub-id> </citation>
</ref>
<ref id="B87">
<label>87.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Plaza</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>Z</given-names>
</name>
</person-group>. <article-title>Anomaly Detection in Hyperspectral Images Based on Low-Rank and Sparse Representation</article-title>. <source>IEEE Trans Geosci Remote Sensing</source> (<year>2016</year>) <volume>54</volume>:<fpage>1990</fpage>&#x2013;<lpage>2000</lpage>. <pub-id pub-id-type="doi">10.1109/TGRS.2015.2493201</pub-id> </citation>
</ref>
<ref id="B88">
<label>88.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Robust Recovery of Subspace Structures by Low-Rank Representation</article-title>. <source>IEEE Trans Pattern Anal Mach Intell</source> (<year>2013</year>) <volume>35</volume>:<fpage>171</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2012.88</pub-id> </citation>
</ref>
<ref id="B89">
<label>89.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Caramanis</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Sanghavi</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Robust PCA via Outlier Pursuit</article-title>. <source>IEEE Trans Inform Theor</source> (<year>2012</year>) <volume>58</volume>:<fpage>3047</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1109/TIT.2011.2173156</pub-id> </citation>
</ref>
<ref id="B90">
<label>90.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C</given-names>
</name>
<name>
<surname>An</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Robust Principal Component Pursuit for Fault Detection in a Blast Furnace Process</article-title>. <source>Ind Eng Chem Res</source> (<year>2018</year>) <volume>57</volume>:<fpage>283</fpage>&#x2013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1021/acs.iecr.7b03338</pub-id> </citation>
</ref>
<ref id="B91">
<label>91.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>D</given-names>
</name>
</person-group>. <article-title>Randomized Subspace-Based Robust Principal Component Analysis for Hyperspectral Anomaly Detection</article-title>. <source>J Appl Remote Sens</source> (<year>2018</year>) <volume>12</volume>:<fpage>1</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1117/1.JRS.12.015015</pub-id> </citation>
</ref>
<ref id="B92">
<label>92.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>YM</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W</given-names>
</name>
</person-group>. <article-title>Low-rank and Sparse Matrix Decomposition-Based Anomaly Detection for Hyperspectral Imagery</article-title>. <source>J Appl Remote Sens</source> (<year>2014</year>) <volume>8</volume>(<issue>1</issue>):<fpage>083641</fpage>. <pub-id pub-id-type="doi">10.1117/1.JRS.8.083641</pub-id> </citation>
</ref>
<ref id="B93">
<label>93.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Ayhan</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Kwan</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Vance</surname>
<given-names>S</given-names>
</name>
<etal/>
</person-group> <article-title>Hyperspectral Anomaly Detection through Spectral Unmixing and Dictionary-Based Low-Rank Decomposition</article-title>. <source>IEEE Trans Geosci Remote Sensing</source> (<year>2018</year>) <volume>56</volume>:<fpage>4391</fpage>&#x2013;<lpage>405</lpage>. <pub-id pub-id-type="doi">10.1109/TGRS.2018.2818159</pub-id> </citation>
</ref>
<ref id="B94">
<label>94.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Paffenroth</surname>
<given-names>RC</given-names>
</name>
</person-group>. <article-title>Anomaly Detection with Robust Deep Autoencoders</article-title>. In: <conf-name>KDD &#x2019;17: Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>. <publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name> (<year>2017</year>). p. <fpage>665</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1145/3097983.3098052</pub-id> </citation>
</ref>
<ref id="B95">
<label>95.</label>
<citation citation-type="journal">
<comment>[Dataset]</comment> <person-group person-group-type="author">
<name>
<surname>Chalapathy</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Menon</surname>
<given-names>AK</given-names>
</name>
<name>
<surname>Chawla</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Anomaly Detection Using One-Class Neural Networks</article-title>. <source>ArXiv preprint</source> (<year>2018</year>). <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1802.06360">https://arxiv.org/abs/1802.06360</ext-link> (Accessed December 5, 2021)</comment>. </citation>
</ref>
<ref id="B96">
<label>96.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cviti&#x107;</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Perakovi&#x107;</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Peri&#x161;a</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>Ensemble Machine Learning Approach for Classification of IoT Devices in Smart Home</article-title>. <source>Int J Mach Learn Cyber</source> (<year>2021</year>) <volume>12</volume>:<fpage>3179</fpage>&#x2013;<lpage>202</lpage>. <pub-id pub-id-type="doi">10.1007/s13042-020-01241-0</pub-id> </citation>
</ref>
<ref id="B97">
<label>97.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jan</surname>
<given-names>SU</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>Y-D</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Koo</surname>
<given-names>I</given-names>
</name>
</person-group>. <article-title>Sensor Fault Classification Based on Support Vector Machine and Statistical Time-Domain Features</article-title>. <source>IEEE Access</source> (<year>2017</year>) <volume>5</volume>:<fpage>8682</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2017.2705644</pub-id> </citation>
</ref>
<ref id="B98">
<label>98.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Di Ciccio</surname>
<given-names>C</given-names>
</name>
<name>
<surname>van der Aa</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Cabanillas</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Mendling</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Prescher</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Detecting Flight Trajectory Anomalies and Predicting Diversions in Freight Transportation</article-title>. <source>Decis Support Syst</source> (<year>2016</year>) <volume>88</volume>:<fpage>1</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1016/j.dss.2016.05.004</pub-id> </citation>
</ref>
<ref id="B99">
<label>99.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alam</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sonbhadra</surname>
<given-names>SK</given-names>
</name>
<name>
<surname>Agarwal</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Nagabhushan</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Tanveer</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Sample Reduction Using Farthest Boundary Point Estimation (FBPE) for Support Vector Data Description (SVDD)</article-title>. <source>Pattern Recognition Lett</source> (<year>2020</year>) <volume>131</volume>:<fpage>268</fpage>&#x2013;<lpage>76</lpage>. <pub-id pub-id-type="doi">10.1016/j.patrec.2020.01.004</pub-id> </citation>
</ref>
<ref id="B100">
<label>100.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mu</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Nandi</surname>
<given-names>AK</given-names>
</name>
</person-group>. <article-title>Multiclass Classification Based on Extended Support Vector Data Description</article-title>. <source>IEEE Trans Syst Man Cybern B</source> (<year>2009</year>) <volume>39</volume>:<fpage>1206</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1109/TSMCB.2009.2013962</pub-id> </citation>
</ref>
<ref id="B101">
<label>101.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Akcay</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Atapour-Abarghouei</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Breckon</surname>
<given-names>TP</given-names>
</name>
</person-group>. <article-title>GANomaly: Semi-supervised Anomaly Detection via Adversarial Training</article-title>. In: <source>Asian Conference on Computer Vision</source>. <publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2019</year>). p. <fpage>622</fpage>&#x2013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-20893-6_39</pub-id> </citation>
</ref>
<ref id="B102">
<label>102.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Gryllias</surname>
<given-names>K</given-names>
</name>
</person-group>. <article-title>A Semi-supervised Support Vector Data Description-Based Fault Detection Method for Rolling Element Bearings Based on Cyclic Spectral Analysis</article-title>. <source>Mech Syst Signal Process</source> (<year>2020</year>) <volume>140</volume>:<fpage>106682</fpage>. <pub-id pub-id-type="doi">10.1016/j.ymssp.2020.106682</pub-id> </citation>
</ref>
<ref id="B103">
<label>103.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Erfani</surname>
<given-names>SM</given-names>
</name>
<name>
<surname>Rajasegarar</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Karunasekera</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Leckie</surname>
<given-names>C</given-names>
</name>
</person-group>. <article-title>High-dimensional and Large-Scale Anomaly Detection Using a Linear One-Class SVM with Deep Learning</article-title>. <source>Pattern Recognition</source> (<year>2016</year>) <volume>58</volume>:<fpage>121</fpage>&#x2013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2016.03.028</pub-id> </citation>
</ref>
<ref id="B104">
<label>104.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ruff</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Vandermeulen</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Goernitz</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Deecke</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Siddiqui</surname>
<given-names>SA</given-names>
</name>
<name>
<surname>Binder</surname>
<given-names>A</given-names>
</name>
<etal/>
</person-group> <article-title>Deep One-Class Classification</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Dy</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Krause</surname>
<given-names>A</given-names>
</name>
</person-group>, editors. <source>Proceedings of the 35th International Conference on Machine Learning</source>. <source>Vol. 80 of Proceedings of Machine Learning Research</source>. <publisher-loc>New York City, NY, USA</publisher-loc>: <publisher-name>PMLR</publisher-name> (<year>2018</year>). p. <fpage>4393</fpage>&#x2013;<lpage>402</lpage>. </citation>
</ref>
<ref id="B105">
<label>105.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Lan</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>Robust Support Vector Data Description for Novelty Detection with Contaminated Data</article-title>. <source>Eng Appl Artif Intelligence</source> (<year>2020</year>) <volume>91</volume>:<fpage>103554</fpage>. <pub-id pub-id-type="doi">10.1016/j.engappai.2020.103554</pub-id> </citation>
</ref>
<ref id="B106">
<label>106.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>B</given-names>
</name>
</person-group>. <article-title>A Pruned Support Vector Data Description-Based Outlier Detection Method: Applied to Robust Process Monitoring</article-title>. <source>Trans Inst Meas Control</source> (<year>2020</year>) <volume>42</volume>:<fpage>2113</fpage>&#x2013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1177/0142331220905951</pub-id> </citation>
</ref>
<ref id="B107">
<label>107.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barber</surname>
<given-names>CB</given-names>
</name>
<name>
<surname>Dobkin</surname>
<given-names>DP</given-names>
</name>
<name>
<surname>Huhdanpaa</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>The Quickhull Algorithm for Convex Hulls</article-title>. <source>ACM Trans Math Softw</source> (<year>1996</year>) <volume>22</volume>:<fpage>469</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1145/235815.235821</pub-id> </citation>
</ref>
<ref id="B108">
<label>108.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>JG</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G</given-names>
</name>
</person-group>. <article-title>A Novel Geometric Approach to Binary Classification Based on Scaled Convex Hulls</article-title>. <source>IEEE Trans Neural Netw</source> (<year>2009</year>) <volume>20</volume>:<fpage>1215</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1109/TNN.2009.2022399</pub-id> </citation>
</ref>
<ref id="B109">
<label>109.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jove</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Casteleiro-Roca</surname>
<given-names>J-L</given-names>
</name>
<name>
<surname>Quinti&#xe1;n</surname>
<given-names>H</given-names>
</name>
<name>
<surname>M&#xe9;ndez-P&#xe9;rez</surname>
<given-names>J-A</given-names>
</name>
<name>
<surname>Calvo-Rolle</surname>
<given-names>JL</given-names>
</name>
</person-group>. <article-title>A New Method for Anomaly Detection Based on Non-convex Boundaries with Random Two-Dimensional Projections</article-title>. <source>Inf Fusion</source> (<year>2021</year>) <volume>65</volume>:<fpage>50</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2020.08.011</pub-id> </citation>
</ref>
<ref id="B110">
<label>110.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Casale</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Pujol</surname>
<given-names>O</given-names>
</name>
<name>
<surname>Radeva</surname>
<given-names>P</given-names>
</name>
</person-group>. <article-title>Approximate Polytope Ensemble for One-Class Classification</article-title>. <source>Pattern Recognition</source> (<year>2014</year>) <volume>47</volume>:<fpage>854</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2013.08.007</pub-id> </citation>
</ref>
<ref id="B111">
<label>111.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Casale</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Pujol</surname>
<given-names>O</given-names>
</name>
<name>
<surname>Radeva</surname>
<given-names>P</given-names>
</name>
</person-group>. <article-title>Approximate Convex Hulls Family for One-Class Classification</article-title>. In: <source>International Workshop on Multiple Classifier Systems</source>. <publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2011</year>). p. <fpage>106</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-21557-5_13</pub-id> </citation>
</ref>
<ref id="B112">
<label>112.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fern&#xe1;ndez-Francos</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Fontenla-Romero</surname>
<given-names>O</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>One-class Convex Hull-Based Algorithm for Classification in Distributed Environments</article-title>. <source>IEEE Trans Syst Man Cybern, Syst</source> (<year>2020</year>) <volume>50</volume>:<fpage>386</fpage>&#x2013;<lpage>96</lpage>. <pub-id pub-id-type="doi">10.1109/TSMC.2017.2771341</pub-id> </citation>
</ref>
<ref id="B113">
<label>113.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jove</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Gonzalez-Cava</surname>
<given-names>JM</given-names>
</name>
<name>
<surname>Casteleiro-Roca</surname>
<given-names>J-L</given-names>
</name>
<name>
<surname>Quinti&#xe1;n</surname>
<given-names>H</given-names>
</name>
<name>
<surname>M&#xe9;ndez-P&#xe9;rez</surname>
<given-names>JA</given-names>
</name>
<name>
<surname>Calvo-Rolle</surname>
<given-names>JL</given-names>
</name>
</person-group>. <article-title>Anomaly Detection on Patients Undergoing General Anesthesia</article-title>. In: <conf-name>International Joint Conference: 12th International Conference on Computational Intelligence in Security for Information Systems (CISIS 2019) and 10th International Conference on EUropean Transnational Education (ICEUTE 2019)</conf-name>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2019</year>). p. <fpage>141</fpage>&#x2013;<lpage>52</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-20005-3_15</pub-id> </citation>
</ref>
<ref id="B114">
<label>114.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Turchini</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Seidenari</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Del Bimbo</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Convex Polytope Ensembles for Spatio-Temporal Anomaly Detection</article-title>. In: <conf-name>International Conference on Image Analysis and Processing</conf-name>. <publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2017</year>). p. <fpage>174</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-68560-1_16</pub-id> </citation>
</ref>
<ref id="B115">
<label>115.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Kernel Flexible and Displaceable Convex Hull Based Tensor Machine for Gearbox Fault Intelligent Diagnosis with Multi-Source Signals</article-title>. <source>Measurement</source> (<year>2020</year>) <volume>163</volume>:<fpage>107965</fpage>. <pub-id pub-id-type="doi">10.1016/j.measurement.2020.107965</pub-id> </citation>
</ref>
<ref id="B116">
<label>116.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scalet</surname>
<given-names>G</given-names>
</name>
</person-group>. <article-title>A Convex Hull&#x2010;Based Approach for Multiaxial High&#x2010;cycle Fatigue Criteria</article-title>. <source>Fatigue Fract Eng Mater Struct</source> (<year>2021</year>) <volume>44</volume>:<fpage>14</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1111/ffe.13318</pub-id> </citation>
</ref>
<ref id="B117">
<label>117.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Bartlett</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Hazan</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Rakhlin</surname>
<given-names>A</given-names>
</name>
</person-group>. <source>Adaptive Online Gradient Descent</source>. <publisher-loc>Berkeley, California</publisher-loc>: <publisher-name>Tech. rep., EECS Department, University of California</publisher-name> (<year>2007</year>). </citation>
</ref>
<ref id="B118">
<label>118.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zinkevich</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Online Convex Programming and Generalized Infinitesimal Gradient Ascent</article-title>. In: <conf-name>Proceedings of the 20th International Conference on Machine Learning (ICML-03)</conf-name>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>ICML</publisher-name> (<year>2003</year>). p. <fpage>928</fpage>&#x2013;<lpage>36</lpage>. </citation>
</ref>
<ref id="B119">
<label>119.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shalev-Shwartz</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Online Learning and Online Convex Optimization</article-title>. <source>FNT Machine Learn</source> (<year>2011</year>) <volume>4</volume>:<fpage>107</fpage>&#x2013;<lpage>94</lpage>. <pub-id pub-id-type="doi">10.1561/2200000018</pub-id> </citation>
</ref>
<ref id="B120">
<label>120.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raginsky</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Willett</surname>
<given-names>RM</given-names>
</name>
<name>
<surname>Horn</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Marcia</surname>
<given-names>RF</given-names>
</name>
</person-group>. <article-title>Sequential Anomaly Detection in the Presence of Noise and Limited Feedback</article-title>. <source>IEEE Trans Inform Theor</source> (<year>2012</year>) <volume>58</volume>:<fpage>5544</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1109/TIT.2012.2201375</pub-id> </citation>
</ref>
<ref id="B121">
<label>121.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Siddiqui</surname>
<given-names>MA</given-names>
</name>
<name>
<surname>Fern</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Dietterich</surname>
<given-names>TG</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Theriault</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Archer</surname>
<given-names>DW</given-names>
</name>
</person-group>. <article-title>Feedback-guided Anomaly Discovery via Online Optimization</article-title>. In: <conf-name>KDD &#x2019;18: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &#x26; Data Mining</conf-name>. <publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name> (<year>2018</year>). p. <fpage>2200</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1145/3219819.3220083</pub-id> </citation>
</ref>
<ref id="B122">
<label>122.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kerpicci</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Ozkan</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Kozat</surname>
<given-names>SS</given-names>
</name>
</person-group>. <article-title>Online Anomaly Detection with Bandwidth Optimized Hierarchical Kernel Density Estimators</article-title>. <source>IEEE Trans Neural Netw Learn Syst</source> (<year>2021</year>) <volume>32</volume>:<fpage>4253</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2020.3017675</pub-id> </citation>
</ref>
<ref id="B123">
<label>123.</label>
<citation citation-type="journal">
<comment>[Dataset]</comment> <person-group person-group-type="author">
<name>
<surname>Ruder</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>An Overview of Gradient Descent Optimization Algorithms</article-title>. <source>ArXiv preprint</source> (<year>2016</year>). <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1609.04747">https://arxiv.org/abs/1609.04747</ext-link> (Accessed December 5, 2021)</comment>. </citation>
</ref>
<ref id="B124">
<label>124.</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Davenport</surname>
<given-names>M</given-names>
</name>
</person-group>. <article-title>Dynamic Matrix Recovery from Incomplete Observations under an Exact Low-Rank Constraint</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Lee</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Luxburg</surname>
<given-names>U</given-names>
</name>
<name>
<surname>Guyon</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Garnett</surname>
<given-names>R</given-names>
</name>
</person-group>, editors. <source>Advances in Neural Information Processing Systems</source>, <volume>Vol. 29</volume>. <publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates, Inc.</publisher-name> (<year>2016</year>). </citation>
</ref>
<ref id="B125">
<label>125.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zenati</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Foo</surname>
<given-names>CS</given-names>
</name>
<name>
<surname>Lecouat</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Manek</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Chandrasekhar</surname>
<given-names>VR</given-names>
</name>
</person-group>. <article-title>Efficient GAN-Based Anomaly Detection</article-title>. <source>arXiv preprint arXiv:1802.06222</source> (<year>2018</year>). </citation>
</ref>
<ref id="B126">
<label>126.</label>
<citation citation-type="journal">
<comment>[Dataset]</comment> <person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>Q</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>RobustTAD: Robust Time Series Anomaly Detection via Decomposition and Convolutional Neural Networks</article-title>. <source>arXiv preprint</source> (<year>2020</year>). <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2002.09545">https://arxiv.org/abs/2002.09545</ext-link> (Accessed December 5, 2021)</comment>. </citation>
</ref>
<ref id="B127">
<label>127.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X</given-names>
</name>
</person-group>. <article-title>Anomaly Detection of Power Consumption in Yarn Spinning Using Transfer Learning</article-title>. <source>Comput Ind Eng</source> (<year>2021</year>) <volume>152</volume>:<fpage>107015</fpage>. <pub-id pub-id-type="doi">10.1016/j.cie.2020.107015</pub-id> </citation>
</ref>
<ref id="B128">
<label>128.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hariri</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kind</surname>
<given-names>MC</given-names>
</name>
<name>
<surname>Brunner</surname>
<given-names>RJ</given-names>
</name>
</person-group>. <article-title>Extended Isolation Forest</article-title>. <source>IEEE Trans Knowl Data Eng</source> (<year>2021</year>) <volume>33</volume>:<fpage>1479</fpage>&#x2013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1109/TKDE.2019.2947676</pub-id> </citation>
</ref>
<ref id="B129">
<label>129.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Umsonst</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Sandberg</surname>
<given-names>H</given-names>
</name>
<name>
<surname>C&#xe1;rdenas</surname>
<given-names>AA</given-names>
</name>
</person-group>. <article-title>Security Analysis of Control System Anomaly Detectors</article-title>. In: <conf-name>2017 American Control Conference (ACC)</conf-name> (<year>2017</year>). p. <fpage>5500</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.23919/ACC.2017.7963810</pub-id> </citation>
</ref>
<ref id="B130">
<label>130.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vinu&#xe9;</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Epifanio</surname>
<given-names>I</given-names>
</name>
</person-group>. <article-title>Robust Archetypoids for Anomaly Detection in Big Functional Data</article-title>. <source>Adv Data Anal Classif</source> (<year>2021</year>) <volume>15</volume>:<fpage>437</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1007/s11634-020-00412-9</pub-id> </citation>
</ref>
<ref id="B131">
<label>131.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sifa</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Drachen</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Block</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Moon</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Dubhashi</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>H</given-names>
</name>
<etal/>
</person-group> <article-title>Archetypal Analysis Based Anomaly Detection for Improved Storytelling in Multiplayer Online Battle Arena Games</article-title>. In: <conf-name>2021 Australasian Computer Science Week Multiconference</conf-name> (<year>2021</year>). p. <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1145/3437378.3442690</pub-id> </citation>
</ref>
<ref id="B132">
<label>132.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Laakso</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Chan</surname>
<given-names>JC-W</given-names>
</name>
</person-group>. <article-title>Archetypal Analysis and Structured Sparse Representation for Hyperspectral Anomaly Detection</article-title>. <source>Remote Sensing</source> (<year>2021</year>) <volume>13</volume>:<fpage>4102</fpage>. <pub-id pub-id-type="doi">10.3390/rs13204102</pub-id> </citation>
</ref>
<ref id="B133">
<label>133.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mill&#xe1;n-Roures</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Epifanio</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Mart&#xed;nez</surname>
<given-names>V</given-names>
</name>
</person-group>. <article-title>Detection of Anomalies in Water Networks by Functional Data Analysis</article-title>. <source>Math Probl Eng</source> (<year>2018</year>) <volume>2018</volume>:<fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1155/2018/5129735</pub-id> </citation>
</ref>
<ref id="B134">
<label>134.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tax</surname>
<given-names>DMJ</given-names>
</name>
<name>
<surname>Duin</surname>
<given-names>RPW</given-names>
</name>
</person-group>. <article-title>Support Vector Domain Description</article-title>. <source>Pattern Recognition Lett</source> (<year>1999</year>) <volume>20</volume>:<fpage>1191</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/S0167-8655(99)00087-2</pub-id> </citation>
</ref>
<ref id="B135">
<label>135.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bradley</surname>
<given-names>AP</given-names>
</name>
</person-group>. <article-title>The Use of the Area under the ROC Curve in the Evaluation of Machine Learning Algorithms</article-title>. <source>Pattern Recognition</source> (<year>1997</year>) <volume>30</volume>:<fpage>1145</fpage>&#x2013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1016/S0031-3203(96)00142-2</pub-id> </citation>
</ref>
<ref id="B136">
<label>136.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Candanedo</surname>
<given-names>LM</given-names>
</name>
<name>
<surname>Feldheim</surname>
<given-names>V</given-names>
</name>
</person-group>. <article-title>Accurate Occupancy Detection of an Office Room from Light, Temperature, Humidity and CO<sub>2</sub> Measurements Using Statistical Learning Models</article-title>. <source>Energy and Buildings</source> (<year>2016</year>) <volume>112</volume>:<fpage>28</fpage>&#x2013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2015.11.071</pub-id> </citation>
</ref>
<ref id="B137">
<label>137.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Hoi</surname>
<given-names>SC</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z-Y</given-names>
</name>
</person-group>. <article-title>Large Scale Online Kernel Learning</article-title>. <source>J Machine Learn Res</source> (<year>2016</year>) <volume>17</volume>:<fpage>1</fpage>&#x2013;<lpage>43</lpage>. </citation>
</ref>
<ref id="B138">
<label>138.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fei-Fei</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Fergus</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Perona</surname>
<given-names>P</given-names>
</name>
</person-group>. <article-title>Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories</article-title>. <source>Computer Vis Image Understanding</source> (<year>2007</year>) <volume>106</volume>:<fpage>59</fpage>&#x2013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1016/j.cviu.2005.09.012</pub-id> </citation>
</ref>
<ref id="B139">
<label>139.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Casale</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Pujol</surname>
<given-names>O</given-names>
</name>
<name>
<surname>Radeva</surname>
<given-names>P</given-names>
</name>
</person-group>. <article-title>Personalization and User Verification in Wearable Systems Using Biometric Walking Patterns</article-title>. <source>Pers Ubiquit Comput</source> (<year>2012</year>) <volume>16</volume>:<fpage>563</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1007/s00779-011-0415-z</pub-id> </citation>
</ref>
<ref id="B140">
<label>140.</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Laptev</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Amizadeh</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Flint</surname>
<given-names>I</given-names>
</name>
</person-group>. <article-title>Generic and Scalable Framework for Automated Time-Series Anomaly Detection</article-title>. In: <conf-name>Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name> (<year>2015</year>). p. <fpage>1939</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1145/2783258.2788611</pub-id> </citation>
</ref>
<ref id="B141">
<label>141.</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lavanya</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Rani</surname>
<given-names>DKU</given-names>
</name>
</person-group>. <article-title>Analysis of Feature Selection with Classification: Breast Cancer Datasets</article-title>. <source>Indian J Computer Sci Eng (IJCSE)</source> (<year>2011</year>) <volume>2</volume>:<fpage>756</fpage>&#x2013;<lpage>63</lpage>. </citation>
</ref>
</ref-list>
</back>
</article>