<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2025.1506074</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Dual feature-based and example-based explanation methods</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Konstantinov</surname> <given-names>Andrei</given-names></name>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Kozlov</surname> <given-names>Boris</given-names></name>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Kirpichenko</surname> <given-names>Stanislav</given-names></name>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Utkin</surname> <given-names>Lev</given-names></name>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Muliukha</surname> <given-names>Vladimir</given-names></name>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2859618/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
</contrib-group>
<aff><institution>Department of Artificial Intelligence Technologies, Peter the Great St. Petersburg Polytechnic University</institution>, <addr-line>St. Petersburg</addr-line>, <country>Russia</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Jose Santamaria Lopez, University of Ja&#x000E9;n, Spain</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Sumeet Sehra, Conestoga College, Canada</p>
<p>Archana Talhar Belge, Thakur College of Engineering and Technology, India</p>
<p>Pinaki Mitra, Indian Institute of Technology Guwahati, India</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Vladimir Muliukha <email>vladimir.muliukha&#x00040;spbstu.ru</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>10</day>
<month>02</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>8</volume>
<elocation-id>1506074</elocation-id>
<history>
<date date-type="received">
<day>04</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>01</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Konstantinov, Kozlov, Kirpichenko, Utkin and Muliukha.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Konstantinov, Kozlov, Kirpichenko, Utkin and Muliukha</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>A new approach to the local and global explanation based on selecting a convex hull constructed for the finite number of points around an explained instance is proposed. The convex hull allows us to consider a dual representation of instances in the form of convex combinations of extreme points of a produced polytope. Instead of perturbing new instances in the Euclidean feature space, vectors of convex combination coefficients are uniformly generated from the unit simplex, and they form a new dual dataset. A dual linear surrogate model is trained on the dual dataset. The explanation feature importance values are computed by means of simple matrix calculations. The approach can be regarded as a modification of the well-known model LIME. The dual representation inherently allows us to get the example-based explanation. The neural additive model is also considered as a tool for implementing the example-based explanation approach. Many numerical experiments with real datasets are performed for studying the approach. A code of proposed algorithms is available. The proposed results are fundamental and can be used in various application areas. They do not involve specific human subjects and human data.</p></abstract>
<kwd-group>
<kwd>machine learning</kwd>
<kwd>explainable AI</kwd>
<kwd>neural additive network</kwd>
<kwd>dual representation</kwd>
<kwd>convex hull</kwd>
<kwd>example-based explanation</kwd>
<kwd>feature-based explanation</kwd>
</kwd-group>
<counts>
<fig-count count="10"/>
<table-count count="5"/>
<equation-count count="31"/>
<ref-count count="88"/>
<page-count count="17"/>
<word-count count="12083"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Machine Learning and Artificial Intelligence</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Many machine learning models, including neural networks, have the black-box nature due to their complexity and the obscurity of their internal workings. Therefore, to explain how predictions are obtained for their corresponding inputs, specific explanation methods are required. This requirement affects many applications, especially those in medicine, finance, and safety maintenance. As a result, many successful methods and algorithms have been developed to satisfy this requirement (Arya et al., <xref ref-type="bibr" rid="B6">2019</xref>; Belle and Papantonis, <xref ref-type="bibr" rid="B9">2021</xref>; Guidotti et al., <xref ref-type="bibr" rid="B37">2019</xref>; Liang et al., <xref ref-type="bibr" rid="B47">2021</xref>; Molnar, <xref ref-type="bibr" rid="B49">2019</xref>; Murdoch et al., <xref ref-type="bibr" rid="B51">2019</xref>; Ras et al., <xref ref-type="bibr" rid="B58">2022</xref>; Zablocki et al., <xref ref-type="bibr" rid="B84">2021</xref>; Zhang Y. et al., <xref ref-type="bibr" rid="B88">2021</xref>).</p>
<p>There are many definitions and interpretations of the explanation. We understand explanation as an answer to the question which features of an instance or a set of instances significantly impact the black-box model prediction or which features are most relevant to the prediction. Methods answering this question can be referred to as <italic>feature importance</italic> methods or the <italic>feature-based explanation</italic>. Another group of explanation methods is called the <italic>example-based</italic> explanation methods (Molnar, <xref ref-type="bibr" rid="B49">2019</xref>). The corresponding methods are based on selecting influential instances from a training set having the largest impact on predictions to compare the training instance with the explainable one.</p>
<p>Feature importance explanation methods, in turn, can be divided into two groups: local and global. Methods from the first group explain the black-box model predictions locally around a test instance. Global methods explain a set of instances or the entire dataset. The well-known local explanation method is the Local Interpretable Model-Agnostic Explanation (LIME) (Ribeiro et al., <xref ref-type="bibr" rid="B60">2016</xref>). In accordance with this method, a surrogate model is constructed and trained, which approximates the black-box model at a point. The surrogate model in LIME is the linear regression whose coefficients can be interpreted as the feature importance measures. In fact, LIME can be regarded as a method of the linear approximation of a complex non-linear function implemented by the black-box model at a point. LIME is based on using a simple regression model. Agarwal et al. (<xref ref-type="bibr" rid="B3">2021</xref>) proposed to generalize LIME using the generalized additive model (GAM) (Hastie and Tibshirani, <xref ref-type="bibr" rid="B38">1990</xref>) instead of the simple linear regression and its implementation by means of neural networks of a special form. The GAM is a more general and flexible model in comparison with the original linear model. The corresponding surrogate model using the GAM is called the neural additive model (NAM).</p>
<p>Another important method, which is used for the local as well as global explanations, is SHapley Additive exPlanations (SHAP) (Lundberg and Lee, <xref ref-type="bibr" rid="B48">2017</xref>; Strumbelj and Kononenko, <xref ref-type="bibr" rid="B71">2010</xref>). The method is based on applying game-theoretic Shapley values (Shapley, <xref ref-type="bibr" rid="B68">1953</xref>) which can be interpreted as average marginal contributions of features to the black-box model prediction. SHAP can be also viewed as a method of the linear approximation of the black-box model predictions.</p>
<p>One of the important shortcomings of LIME is that it uses the perturbation technique which may be difficult to implement or may be even incorrect for some datasets, for example, for images. Moreover, it may provide incorrect results for high-dimensional data of a complex structure. The perturbation technique may generate a disturbed dataset especially when dealing with image data. A slight change in the data can lead to significant changes in images, often losing their meaning. Examples and an analysis of this pitfall as well as other pitfalls of LIME are considered in Molnar et al. (<xref ref-type="bibr" rid="B50">2020</xref>). The dual representation proposed in the study does not deal with images and allows us to overcome this difficulty. Another problem is that points generated in accordance with the perturbation technique may be located out of the training point domain, i.e., these points can be viewed as out-of-domain (OOD) data. This case is shown in <xref ref-type="fig" rid="F1">Figure 1</xref> where training points and generated points are depicted by small circles and by diamonds, respectively. The explained point is depicted by the triangle. A machine learning black-box model learned on points from the training domain may provide quite incorrect predictions for generated points which are outside of the domain. As a result, the approximating linear function constructed by using the generated points may be also incorrect.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Illustration of a case of out-of-domain data when generated points may be out of the training point domain.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0001.tif"/>
</fig>
<p>One of the shortcomings of SHAP is that it is also computationally expensive when there is a large number of features due to considering all possible coalitions whose number is 2<sup><italic>m</italic></sup>, where <italic>m</italic> is the number of features. Therefore, the computational time grows exponentially. Several simplifications and approximations have been proposed to overcome this difficulty (Strumbelj and Kononenko, <xref ref-type="bibr" rid="B71">2010</xref>, <xref ref-type="bibr" rid="B72">2011</xref>, <xref ref-type="bibr" rid="B73">2014</xref>; Utkin and Konstantinov, <xref ref-type="bibr" rid="B76">2022</xref>). However, they do not cardinally solve the problem of high-dimensional data. Moreover, there is another difficulty of SHAP, which is rarely mentioned. According to SHAP, the black-box model prediction is computed for instances composed from subsets of features and some values of removed features introduced by using some rules. If we use the example depicted in <xref ref-type="fig" rid="F1">Figure 1</xref>, then new instances in SHAP may be located inside or outside the ring bounding the training data domain where the black-box model provides incorrect predictions.</p>
<p>To partially solve the above problems, we propose a new explanation method which is based on applying two approaches: the <italic>convex hull</italic> of training data and the <italic>duality</italic> concept. The convex hull machine learning methods (Yousefzadeh, <xref ref-type="bibr" rid="B83">2020</xref>) analyze the relationship between a convex hull of a training set and the decision boundaries for test instances. Duality is a fundamental concept in various fields. We use the dual representation of data assuming the linear space in the local area around the explainable instance.</p>
<p>The idea behind the proposed method is very simple. We propose to find the convex hull of a subset of training data consisting of <italic>K</italic> instances which are close to the explainable instance. By using extreme points of the corresponding convex polytope, each point inside the convex hull can be expressed through the linear combination of the extreme points. Coefficients &#x003BB; of the linear combination are proposed to be regarded as a new feature vector which determines the corresponding point. They can be viewed as probabilities defined in the unit simplex of probabilities. Since the coefficients belong to the unit simplex, they can be uniformly generated from the simplex such that each dual feature vector &#x003BB; corresponds to the feature vector in the Euclidean space (the feature space of training data). A generated feature vector in the Euclidean space is computed through extreme points of the convex hull. As a result, we get a new dual dataset which generates instances in a local area around the explainable instance. The surrogate linear model is constructed by using this new dual dataset whose elements may have a smaller dimension defined by <italic>K</italic> or by the number of extreme points of the convex hull. Hence, we get important elements of the generated vectors of coefficients. Due to the linear representation of the surrogate (explanation) model, the important features in the Euclidean space can be simply computed from the important dual coefficients of the linear combinations by means of solving a simple optimization problem.</p>
<p>Another important idea behind the proposed dual representation is to consider the example-based explanation. It turns out that the dual explanation inherently leads to the example-based explanation when we study how each dual feature &#x003BB;<sub><italic>i</italic></sub> contributes into predictions. The contribution can be determined by applying well-known surrogate methods, for example, LIME or the neural additive model (NAM) (Agarwal et al., <xref ref-type="bibr" rid="B3">2021</xref>), but the corresponding surrogate models are constructed for features &#x003BB; but not for initial features.</p>
<p>For the local explanation, we construct the convex hull by using only a part of the training data. However, the same algorithm can also be successfully applied to the global explanation. In this case, the convex hull covers the entire dataset.</p>
<p>Our contributions can be summarized as follows:</p>
<list list-type="order">
<list-item><p>A new feature-based explanation method is proposed. It is based on the dual representation of datasets such that generation of new instances is carried out by means of generating points from the uniform distribution in the unit simplex. In other words, the method replaces the perturbation process of feature vectors in the Euclidean space by the uniform generation of points in the unit simplex, which is simpler and is carried out by many well-known algorithms (Rubinstein and Kroese, <xref ref-type="bibr" rid="B64">2008</xref>; Smith and Tromble, <xref ref-type="bibr" rid="B70">2004</xref>). The generation resolves the problem of out-of-domain data and reduces the number of hyperparameters which have to be tuned for perturbing new instances.</p></list-item>
<list-item><p>A new example-based explanation method is proposed. It is again based on the dual representation of datasets and uses the well-known explanation models NAM, the accumulated local effect (Apley and Zhu, <xref ref-type="bibr" rid="B4">2020</xref>), and the linear regression model. The explanation method provides shape functions which describe contributions of the dual features to the predictions. In sum, the model chooses the most influential instances among a certain number of nearest neighbors for the explained instance.</p></list-item>
<list-item><p>The proposed methods are illustrated by means of numerical experiments with synthetic and real data. The code of the proposed algorithm can be found in <ext-link ext-link-type="uri" xlink:href="https://github.com/Kozlov992/Dual-Explanation">https://github.com/Kozlov992/Dual-Explanation</ext-link>.</p></list-item>
</list>
<p>The study is organized as follows. Related work can be found in Section 2. A brief introduction to the convex hull, the explanation methods LIME, SHAP, NAM, and example-based methods is given in Section 3. A detailed description of the proposed approach applied to the feature-based explanation and the example-based explanation is available in Section 4. Numerical experiments with synthetic data and real data studying the feature-based explanation are given in Section 5. Section 6 provides numerical examples illustrating example-based explanation. Advantages and limitations of the proposed methods are discussed in Section 7. Concluding remarks can be found in Section 8.</p></sec>
<sec id="s2">
<title>2 Related work</title>
<sec>
<title>2.1 Local and global explanation methods</title>
<p>The requirement of the black-box model explanation led to development of many explanation methods. A large part of methods follows from the original LIME method (Ribeiro et al., <xref ref-type="bibr" rid="B60">2016</xref>). These methods include ALIME (Shankaranarayana and Runje, <xref ref-type="bibr" rid="B67">2019</xref>), Anchor LIME (Ribeiro et al., <xref ref-type="bibr" rid="B61">2018</xref>), LIME-Aleph (Rabold et al., <xref ref-type="bibr" rid="B57">2020</xref>), SurvLIME (Kovalev et al., <xref ref-type="bibr" rid="B45">2020</xref>), LIME for tabular data (Garreau and von Luxburg, <xref ref-type="bibr" rid="B32">2020a</xref>,<xref ref-type="bibr" rid="B33">b</xref>), GraphLIME (Huang et al., <xref ref-type="bibr" rid="B39">2022</xref>), etc.</p>
<p>To generalize the simple linear explanation surrogate model, several neural network models, including NAM (Agarwal et al., <xref ref-type="bibr" rid="B3">2021</xref>), GAMI-Net (Yang et al., <xref ref-type="bibr" rid="B81">2021</xref>), and AxNNs (Chen et al., <xref ref-type="bibr" rid="B20">2020</xref>), were proposed. These models are based on applying the GAM (Hastie and Tibshirani, <xref ref-type="bibr" rid="B38">1990</xref>). Similar explanation models, including Explainable Boosting Machine (Nori et al., <xref ref-type="bibr" rid="B54">2019</xref>) and EGBM (Konstantinov and Utkin, <xref ref-type="bibr" rid="B44">2021</xref>), were developed using the gradient boosting machine.</p>
<p>Another large part of explanation methods is based on the original SHAP method (Strumbelj and Kononenko, <xref ref-type="bibr" rid="B71">2010</xref>) which uses Shapley values (Lundberg and Lee, <xref ref-type="bibr" rid="B48">2017</xref>) as measures of the feature contribution into the black-box model prediction. This part includes FastSHAP (Jethani et al., <xref ref-type="bibr" rid="B42">2022</xref>), Kernel SHAP (Lundberg and Lee, <xref ref-type="bibr" rid="B48">2017</xref>), Neighborhood SHAP (Ghalebikesabi et al., <xref ref-type="bibr" rid="B34">2021</xref>), SHAFF (Benard et al., <xref ref-type="bibr" rid="B10">2022</xref>), TimeSHAP (Bento et al., <xref ref-type="bibr" rid="B12">2021</xref>), X-SHAP (Bouneder et al., <xref ref-type="bibr" rid="B14">2020</xref>), ShapNets (Wang et al., <xref ref-type="bibr" rid="B80">2021</xref>), etc.</p>
<p>Many explanation methods, including LIME and its modifications, are based on perturbation techniques (Fong and Vedaldi, <xref ref-type="bibr" rid="B30">2019</xref>, <xref ref-type="bibr" rid="B29">2017</xref>; Petsiuk et al., <xref ref-type="bibr" rid="B56">2018</xref>; Vu et al., <xref ref-type="bibr" rid="B78">2019</xref>), which stem from the well-known property that contribution of a feature can be determined by measuring how a prediction changes when the feature is altered (Du et al., <xref ref-type="bibr" rid="B24">2019</xref>). The main difficulty of using the perturbation technique is its computational complexity when samples are of the high dimensionality.</p>
<p>Another interesting group of explanation methods, called the example-based explanation methods (Molnar, <xref ref-type="bibr" rid="B49">2019</xref>), is based on selecting influential instances from a training set having the largest impact on the predictions and its comparison with the explainable instance. Several approaches to the example-based method implementation were considered in Adhikari et al. (<xref ref-type="bibr" rid="B2">2019</xref>), Cai et al. (<xref ref-type="bibr" rid="B16">2019</xref>), Chong et al. (<xref ref-type="bibr" rid="B21">2022</xref>), Crabbe et al. (<xref ref-type="bibr" rid="B41">2021</xref>), and Teso et al. (<xref ref-type="bibr" rid="B75">2021</xref>).</p>
<p>In addition to the aforementioned methods, there are a huge number of other approaches to solving the explanation problem, for example, Integrated Gradients (Sundararajan et al., <xref ref-type="bibr" rid="B74">2017</xref>), and Contrastive Examples (Dhurandhar et al., <xref ref-type="bibr" rid="B22">2018</xref>). Detailed surveys of many methods can be found in Adadi and Berrada (<xref ref-type="bibr" rid="B1">2018</xref>), Arrieta et al. (<xref ref-type="bibr" rid="B5">2020</xref>), Bodria et al. (<xref ref-type="bibr" rid="B13">2023</xref>), Burkart and Huber (<xref ref-type="bibr" rid="B15">2021</xref>), Carvalho et al. (<xref ref-type="bibr" rid="B17">2019</xref>), Islam et al. (<xref ref-type="bibr" rid="B40">2022</xref>), Guidotti et al. (<xref ref-type="bibr" rid="B37">2019</xref>), Li et al. (<xref ref-type="bibr" rid="B46">2022</xref>), Rudin (<xref ref-type="bibr" rid="B65">2019</xref>), and Rudin et al. (<xref ref-type="bibr" rid="B66">2021</xref>).</p></sec>
<sec>
<title>2.2 Convex hull methods and the convex duality concept</title>
<p>Most papers considering the convex hull methods study the relationship between location of decision boundaries and convex hulls of a training set. The corresponding methods are presented in Chau et al. (<xref ref-type="bibr" rid="B19">2013</xref>), El Mrabti et al. (<xref ref-type="bibr" rid="B25">2024</xref>), Gu et al. (<xref ref-type="bibr" rid="B36">2020</xref>), Nemirko and Dula (<xref ref-type="bibr" rid="B52">2021a</xref>), Nemirko and Dula (<xref ref-type="bibr" rid="B53">2021b</xref>), Renwang et al. (<xref ref-type="bibr" rid="B59">2022</xref>), Rossignol et al. (<xref ref-type="bibr" rid="B63">2024</xref>), Singh and Kumar (<xref ref-type="bibr" rid="B69">2021</xref>), Wang et al. (<xref ref-type="bibr" rid="B79">2013</xref>), Yousefzadeh (<xref ref-type="bibr" rid="B83">2020</xref>), and Zhang X. et al. (<xref ref-type="bibr" rid="B87">2021</xref>). The boundary of the dataset&#x00027;s convex hull is studied in Balestriero et al. (<xref ref-type="bibr" rid="B8">2021</xref>) to discriminate interpolation and extrapolation occurring for a sample. Efficient algorithms for the computation of the convex hull for training data are presented in Khosravani et al. (<xref ref-type="bibr" rid="B43">2016</xref>).</p>
<p>The concept of duality was also widely used in machine learning models starting from duality in the support vector machine and its various modifications (Bennett and Bredensteiner, <xref ref-type="bibr" rid="B11">2000</xref>; Zhang, <xref ref-type="bibr" rid="B86">2002</xref>). This concept was successfully applied to some types of neural networks (Ergen and Pilanci, <xref ref-type="bibr" rid="B26">2020</xref>, <xref ref-type="bibr" rid="B27">2021</xref>), including GANs (Farnia and Tse, <xref ref-type="bibr" rid="B28">2018</xref>), to models dealing with the high-dimensional data (Yao et al., <xref ref-type="bibr" rid="B82">2018</xref>).</p>
<p>At the same time, the aforementioned approaches were not applied to explanation models. Concepts of the convex hull and the convex duality may be a way to simplify and to improve the explanation models.</p></sec></sec>
<sec id="s3">
<title>3 Preliminaries</title>
<sec>
<title>3.1 Convex hull</title>
<p>According to Rockafellar (<xref ref-type="bibr" rid="B62">1970</xref>), a domain produced by a set of instances as vectors in Euclidean space is convex if a straight line segment that joins every pair of instances belonging to the set contains a vector belonging to the domain. A set <inline-formula><mml:math id="M1"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula> is convex if, for every pair, <inline-formula><mml:math id="M2"><mml:mstyle class="text"><mml:mtext mathvariant="bold">u</mml:mtext></mml:mstyle><mml:mo>,</mml:mo><mml:mstyle class="text"><mml:mtext mathvariant="bold">v</mml:mtext></mml:mstyle><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula>, and all &#x003BB; &#x02208; [0, 1], the vector (1 &#x02212; &#x003BB;)<bold>u</bold> &#x0002B; &#x003BB;<bold>v</bold> belongs to <inline-formula><mml:math id="M3"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula>.</p>
<p>Moreover, if <inline-formula><mml:math id="M4"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula> is a convex set, then for any <bold>x</bold><sub>1</sub>, <bold>x</bold><sub>2</sub>, ..., <bold>x</bold><sub><italic>t</italic></sub> belonging to <inline-formula><mml:math id="M5"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula> and for any non-negative numbers &#x003BB;<sub>1</sub>, ..., &#x003BB;<sub><italic>t</italic></sub> such that &#x003BB;<sub>1</sub> &#x0002B;... &#x0002B; &#x003BB;<sub><italic>t</italic></sub> &#x0003D; 1, the sum &#x003BB;<sub>1</sub><bold>x</bold><sub>1</sub> &#x0002B; ... &#x0002B; &#x003BB;<sub><italic>t</italic></sub><bold>x</bold><sub><italic>t</italic></sub> is called a convex combination of <bold>x</bold><sub>1</sub>, ..., <bold>x</bold><sub><italic>t</italic></sub>. The <italic>convex hull</italic> or <italic>convex envelope</italic> of set <inline-formula><mml:math id="M6"><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula> of instances in the Euclidean space can be defined in terms of convex sets or convex combinations as the minimal convex set containing <inline-formula><mml:math id="M7"><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula>, or the intersection of all convex sets containing <inline-formula><mml:math id="M8"><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula>, or the set of all convex combinations of instances in <inline-formula><mml:math id="M9"><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula>.</p></sec>
<sec>
<title>3.2 LIME, SHAP, NAM, and example-based methods</title>
<p>Let us briefly introduce the most popular explanation methods.</p>
<p><italic>LIME</italic> (Ribeiro et al., <xref ref-type="bibr" rid="B60">2016</xref>) proposes to approximate a black-box explainable model, denoted as <italic>f</italic>, with a simple function <italic>g</italic> in the vicinity of the point of interest <bold>x</bold>, whose prediction by means of <italic>f</italic> has to be explained, under the condition that the approximation function <italic>g</italic> belongs to a set of explanation models <italic>G</italic>, for example, linear models. To construct the function <italic>g</italic>, a new dataset consisting of generated points around <bold>x</bold> is constructed with predictions computed by means of the black-box model. Weights <italic>w</italic><sub><bold>x</bold></sub> are assigned to new instances in accordance with their proximity to point <bold>x</bold> by using a distance metric, for example, the Euclidean distance. The explanation function <italic>g</italic> is obtained by solving the following optimization problem:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M10"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mo class="qopname">arg</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">min</mml:mo></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mi>G</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mi>L</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mo>,</mml:mo><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x003A6;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, <italic>L</italic> is a loss function, for example, mean squared error, which measures how close the function <italic>g</italic> is to the function <italic>f</italic> at point <bold>x</bold>; &#x003A6;(<italic>g</italic>) is the model complexity. A local linear model is the result of the original LIME such that its coefficients explain the prediction.</p>
<p>Another approach to explaining the black-box model predictions is <italic>SHAP</italic> (Lundberg and Lee, <xref ref-type="bibr" rid="B48">2017</xref>; Strumbelj and Kononenko, <xref ref-type="bibr" rid="B71">2010</xref>), which is based on a concept of the Shapley values (Shapley, <xref ref-type="bibr" rid="B68">1953</xref>) estimating contributions of features to the prediction. If we explain prediction <italic>f</italic>(<bold>x</bold><sub>0</sub>) from the model at a local point <bold>x</bold><sub>0</sub>, then the <italic>i</italic>-th feature contribution is defined by the Shapley value as</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M11"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x02286;</mml:mo><mml:mi>N</mml:mi><mml:mo>\</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo><mml:mo>!</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mi>N</mml:mi><mml:mo>|</mml:mo><mml:mo>-</mml:mo><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>!</mml:mo></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mi>N</mml:mi><mml:mo>|</mml:mo><mml:mo>!</mml:mo></mml:mrow></mml:mfrac><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x0222A;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>f</italic>(<italic>S</italic>) is the black-box model prediction under condition that a subset <italic>S</italic> of the instance <bold>x</bold><sub>0</sub> features is used as the corresponding input; <italic>N</italic> is the set of all features.</p>
<p>It can be seen from <xref ref-type="disp-formula" rid="E2">Equation 2</xref> that the Shapley value &#x003D5;<sub><italic>i</italic></sub> can be regarded as the average contribution of the <italic>i</italic>-th feature across all possible permutations of the feature set. The prediction <italic>f</italic>(<bold>x</bold><sub>0</sub>) can be represented by using Shapley values as follows (Lundberg and Lee, <xref ref-type="bibr" rid="B48">2017</xref>; Strumbelj and Kononenko, <xref ref-type="bibr" rid="B71">2010</xref>):</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x01D53C;</mml:mtext><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>To generalize LIME, <italic>NAM</italic> was proposed in Agarwal et al. (<xref ref-type="bibr" rid="B3">2021</xref>). It is based on the generalized additive model of the form <italic>y</italic>(<bold>x</bold>) &#x0003D; <italic>g</italic><sub>1</sub>(<italic>x</italic><sub>1</sub>) &#x0002B; ... &#x0002B; <italic>g</italic><sub><italic>m</italic></sub>(<italic>x</italic><sub><italic>m</italic></sub>) (Hastie and Tibshirani, <xref ref-type="bibr" rid="B38">1990</xref>) and consists of <italic>m</italic> neural networks such that a single feature is fed to each subnetwork and each network implements function <italic>g</italic><sub><italic>i</italic></sub>(<italic>x</italic><sub><italic>i</italic></sub>), where <italic>g</italic><sub><italic>i</italic></sub> is a univariate shape function with <italic>E</italic>(<italic>g</italic><sub><italic>i</italic></sub>) &#x0003D; 0. All networks are trained jointly using backpropagation and can learn arbitrarily complex shape functions (Agarwal et al., <xref ref-type="bibr" rid="B3">2021</xref>). The loss function for training the whole neural network is of the form:</p>
<disp-formula id="E4"><label>(4)</label><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M14"><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> is the <italic>k</italic>-th feature of the <italic>i</italic>-th instance; <italic>n</italic> is the number of training instances.</p>
<p>The representation of results in NAM in the form of shape functions can be considered in two ways. On the one hand, the functions are more informative, and they show how features contribute to a prediction. On the other hand, we often need to have a single value of the feature contribution, which can be obtained by computing an importance measure from the obtained shape function.</p>
<p>NAM significantly extends the flexibility of explanation models due to the possibility of implementing arbitrary functions of features by means of neural networks.</p>
<p>According to Molnar (<xref ref-type="bibr" rid="B49">2019</xref>), an instance or a set of instances are selected in <italic>example-based explanation methods</italic> to explain the model prediction. In contrast to the feature importance explanation (LIME, SHAP), the example-based methods explain a model by selecting instances from the dataset and do not consider features or their importance for explaining. In the context of obtained results, the example-based methods are represented by influential instances (points from the training set that have the largest impact on the predictions) and by prototypes (representative instances from the training data). It should be noted that instances used for explanation may not belong to a dataset and are combinations of instances from the dataset or some points in the dataset domain. The well-known method of <italic>K</italic> nearest neighbors can be regarded as an example-based explanation method.</p></sec></sec>
<sec sec-type="materials and methods" id="s4">
<title>4 Materials and methods</title>
<sec>
<title>4.1 Dual explanation</title>
<p>Let us consider the method for dual explanation. Suppose that there is a dataset <inline-formula><mml:math id="M15"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> of <italic>t</italic> points (<bold>x</bold><sub><italic>i</italic></sub>, <italic>y</italic><sub><italic>i</italic></sub>), where <inline-formula><mml:math id="M16"><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow><mml:mo>&#x02282;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is a feature vector consisting of <italic>m</italic> features, <italic>y</italic><sub><italic>i</italic></sub> is the observed output for the feature vector <bold>x</bold><sub><italic>i</italic></sub> such that <italic>y</italic><sub><italic>i</italic></sub> &#x02208; &#x0211D; in the regression problem and <italic>y</italic><sub><italic>i</italic></sub> &#x02208; {1, 2, ..., <italic>C</italic>} in the classification problem with <italic>C</italic> classes. It is assumed that output <italic>y</italic> of an explained black-box model is a function <italic>f</italic>(<bold>x</bold>) of an associated input vector <bold>x</bold> from <inline-formula><mml:math id="M17"><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula>.</p>
<p>To explain an instance <inline-formula><mml:math id="M18"><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula>, an interpretable surrogate model <italic>g</italic> for the black-box model <italic>f</italic> is trained in a local region around <bold>x</bold><sub>0</sub>. It is carried out by generating a new dataset <inline-formula><mml:math id="M19"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula> of <italic>n</italic> perturbed samples in the vicinity of the point of interest <bold>x</bold><sub>0</sub> similarly to LIME. Samples are assigned weights <italic>w</italic><sub><bold>x</bold></sub> in accordance with their proximity to the point <bold>x</bold><sub>0</sub>. By using the black-box model, output values <italic>y</italic> are obtained as function <italic>f</italic> of generated instances. As a result, dataset <inline-formula><mml:math id="M20"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula> consists of <italic>n</italic> pairs (<bold>x</bold><sub><italic>i</italic></sub>, <italic>f</italic>(<bold>x</bold><sub><italic>i</italic></sub>)), <italic>i</italic> &#x0003D; 1, ..., <italic>n</italic>. Interpretable surrogate model <italic>g</italic> is now trained on <inline-formula><mml:math id="M21"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula>. Many explanation methods such as LIME and SHAP are based on applying the linear regression function</p>
<disp-formula id="E5"><label>(5)</label><mml:math id="M22"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>ax</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>as an interpretable model because each coefficient <italic>a</italic><sub><italic>i</italic></sub> in <italic>g</italic> quantifies how the <italic>i</italic>-th feature impacts on the prediction. Here <bold>a</bold> &#x0003D; (<italic>a</italic><sub>1</sub>, ..., <italic>a</italic><sub><italic>m</italic></sub>). It should be noted that the domain of set <inline-formula><mml:math id="M23"><mml:mrow><mml:mi mathvariant="script">S</mml:mi></mml:mrow></mml:math></inline-formula> coincides with the domain of set <inline-formula><mml:math id="M24"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula> in the case of the global explanation.</p>
<p>Let us consider the convex hull <inline-formula><mml:math id="M25"><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:math></inline-formula> of a set of <italic>K</italic> nearest neighbors of instance <bold>x</bold><sub>0</sub> in the Euclidean space. The convex hull <inline-formula><mml:math id="M26"><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:math></inline-formula> forms a convex polytope with <italic>d</italic> vertices or extreme points <inline-formula><mml:math id="M27"><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, <italic>i</italic> &#x0003D; 1, ..., <italic>d</italic>. Then, each point <inline-formula><mml:math id="M28"><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:math></inline-formula> is a convex combination of <italic>d</italic> extreme points:</p>
<disp-formula id="E6"><label>(6)</label><mml:math id="M29"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mtext class="textrm" mathvariant="normal">where&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02265;</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>This implies that we can uniformly generate a vector in the unit simplex of possible vectors &#x003BB; consisting of <italic>d</italic> coefficients &#x003BB;<sub>1</sub>, ..., &#x003BB;<sub><italic>d</italic></sub>, denoted &#x00394;<sup><italic>d</italic>&#x02212;1</sup>. In other words, we can consider points in the unit simplex &#x00394;<sup><italic>d</italic>&#x02212;1</sup> and construct a new dual dataset <inline-formula><mml:math id="M30"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, which consists of vectors <inline-formula><mml:math id="M31"><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, and the corresponding values <italic>z</italic><sub><italic>j</italic></sub>, <italic>j</italic> &#x0003D; 1, ..., <italic>n</italic>, computed by using the black-box model <italic>f</italic> as follows:</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M32"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>i.e., <italic>z</italic><sub><italic>j</italic></sub> is a prediction of the black-box model when its input is vector <inline-formula><mml:math id="M33"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>.</mml:mo></mml:math></inline-formula></p>
<p>In sum, we can train the &#x0201C;dual&#x0201D; linear regression model (the surrogate model) for explanation on dataset <inline-formula><mml:math id="M34"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow></mml:math></inline-formula>, which is of the form:</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M35"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold'><mml:mtext>b</mml:mtext></mml:mstyle><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <bold>b</bold> &#x0003D; (<italic>b</italic><sub>1</sub>,..., <italic>b</italic><sub><italic>d</italic></sub>) is the vector of coefficients of the &#x0201C;dual&#x0201D; linear regression model.</p>
<p>The surrogate model can be trained by means of LIME or SHAP with the dual dataset <inline-formula><mml:math id="M36"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow></mml:math></inline-formula>.</p>
<p>Suppose that we have trained the function <italic>h</italic>(&#x003BB;) and computed coefficients <italic>b</italic><sub>1</sub>, ..., <italic>b</italic><sub><italic>d</italic></sub>. The next question is how to transform these coefficients to coefficients <italic>a</italic><sub>1</sub>, ..., <italic>a</italic><sub><italic>m</italic></sub>, which characterize the feature contribution to the prediction. In the case of the linear regression, coefficients of function <italic>g</italic>(<bold>x</bold>) &#x0003D; <italic>a</italic><sub>1</sub><italic>x</italic><sub>1</sub> &#x0002B; ... &#x0002B; <italic>a</italic><sub><italic>m</italic></sub><italic>x</italic><sub><italic>m</italic></sub> can be found from the condition:</p>
<disp-formula id="E9"><label>(9)</label><mml:math id="M37"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>which has to be satisfied for all generated &#x003BB;<sub><italic>j</italic></sub>. This obvious condition means that predictions of the &#x0201C;primal&#x0201D; surrogate model with coefficients <italic>a</italic><sub>1</sub>, ..., <italic>a</italic><sub><italic>m</italic></sub> have to coincide with predictions of the &#x0201C;dual&#x0201D; model.</p>
<p>Introduce a matrix consisting of extreme points</p>
<disp-formula id="E10"><label>(10)</label><mml:math id="M38"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msubsup><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Note that &#x003BB;<sub><italic>i</italic></sub> &#x0003D; 1 and &#x003BB;<sub><italic>j</italic></sub> &#x0003D; 0, <italic>j</italic> &#x02260; <italic>i</italic>, for the <italic>i</italic>-th extreme point. This implies that the condition (<xref ref-type="disp-formula" rid="E9">Equation 9</xref>) can be rewritten as</p>
<disp-formula id="E11"><label>(11)</label><mml:math id="M39"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>By using <xref ref-type="disp-formula" rid="E5">Equation 5</xref>, we get</p>
<disp-formula id="E12"><label>(12)</label><mml:math id="M40"><mml:mrow><mml:mi>g</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>x</mml:mi></mml:mstyle><mml:mi>i</mml:mi><mml:mo>&#x02217;</mml:mo></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>a</mml:mi></mml:mstyle><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>x</mml:mi></mml:mstyle><mml:mi>i</mml:mi><mml:mrow><mml:mo>*</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>T</mml:mi></mml:mstyle></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p>Hence, there holds</p>
<disp-formula id="E13"><label>(13)</label><mml:math id="M41"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle mathvariant='bold'><mml:mtext>aX</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold'><mml:mtext>b</mml:mtext></mml:mstyle><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>It follows from the above that</p>
<disp-formula id="E14"><label>(14)</label><mml:math id="M42"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>a</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold'><mml:mtext>X</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mstyle mathvariant='bold'><mml:mtext>b</mml:mtext></mml:mstyle><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <bold>X</bold><sup>&#x02212;1</sup> denotes the pseudoinverse (Moore&#x02013;Penrose) matrix of <bold>X</bold>.</p>
<p>Generally, the vector <bold>a</bold> can be computed by solving the following unconstrained optimization problem:</p>
<disp-formula id="E15"><label>(15)</label><mml:math id="M43"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>a</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo class="qopname">arg</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mtext>min</mml:mtext></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>a</mml:mtext></mml:mstyle><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:munder></mml:mstyle><mml:mo>&#x02225;</mml:mo><mml:mstyle mathvariant='bold'><mml:mtext>aX</mml:mtext></mml:mstyle><mml:mo>-</mml:mo><mml:mstyle mathvariant='bold'><mml:mtext>b</mml:mtext></mml:mstyle><mml:msup><mml:mrow><mml:mo>&#x02225;</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
 <p>In the original LIME, perturbed instances are generated around <bold>x</bold><sub>0</sub>. One of the important advantages of the proposed dual approach is the opportunity to avoid generating instances in accordance with a probability distribution with parameters and to generate only uniformly distributed points &#x003BB;<sup>(<italic>j</italic>)</sup> in the unit simplex &#x00394;<sup><italic>d</italic>&#x02212;1</sup>. Indeed, if we have image data, then it is difficult to perturb pixels or superpixels of images. Moreover, it is difficult to determine parameters of the generation to cover instances from different classes. According to the dual representation, after generating vectors &#x003BB;<sup>(<italic>j</italic>)</sup>, new vectors <bold>x</bold><sub><italic>j</italic></sub> are computed by using <xref ref-type="disp-formula" rid="E6">Equation 6</xref>. This is similar to the mixup method (Zhang et al., <xref ref-type="bibr" rid="B85">2018</xref>) to some extent that generates new samples by linear interpolation of multiple samples and their labels. However, in contrast to the mixup method, the prediction is obtained as the output of the black-box model (see <xref ref-type="disp-formula" rid="E7">Equation 7</xref>), but not as the convex combination of one-hot label encodings. Another important advantage is that instances corresponding to the generated set <inline-formula><mml:math id="M54"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow></mml:math></inline-formula> are totally included in the domain of the dataset <inline-formula><mml:math id="M55"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula>. 
This implies that we do not get anomalous predictions <italic>f</italic>(<bold>x</bold><sub><italic>i</italic></sub>) when generated <bold>x</bold><sub><italic>i</italic></sub> is far from the domain of the dataset <inline-formula><mml:math id="M56"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula>.</p>
<p>Another question is how to choose the convex hull of the predefined size and, hence, how to determine extreme points <inline-formula><mml:math id="M57"><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> of the corresponding convex polytope. The problem is that the convex hull has to include some number of points from dataset <inline-formula><mml:math id="M58"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula> and the explained point <bold>x</bold><sub>0</sub>. Let us consider <italic>K</italic> nearest neighbors around <bold>x</bold><sub>0</sub> from <inline-formula><mml:math id="M59"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula>, where <italic>K</italic> is a tuning parameter satisfying condition <italic>K</italic>&#x02265;<italic>d</italic>. The convex hull is constructed on these <italic>K</italic> &#x0002B; 1 points (<italic>K</italic> points from <inline-formula><mml:math id="M60"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula> and one point <bold>x</bold><sub>0</sub>). Then, there are <italic>d</italic> points among <italic>K</italic> nearest neighbors which define a convex polytope and can be regarded as its extreme points. It should be noted that <italic>d</italic> depends on the dataset analyzed. <xref ref-type="fig" rid="F2">Figure 2</xref> illustrates two cases of the explained point location and the convex polytopes constructed from <italic>K</italic> &#x0003D; 7 nearest neighbors. The dataset consists of 10 points depicted by circles. A new explained point <bold>x</bold><sub>0</sub> is depicted by the red triangle. 
In Case 1, point <bold>x</bold><sub>0</sub> lies in the largest convex polytope with <italic>d</italic> &#x0003D; 5 extreme points <inline-formula><mml:math id="M61"><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> constructed from seven nearest neighbors. The largest polytope is taken in order to envelop as many points from the dataset as possible. In Case 2, point <bold>x</bold><sub>0</sub> lies outside the convex polytope constructed from the nearest neighbors. Therefore, this point is included in the set of extreme points and <italic>d</italic> &#x02264; <italic>K</italic>&#x0002B;1. 
As a result, we have <italic>d</italic> &#x0003D; 6 extreme points <inline-formula><mml:math id="M62"><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>6</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Two cases of the explained point location and the convex polytopes constructed from <italic>K</italic> nearest neighbors.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0002.tif"/>
</fig>
<p>To identify whether the newly added point can be expressed as a convex combination of the existing vectors, Farkas&#x00027; lemma (Dinh and Jeyakumar, <xref ref-type="bibr" rid="B23">2014</xref>) can be applied.</p>
<p>Points &#x003BB;<sup>(<italic>j</italic>)</sup> from the unit simplex &#x00394;<sup><italic>d</italic>&#x02212;1</sup> are randomly selected in accordance with the uniform distribution over the simplex. This procedure can be carried out by means of generating random numbers in accordance with the Dirichlet distribution (Rubinstein and Kroese, <xref ref-type="bibr" rid="B64">2008</xref>). There are also different approaches to generate points from the unit simplex (Smith and Tromble, <xref ref-type="bibr" rid="B70">2004</xref>).</p>
<p>Finally, we write <xref ref-type="table" rid="T5">Algorithm 1</xref> implementing the proposed method.</p>
<table-wrap position="float" id="T5"> 
<label>Algorithm 1</label>
<caption><p>The dual explanation algorithm.</p></caption>
<table frame="hsides" rules="groups">
<tbody>
<tr><td align="left" valign="top"><monospace><bold>Require:</bold> Training set <inline-formula><mml:math id="M44"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula>; the black-box model <italic>f</italic>; explainable point <bold>x</bold><sub>0</sub>; the number of nearest neighbors <italic>K</italic></monospace></td></tr>
<tr><td align="left" valign="top"><monospace><bold>Ensure:</bold> Important features of <bold>x</bold><sub>0</sub> (vector <inline-formula><mml:math id="M45"><mml:mstyle class="text"><mml:mtext mathvariant="bold">a</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mstyle></mml:mrow></mml:msup></mml:math></inline-formula> of the linear surrogate model coefficients)</monospace></td></tr>
<tr><td align="left" valign="top"><monospace>1: Determine a set <inline-formula><mml:math id="M46"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> of <italic>K</italic> nearest neighbors for <bold>x</bold><sub>0</sub> adding <bold>x</bold><sub>0</sub> itself</monospace></td></tr>
<tr><td align="left" valign="top"><monospace>2: Construct the largest convex hull <inline-formula><mml:math id="M47"><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:math></inline-formula> of <inline-formula><mml:math id="M48"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></monospace></td></tr>
<tr><td align="left" valign="top"><monospace>3: Find extreme points of <inline-formula><mml:math id="M49"><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow></mml:math></inline-formula> and their number <italic>d</italic> &#x02264; <italic>K</italic>&#x0002B;1</monospace></td></tr>
<tr><td align="left" valign="top"><monospace>4: Generate uniformly <italic>n</italic> points &#x003BB;<sup>(<italic>j</italic>)</sup>, <italic>j</italic> &#x0003D; 1, ..., <italic>n</italic>, from the unit simplex &#x00394;<sup><italic>d</italic>&#x02212;1</sup></monospace></td></tr>
<tr><td align="left" valign="top"><monospace>5: Find predictions <italic>z</italic><sub><italic>j</italic></sub> of the black-box model in accordance with the associated input <inline-formula><mml:math id="M50"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> for all <italic>j</italic> &#x0003D; 1, ..., <italic>n</italic></monospace></td></tr>
<tr><td align="left" valign="top"><monospace>6: Construct a new dual dataset <inline-formula><mml:math id="M51"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula></monospace></td></tr>
<tr><td align="left" valign="top"><monospace>7: Train the linear regression (<xref ref-type="disp-formula" rid="E8">Equation 8</xref>) on dataset <inline-formula><mml:math id="M52"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow></mml:math></inline-formula> and find the vector of coefficients <inline-formula><mml:math id="M53"><mml:mstyle class="text"><mml:mtext mathvariant="bold">b</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mstyle></mml:mrow></mml:msup></mml:math></inline-formula></monospace></td></tr>
<tr><td align="left" valign="top"><monospace>8: Find vector <bold>a</bold> by solving optimization problem (<xref ref-type="disp-formula" rid="E15">Equation 15</xref>)</monospace></td></tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="F3">Figure 3</xref> illustrates steps of the algorithm for explanation of a prediction provided by a black-box model at the point depicted by the small triangle. Points of the dataset are depicted by small circles. The training dataset <inline-formula><mml:math id="M63"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula> and the explained point are shown in <xref ref-type="fig" rid="F3">Figure 3A</xref>. <xref ref-type="fig" rid="F3">Figure 3B</xref> shows set <inline-formula><mml:math id="M64"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> of <italic>K</italic> &#x0003D; 13 nearest points such that only two points (0.05, 0.5) and (1.0, 0.1) from training set <inline-formula><mml:math id="M65"><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:math></inline-formula> do not belong to set <inline-formula><mml:math id="M66"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">T</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. The convex hull and the corresponding extreme points are shown in <xref ref-type="fig" rid="F3">Figure 3C</xref>. Points uniformly generated in the unit simplex are depicted by means of small crosses in <xref ref-type="fig" rid="F3">Figure 3D</xref>. It is interesting to point out that the generated points are uniformly distributed in the unit simplex, but not in the convex polytope as it is follows from <xref ref-type="fig" rid="F3">Figure 3D</xref>. We uniformly generate vectors &#x003BB;, but the corresponding vectors <bold>x</bold> are not uniformly distributed in the polytope. One can see from <xref ref-type="fig" rid="F3">Figure 3D</xref> that generated points in the initial (primal) feature space tend to be located in the area where the density of extreme points is largest. 
This is a very interesting property of the dual representation. It means that the method takes into account the concentration of training points and the probability distribution of the instances in the dataset.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Steps of the algorithm for explanation of a prediction provided by a black-box model at the point depicted by the small triangle. <bold>(A)</bold> Training dataset (circles) and the explainable example (triangle). <bold>(B)</bold> 13 nearest neighbors. <bold>(C)</bold> The convex hull around the explainable example. <bold>(D)</bold> Generated examples (crosses) in the convex hull.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0003.tif"/>
</fig>
<p>The difference between points generated by means of the original LIME and the proposed method is illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref> where the left picture (<xref ref-type="fig" rid="F4">Figure 4A</xref>) shows a fragment of <xref ref-type="fig" rid="F1">Figure 1</xref> and the right picture (<xref ref-type="fig" rid="F4">Figure 4B</xref>) illustrates how the proposed method generates instances.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Generated points in the original LIME <bold>(A)</bold> and in the proposed dual method <bold>(B)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0004.tif"/>
</fig>
<p>The proposed method requires finding all the extreme points (vertices) of the convex hull of a given point <inline-formula><mml:math id="M67"><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> and its nearest neighbors <bold>x</bold><sub>1</sub>, <bold>x</bold><sub>2</sub>, &#x02026;, <bold>x</bold><sub><italic>n</italic>&#x02212;1</sub>. When the dimension <italic>d</italic> is small, these extreme points can be computed in time <italic>O</italic>(2<sup><italic>O</italic>(<italic>d</italic>log<italic>d</italic>)</sup><italic>n</italic><sup>2</sup>) &#x0003D; <italic>O</italic>(<italic>n</italic><sup>2</sup>) (Ottmann et al., <xref ref-type="bibr" rid="B55">2001</xref>). In general, determining whether <bold>x</bold><sub><italic>i</italic></sub> is an extreme point can be done by checking the condition</p>
<disp-formula id="E16"><label>(16)</label><mml:math id="M68"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02260;</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>\</mml:mo><mml:mrow><mml:mo stretchy="false">{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>conv</italic>(<italic>P</italic>) denotes the convex hull of the set <italic>P</italic>.</p>
<p>The above condition is equivalent to solving a feasibility problem that can be formulated as a linear program. This linear program involves <italic>n</italic> variables and <italic>n</italic>&#x0002B;<italic>d</italic> constraints and can be solved using the interior-point method described in Vaidya (<xref ref-type="bibr" rid="B77">1989</xref>). For each point, the time complexity of this procedure is <italic>O</italic>((<italic>n</italic>&#x0002B;<italic>d</italic>)<sup>3/2</sup><italic>n</italic>log(<italic>n</italic>)), resulting in an overall complexity of</p>
<disp-formula id="E17"><label>(17)</label><mml:math id="M69"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>O</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msup><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>In the extreme case, when <italic>d</italic>&#x0226B;1, we can use the AVTA algorithm to approximately find the extreme points of the set <inline-formula><mml:math id="M112"><mml:mrow><mml:msubsup><mml:mrow><mml:mo>&#x0007B;</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>x</mml:mi></mml:mstyle><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x0007D;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula>. This algorithm has the time complexity <italic>O</italic>(<italic>n</italic><sup>2</sup><italic>t</italic><sup>&#x02212;2</sup>), where <italic>t</italic> &#x02208; (0, 1) is an accuracy parameter. The approximation becomes more precise as <italic>t</italic> &#x02192; 0.</p>
<p>The dual approach can work best when applied to analysis of potential outliers. In that regard, the generation procedure proposed in the study is more robust than the one used in LIME. By choosing the generation region as the convex hull of the explained point nearest neighbors, we reduce the likelihood of creating additional samples that fail to align with the original data distribution. As for hyperparameters, the number of nearest neighbors used to construct the convex hull for the explained point largely depends on the user&#x00027;s preferences and the nature of analyzed data. We can stop incorporating additional neighbors when a certain threshold is reached, such as when the next nearest neighbor is considerably more distant compared to the previous ones. Furthermore, we can choose to exclude a new neighbor if its data features clearly indicate that it would not contribute much to the analysis of the explained point. The number of points to generate can be taken as <italic>k</italic>&#x000B7;<italic>n</italic>, where <italic>k</italic> is a real number and <italic>n</italic> is the number of selected neighbors. By default, <italic>k</italic> &#x0003D; 3. This implies that we can increment the number of generated points until we observe the convergence of dual coefficients. We can also modify the distribution type employed for creating the dual dataset. For instance, if we take new points to be generated mostly in close proximity to the explained point <bold>x</bold> &#x0003D; (<italic>x</italic><sub>1</sub>, ..., <italic>x</italic><sub><italic>d</italic></sub>), we can sample the points from the Dirichlet distribution with concentration parameters &#x003B1;<sub><italic>i</italic></sub> &#x0003D; 1 &#x0002B; <italic>t</italic> &#x000B7; <italic>x</italic><sub><italic>i</italic></sub>, where <italic>t</italic> &#x0003E; 0.</p></sec>
<sec>
<title>4.2 Example-based explanation and NAM</title>
<p>It turns out that the proposed method for the dual explanation inherently leads to the example-based explanation. An example-based explainer justifies the prediction on the explainable instance by returning instances related to it. Let us consider the dual representation (<xref ref-type="disp-formula" rid="E8">Equation 8</xref>). If we normalize coefficients <bold>b</bold> &#x0003D; (<italic>b</italic><sub>1</sub>, ..., <italic>b</italic><sub><italic>d</italic></sub>) as</p>
<disp-formula id="E18"><label>(18)</label><mml:math id="M70"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>d</mml:mi></mml:msubsup><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>then new coefficients (<italic>v</italic><sub>1</sub>, ..., <italic>v</italic><sub><italic>d</italic></sub>) quantify how extreme points <inline-formula><mml:math id="M71"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> associated with (&#x003BB;<sub>1</sub>, ..., &#x003BB;<sub><italic>d</italic></sub>) impact on the prediction. The greater the value of <italic>v</italic><sub><italic>i</italic></sub>, the greater contribution of <inline-formula><mml:math id="M72"><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> into a prediction. Hence, the linear combination of extreme points</p>
<disp-formula id="E19"><label>(19)</label><mml:math id="M73"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>allows us to get an instance <bold>x</bold> explaining <bold>x</bold><sub>0</sub>.</p>
<p>An outstanding approach considering convex combinations of instances from a dataset as the example-based explanation was proposed in Crabbe et al. (<xref ref-type="bibr" rid="B41">2021</xref>). In fact, we came to a similar example-based explanation by using the dual representation and constructing a linear regression surrogate model for the new variables (&#x003BB;<sub>1</sub>, ..., &#x003BB;<sub><italic>d</italic></sub>).</p>
<p>The example-based explanation may be very useful when we apply NAM (Agarwal et al., <xref ref-type="bibr" rid="B3">2021</xref>) for explaining the black-box prediction. By using dual dataset <inline-formula><mml:math id="M74"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, we train NAM consisting of <italic>d</italic> subnetworks such that each subnetwork implements the shape function <italic>h</italic><sub><italic>i</italic></sub>(&#x003BB;<sub><italic>i</italic></sub>). <xref ref-type="fig" rid="F5">Figure 5</xref> illustrates a scheme of training NAM. Each generated vector &#x003BB; is fed to NAM such that each its variable &#x003BB;<sub><italic>i</italic></sub> is fed to a separate neural subnetwork. 
For the same vector &#x003BB;, the corresponding instance <bold>x</bold> is computed by using <xref ref-type="disp-formula" rid="E6">Equation 6</xref>, and it is fed to the black-box model. The loss function for training the whole neural network is defined as the difference between the output <italic>z</italic> of the black-box model and the sum of shape functions <italic>h</italic><sub>1</sub>, ..., <italic>h</italic><sub><italic>d</italic></sub> implemented by neural subnetworks for the corresponding vector &#x003BB;, i.e., the loss function <italic>L</italic> is of the form:</p>
<disp-formula id="E20"><label>(20)</label><mml:math id="M75"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B1;</mml:mi><mml:mi>R</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>w</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M76"><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> is the <italic>k</italic>-th element of vector <bold>&#x003BB;</bold><sup>(<italic>i</italic>)</sup>; <italic>R</italic> is a regularization term with the hyperparameter &#x003B1; which controls the strength of the regularization; <bold>w</bold> is the vector of the neural network training parameters.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Scheme of training NAM on the generated set of random vectors &#x003BB;.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0005.tif"/>
</fig>
<p>The main difficulty of using the NAM results, i.e., shape functions <italic>h</italic><sub><italic>k</italic></sub>(&#x003BB;<sub><italic>k</italic></sub>), is how to interpret the shape functions for explanation. However, in the context of the example-based explanation, this difficulty can be simply resolved. First, we study how a shape function can be represented by a single value characterizing the importance of each variable &#x003BB;<sub><italic>k</italic></sub>, <italic>k</italic> &#x0003D; 1, ..., <italic>d</italic>. The shape function is similar to the partial dependence plot (Friedman, <xref ref-type="bibr" rid="B31">2001</xref>; Molnar, <xref ref-type="bibr" rid="B49">2019</xref>) to some extent. The importance of a variable (&#x003BB;<sub><italic>k</italic></sub>) can be evaluated by studying how rapidly the shape function, corresponding to the variable, is changed. The rapid change of the shape function says that small changes of the variable significantly change the target values (<italic>z</italic>). The above implies that we can use the importance measure proposed in Greenwell et al. (<xref ref-type="bibr" rid="B35">2018</xref>), which is defined as the deviation of each unique variable value from the average curve. In terms of the dual variables, it can be written as:</p>
<disp-formula id="E21"><label>(21)</label><mml:math id="M77"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>I</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo 
stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:msqrt><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>r</italic> is a number of values of each variable &#x003BB;<sub><italic>k</italic></sub>, which are analyzed to study the corresponding shape function.</p>
<p>Normalized values of the importance measures can be regarded as coefficients <italic>v</italic><sub><italic>i</italic></sub>, <italic>i</italic> &#x0003D; 1, ..., <italic>d</italic>, in <xref ref-type="disp-formula" rid="E19">Equation 19</xref>, i.e., they show how important each extreme point or how each extreme point can be regarded as an instance which explains instance <bold>x</bold><sub>0</sub>.</p>
<p>An additional important advantage of the dual representation is that shape functions for all variables &#x003BB;<sub><italic>k</italic></sub>, <italic>k</italic> &#x0003D; 1, ..., <italic>d</italic>, have the same scale because all variables are in the interval from 0 to 1. This allows us to compare the importance measures <italic>I</italic>(&#x003BB;<sub><italic>k</italic></sub>) without the preliminary scaling which can make results incorrect.</p></sec></sec>
<sec id="s5">
<title>5 Numerical experiments with the feature-based explanation</title>
<sec>
<title>5.1 Example 1</title>
<p>First, we consider the following simplest example when the black-box model is of the form:</p>
<disp-formula id="E22"><mml:math id="M78"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>10</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mn>20</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>3</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>0</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>0</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>6</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>0</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>7</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003BE;</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold'><mml:mtext>ax</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003BE;</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;&#x000A0;</mml:mtext><mml:mi>&#x003BE;</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Let us estimate the feature importance by using the proposed dual model. We generate <italic>N</italic> &#x0003D; 1000 points <bold>x</bold><sub><italic>i</italic></sub>, <italic>i</italic> &#x0003D; 1, ..., <italic>N</italic>, with components uniformly distributed in interval [0, 1], which are explained. For every point <bold>x</bold><sub><italic>i</italic></sub>, the dual model with <italic>K</italic> &#x0003D; 10 nearest neighbors is constructed by generating 30 vectors &#x003BB;<sup>(<italic>i</italic>)</sup> &#x02208; &#x0211D;<sup>7</sup> in the unit simplex. By applying <xref ref-type="table" rid="T5">Algorithm 1</xref>, we compute optimal vector <inline-formula><mml:math id="M79"><mml:msup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">a</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>7</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mstyle></mml:mrow></mml:msup></mml:math></inline-formula> for every point <bold>x</bold><sub><italic>i</italic></sub>. 
We expect that the mean value <inline-formula><mml:math id="M80"><mml:mover accent="false" class="mml-overline"><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">a</mml:mtext></mml:mstyle></mml:mrow><mml:mo accent="true">&#x000AF;</mml:mo></mml:mover></mml:math></inline-formula> of <bold>a</bold><sup>(<italic>i</italic>)</sup> over all <italic>i</italic> &#x0003D; 1, ..., <italic>N</italic> should be as close as possible to the true vector of coefficients <bold>a</bold> forming function <italic>f</italic>(<bold>x</bold>). The corresponding results are shown in <xref ref-type="table" rid="T1">Table 1</xref>. It can be seen from <xref ref-type="table" rid="T1">Table 1</xref> that the obtained vector <inline-formula><mml:math id="M81"><mml:mover accent="false" class="mml-overline"><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">a</mml:mtext></mml:mstyle></mml:mrow><mml:mo accent="true">&#x000AF;</mml:mo></mml:mover></mml:math></inline-formula> is actually close to vector <bold>a</bold>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Values of the importance measures in Example 1 in accordance with the explanation approach LR.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th/>
<th valign="top" align="center"><bold><italic>x</italic><sub>1</sub></bold></th>
<th valign="top" align="center"><bold><italic>x</italic><sub>2</sub></bold></th>
<th valign="top" align="center"><bold><italic>x</italic><sub>3</sub></bold></th>
<th valign="top" align="center"><bold><italic>x</italic><sub>4</sub></bold></th>
<th valign="top" align="center"><bold><italic>x</italic><sub>5</sub></bold></th>
<th valign="top" align="center"><bold><italic>x</italic><sub>6</sub></bold></th>
<th valign="top" align="center"><bold><italic>x</italic><sub>7</sub></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><bold>a</bold></td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">&#x02212;20</td>
<td valign="top" align="center">&#x02212;2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
</tr> <tr>
<td valign="top" align="left"><inline-formula><mml:math id="M82"><mml:mover accent="false" class="mml-overline"><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">a</mml:mtext></mml:mstyle></mml:mrow><mml:mo accent="true">&#x000AF;</mml:mo></mml:mover></mml:math></inline-formula></td>
<td valign="top" align="center">9.98</td>
<td valign="top" align="center">&#x02212;20.01</td>
<td valign="top" align="center">&#x02212;2.02</td>
<td valign="top" align="center">2.97</td>
<td valign="top" align="center">0.11</td>
<td valign="top" align="center">&#x02212;0.02</td>
<td valign="top" align="center">0.03</td>
</tr></tbody>
</table>
</table-wrap></sec>
<sec>
<title>5.2 Example 2</title>
<p>Let us consider another numerical example where the non-linear black-box model is investigated. It is of the form:</p>
<disp-formula id="E23"><mml:math id="M83"><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003BE;</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;&#x000A0;</mml:mtext><mml:mi>&#x003BE;</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>05</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p>We take <italic>N</italic> &#x0003D; 400 and generate two sets of points <bold>x</bold>. The first set contains <bold>x</bold> whose features are uniformly generated in the interval [0, 1]. The second set consists of <bold>x</bold> whose features are uniformly generated in the interval [15, 16]. It is interesting to note that the feature <italic>x</italic><sub>1</sub> is more important for the case of the second set because <inline-formula><mml:math id="M84"><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula> rapidly increases whereas <inline-formula><mml:math id="M85"><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula> decreases when we consider the first set and <italic>x</italic><sub>2</sub> is more important in this case.</p>
<p>We take <italic>K</italic> &#x0003D; 6 and generate 30 vectors &#x003BB;<sup>(<italic>i</italic>)</sup> uniformly distributed in the unit simplex for every <bold>x</bold> to construct the linear model <italic>h</italic>(&#x003BB;<sup>(<italic>i</italic>)</sup>). Mean values of the normalized importance of features <italic>x</italic><sub>1</sub> and <italic>x</italic><sub>2</sub> obtained for the first set are &#x02212;0.3 and 0.86 and for the second set are &#x02212;0.95 and 0.37. These results completely coincide with the importance of features considered above for two subsets.</p></sec>
<sec>
<title>5.3 Example 3</title>
<p>A goal of the following numerical example is to consider a case when we try to get predictions for points lying outside bounds of data on which the black-box model was trained as it is depicted in <xref ref-type="fig" rid="F1">Figure 1</xref>. In this case, the predictions of generated instances may be inaccurate and can seriously affect quality of many explanation models, for example, LIME, which uses the perturbation technique.</p>
<p>The initial dataset consists of <italic>n</italic> &#x0003D; 400 feature vectors <bold>x</bold><sub>1</sub>, ..., <bold>x</bold><sub><italic>n</italic></sub> such that there holds</p>
<disp-formula id="E24"><label>(22)</label><mml:math id="M86"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="false" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x003C1;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="false" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mo class="qopname">cos</mml:mo><mml:mi>&#x003C6;</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo class="qopname">sin</mml:mo><mml:mi>&#x003C6;</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where parameter &#x003C1;<sup>2</sup> is uniformly distributed in interval [0, 2<sup>2</sup>]; parameter &#x003C6; is uniformly distributed in interval [0, 2&#x003C0;].</p>
<p>The observed outputs <italic>y</italic><sub><italic>i</italic></sub> &#x0003D; <italic>f</italic>(<bold>x</bold><sub><italic>i</italic></sub>) are defined as</p>
<disp-formula id="E25"><label>(23)</label><mml:math id="M87"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003BE;</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;&#x000A0;</mml:mtext><mml:mi>&#x003BE;</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>05</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>We use two black-box models: the KNN regressor with <italic>k</italic> &#x0003D; 6 and the random forest consisting of 100 decision trees, implemented by means of the Python Scikit-learn. The above black-box models have default parameters taken from Scikit-learn.</p>
<p>We construct the explanation models at <italic>l</italic> &#x0003D; 100 testing points <bold>x</bold><sub>1, <italic>test</italic></sub>, ..., <bold>x</bold><sub><italic>l, test</italic></sub> of the form <xref ref-type="disp-formula" rid="E24">Equation 22</xref>, but with parameters &#x003C1;<sup>2</sup> uniformly distributed in [1.9<sup>2</sup>, 2<sup>2</sup>] and &#x003C6; uniformly distributed in [0, 2&#x003C0;]. It can be seen from the interval of parameter &#x003C1; that a part of generated points can be outside bounds of training data <bold>x</bold><sub>1</sub>, ..., <bold>x</bold><sub><italic>n</italic></sub>. <xref ref-type="fig" rid="F6">Figure 6</xref> shows the set of instances for training the black-box model and the set of testing instances for evaluation of the explanation models.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Instances for training the black-box models <bold>(the left picture)</bold> and testing points for evaluation of the explanation models <bold>(the right picture)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0006.tif"/>
</fig>
<p>The dual model is constructed in accordance with <xref ref-type="table" rid="T5">Algorithm 1</xref> using <italic>K</italic> &#x0003D; 6 nearest neighbors. We generate 30 dual vectors &#x003BB;<sup>(<italic>j</italic>)</sup> to train the dual model. We also use LIME and generate 30 points having normal distribution <inline-formula><mml:math id="M88"><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>&#x003A3;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, where &#x003A3; &#x0003D; diag(0.05, 0.05). Every point has a weight generated from the normal distribution with parameter <italic>v</italic> &#x0003D; 0.01.</p>
<p>To compare the dual model and LIME, we use the mean squared error (MSE) which measures how predictions of the explanation model <italic>g</italic>(<bold>x</bold>) are close to predictions of the black-box model <italic>f</italic>(<bold>x</bold>) (KNN or the random forest). It is defined as</p>
<disp-formula id="E26"><mml:math id="M89"><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p>Values of the MSE measures for the dual explanation model and for the original LIME, when KNN is used as a black-box model, are 0.01 and 0.02, respectively. It can be seen from the results that the dual model provides better results in comparison with LIME because some generated points in LIME are located outside the training domain. Values of the MSE measures for the dual explanation model and for the original LIME, when the random forest is used as a black-box model, are 0.005 and 0.014, respectively.</p></sec>
<sec>
<title>5.4 Example 4</title>
<p>Let us perform a similar experiment with real data by taking the dataset &#x0201C;Combined Cycle Power Plant Data Set&#x0201D; (<ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/ml/datasets/combined&#x0002B;cycle&#x0002B;power&#x0002B;plant">https://archive.ics.uci.edu/ml/datasets/combined&#x0002B;cycle&#x0002B;power&#x0002B;plant</ext-link>) consisting of 9568 instances having 4 features. We use Z-score normalization (the mean is 0 and the standard deviation is 1) for feature vectors from the dataset. Two black-box models are implemented by using the KNN regressor with <italic>K</italic> &#x0003D; 10 and the random forest regressor consisting of 100 decision trees. The testing set consisting of <italic>l</italic> &#x0003D; 200 new instances is produced as follows. The convex hull of the training set in the 4-dimensional feature space is determined. Then, vertices of the obtained polytope are computed. Two adjacent vertices <bold>x</bold><sub><italic>j</italic><sub>1</sub></sub> and <bold>x</bold><sub><italic>j</italic><sub>2</sub></sub> are randomly selected. Value &#x003BB; is generated from the uniform distribution on the unit interval. A new testing instance <bold>x</bold><sub><italic>j, test</italic></sub> is obtained as <bold>x</bold><sub><italic>j, test</italic></sub> &#x0003D; &#x003BB;<bold>x</bold><sub><italic>j</italic><sub>1</sub></sub>&#x0002B;(1&#x02212;&#x003BB;)<bold>x</bold><sub><italic>j</italic><sub>2</sub></sub>. Then, we again select adjacent vertices and repeat the procedure for computing testing instances <italic>l</italic> times. As a result, we get the testing set <bold>x</bold><sub><italic>j, test</italic></sub>, <italic>j</italic> &#x0003D; 1, ..., <italic>l</italic>.</p>
<p>The dual model is constructed in accordance with <xref ref-type="table" rid="T5">Algorithm 1</xref> using <italic>K</italic> &#x0003D; 10 nearest neighbors. We again generate 30 dual vectors &#x003BB;<sup>(<italic>j</italic>)</sup> to train the dual model. We also use LIME and generate 30 points having normal distribution <inline-formula><mml:math id="M90"><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>&#x003A3;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, where &#x003A3; &#x0003D; diag(0.05, 0.05, 0.05, 0.05). Every point has a weight generated from the normal distribution with parameter <italic>v</italic> &#x0003D; 0.5.</p>
<p>Values of the MSE measures for the dual explanation model and for the original LIME, when KNN is used as a black-box model trained on dataset &#x0201C;Combined Cycle Power Plant Data Set&#x0201D;, are 84 and 173, respectively. It can be seen from the results that the dual model provides better results in comparison with LIME because some generated points in LIME are located outside the training domain. Values of the MSE measures for the dual explanation model and for the original LIME, when the random forest is used as a black-box model, are 110 and 282, respectively. One can again see from the above results that the dual models outperform LIME.</p>
<sec id="s6">
<title>6 Numerical experiments with the example-based explanation</title>
<sec>
<title>6.1 Example 1</title>
<p>We start from the synthetic instances illustrating the dual example-based explanation when NAM is used. Suppose that the explained instance <bold>x</bold><sub>0</sub> belongs to a polytope with six vertices <bold>x</bold><sub>1</sub>, ..., <bold>x</bold><sub>6</sub> (<italic>d</italic> &#x0003D; 6). The black-box model is a function <italic>f</italic>(<bold>x</bold>) such that</p>
<disp-formula id="E27"><label>(24)</label><mml:math id="M91"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>6</mml:mn></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>6</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mn>15</mml:mn><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>22</mml:mn><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>0</mml:mn><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>40</mml:mn><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo class="qopname">sin</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>3</mml:mn><mml:mo>.</mml:mo><mml:mn>14</mml:mn><mml:mo>&#x000B7;</mml:mo><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mn>0</mml:mn><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>0</mml:mn><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>6</mml:mn></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><italic>n</italic> &#x0003D; 2, 000 vectors &#x003BB;<sup>(<italic>i</italic>)</sup> &#x02208; &#x0211D;<sup>6</sup>, <italic>i</italic> &#x0003D; 1, ..., <italic>n</italic>, are uniformly generated in the unit simplex &#x00394;<sup>6 &#x02212; 1</sup>. For each point &#x003BB;<sup>(<italic>i</italic>)</sup>, the corresponding prediction <italic>z</italic><sub><italic>i</italic></sub> is computed by using the black-box function <italic>h</italic>(&#x003BB;). NAM is trained with the learning rate 0.0005, with hyperparameter &#x003B1; &#x0003D; 10<sup>&#x02212;4</sup>, the number of epochs is 300, and the batch size is 128.</p>
<p>To determine the normalized values of the importance measures <italic>I</italic>(&#x003BB;<sub><italic>i</italic></sub>), <italic>i</italic> &#x0003D; 1, ..., 6, we use three approaches. The first one is to apply the method called accumulated local effect (ALE) (Apley and Zhu, <xref ref-type="bibr" rid="B4">2020</xref>), which describes how features influence the prediction of the black-box model on average. The second approach is to construct the linear regression model (LR) by using the generated points and their predictions obtained by means of the black-box model. The third approach is to use NAM.</p>
<p>The corresponding normalized values of the importance measures for &#x003BB;<sub>1</sub>, ..., &#x003BB;<sub>6</sub> obtained by means of ALE, LR, and NAM are shown in <xref ref-type="table" rid="T2">Table 2</xref>. It should be noted that the importance measure <italic>I</italic>(&#x003BB;<sub><italic>i</italic></sub>) can be obtained only for NAM and ALE. However, normalized coefficients of LR can be interpreted in the same way. Therefore, we consider results of these models jointly in all tables. One can see from <xref ref-type="table" rid="T2">Table 2</xref> that all methods provide similar relationships between the importance measures <italic>I</italic>(&#x003BB;<sub><italic>i</italic></sub>), <italic>i</italic> &#x0003D; 1, ..., 6. However, LR provides rather large values of <italic>I</italic>(&#x003BB;<sub>3</sub>), <italic>I</italic>(&#x003BB;<sub>5</sub>), <italic>I</italic>(&#x003BB;<sub>6</sub>), which do not correspond to the zero-valued coefficients in <xref ref-type="disp-formula" rid="E24">Equation 24</xref>.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Values of the importance measures in Example 1 in accordance with explanation approaches: ALE, LR, and NAM.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th/>
<th valign="top" align="center" colspan="6"><bold>Importance measures</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#919498;color:#ffffff">
<td/>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>1</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>2</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>3</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>4</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>5</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>6</sub>)</td>
</tr> <tr>
<td valign="top" align="left">ALE</td>
<td valign="top" align="center">0.172</td>
<td valign="top" align="center">0.259</td>
<td valign="top" align="center">0.000</td>
<td valign="top" align="center">0.569</td>
<td valign="top" align="center">0.000</td>
<td valign="top" align="center">0.000</td>
</tr> <tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="center">0.182</td>
<td valign="top" align="center">0.245</td>
<td valign="top" align="center">0.054</td>
<td valign="top" align="center">0.405</td>
<td valign="top" align="center">0.062</td>
<td valign="top" align="center">0.052</td>
</tr> <tr>
<td valign="top" align="left">NAM</td>
<td valign="top" align="center">0.157</td>
<td valign="top" align="center">0.238</td>
<td valign="top" align="center">0.012</td>
<td valign="top" align="center">0.569</td>
<td valign="top" align="center">0.012</td>
<td valign="top" align="center">0.012</td>
</tr></tbody>
</table>
</table-wrap>
<p>Shape functions illustrating how functions of the generalized additive model depend on &#x003BB;<sub><italic>i</italic></sub> are shown in <xref ref-type="fig" rid="F7">Figure 7</xref>. It can be clearly seen from <xref ref-type="fig" rid="F7">Figure 7</xref> that &#x003BB;<sub>2</sub> and &#x003BB;<sub>4</sub> have the highest importance. This implies that the explained instance is interpreted by the fourth and the second nearest instances.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Six shape functions obtained in Example 1 for the example-based explanation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0007.tif"/>
</fig></sec>
<sec>
<title>6.2 Example 2</title>
<p>Suppose that the explainable instance <bold>x</bold><sub>0</sub> belongs to a polytope with four vertices <inline-formula><mml:math id="M93"><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> (<italic>d</italic> &#x0003D; 4). The black-box model is a function <italic>f</italic>(<bold>x</bold>) such that</p>
<disp-formula id="E28"><mml:math id="M94"><mml:mrow><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p><italic>n</italic> &#x0003D; 1000 points &#x003BB;<sup>(<italic>i</italic>)</sup> &#x02208; &#x0211D;<sup>4</sup>, <italic>i</italic> &#x0003D; 1, ..., <italic>n</italic>, are uniformly generated in the unit simplex &#x00394;<sup>4 &#x02212; 1</sup>. For each point &#x003BB;<sup>(<italic>i</italic>)</sup>, the corresponding prediction <italic>z</italic><sub><italic>i</italic></sub> is computed by using the black-box function <italic>h</italic>(&#x003BB;). NAM is trained with the learning rate 0.0005, with hyperparameter &#x003B1; &#x0003D; 10<sup>&#x02212;6</sup>, the number of epochs is 300, and the batch size is 128.</p>
<p>Normalized values of <italic>I</italic>(&#x003BB;<sub><italic>i</italic></sub>) obtained by means of ALE, LR, and NAM are shown in <xref ref-type="table" rid="T3">Table 3</xref>. It can be seen from <xref ref-type="table" rid="T3">Table 3</xref> that the obtained importance measures correspond to the intuitive consideration of the expression for <italic>h</italic>(&#x003BB;). The corresponding shape functions for all features are shown in <xref ref-type="fig" rid="F8">Figure 8</xref>.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Values of the importance measures in Example 2 in accordance with three explanation approaches: ALE, LR, and NAM.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th/>
<th valign="top" align="center" colspan="4"><bold>Importance measures</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#919498;color:#ffffff">
<td/>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>1</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>2</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>3</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>4</sub>)</td>
</tr> <tr>
<td valign="top" align="left">ALE</td>
<td valign="top" align="center">0.392</td>
<td valign="top" align="center">0.087</td>
<td valign="top" align="center">0.089</td>
<td valign="top" align="center">0.432</td>
</tr> <tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="center">0.357</td>
<td valign="top" align="center">0.081</td>
<td valign="top" align="center">0.112</td>
<td valign="top" align="center">0.450</td>
</tr> <tr>
<td valign="top" align="left">NAM</td>
<td valign="top" align="center">0.306</td>
<td valign="top" align="center">0.134</td>
<td valign="top" align="center">0.202</td>
<td valign="top" align="center">0.358</td>
</tr></tbody>
</table>
</table-wrap>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Four shape functions obtained in Example 2 for the example-based explanation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0008.tif"/>
</fig></sec>
<sec>
<title>6.3 Example 3</title>
<p>Suppose that the explained instance <bold>x</bold><sub>0</sub> belongs to a polytope with three vertices <inline-formula><mml:math id="M95"><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> (<italic>d</italic> &#x0003D; 3):</p>
<disp-formula id="E29"><mml:math id="M96"><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup><mml:mo>.</mml:mo><mml:mtext>&#x000A0;</mml:mtext></mml:mrow></mml:math></disp-formula>
<p>The black-box model has the following function of two features <italic>x</italic><sup>(1)</sup> and <italic>x</italic><sup>(2)</sup>:</p>
<disp-formula id="E30"><mml:math id="M97"><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>7</mml:mn><mml:mo>&#x000B7;</mml:mo><mml:mtext class="textrm" mathvariant="normal">sign</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">sign</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>We generate <italic>n</italic> &#x0003D; 1, 000 points &#x003BB;<sup>(<italic>i</italic>)</sup> &#x02208; &#x0211D;<sup>3</sup>, <italic>i</italic> &#x0003D; 1, ..., <italic>n</italic>, which are uniformly generated in the unit simplex &#x00394;<sup>3 &#x02212; 1</sup>. These points correspond to <italic>n</italic> vectors <inline-formula><mml:math id="M98"><mml:msub><mml:mrow><mml:mstyle class="text"><mml:mtext mathvariant="bold">x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> defined as</p>
<disp-formula id="E31"><mml:math id="M99"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext 
class="textrm" mathvariant="normal">T</mml:mtext></mml:mrow></mml:msup></mml:mrow></mml:math></disp-formula>
<p>with the corresponding values of <italic>f</italic>(<bold>x</bold><sub><italic>i</italic></sub>), and are shown in <xref ref-type="fig" rid="F9">Figure 9</xref>. It can be seen from <xref ref-type="fig" rid="F9">Figure 9</xref> that this example can be regarded as a classification task with four classes. Parameters of experiments are the same as in the previous examples, but &#x003B1; &#x0003D; 0.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Dataset of vectors <bold>x</bold> and the corresponding values of <italic>f</italic>(<bold>x</bold>) for Example 3.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0009.tif"/>
</fig>
<p>Normalized values of <italic>I</italic>(&#x003BB;<sub><italic>i</italic></sub>) obtained by means of ALE, LR, and NAM are shown in <xref ref-type="table" rid="T4">Table 4</xref>. It can be seen from <xref ref-type="table" rid="T4">Table 4</xref> that the obtained importance measures correspond to the intuitive consideration of the expression for <italic>h</italic>(&#x003BB;). The corresponding shape functions for all features are shown in <xref ref-type="fig" rid="F10">Figure 10</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Values of the importance measures in Example 3 in accordance with three explanation approaches: ALE, LR, and NAM.</p></caption>
<table frame="box" rules="all">
<tbody>
<tr style="background-color:#919498;color:#ffffff">
<td/>
<td valign="top" align="center" colspan="3"><bold>Importance measures</bold></td>
</tr>
 <tr style="background-color:#919498;color:#ffffff">
<td/>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>1</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>2</sub>)</td>
<td valign="top" align="center"><italic>I</italic>(&#x003BB;<sub>3</sub>)</td>
</tr> <tr>
<td valign="top" align="left">ALE</td>
<td valign="top" align="center">0.411</td>
<td valign="top" align="center">0.395</td>
<td valign="top" align="center">0.194</td>
</tr> <tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="center">0.430</td>
<td valign="top" align="center">0.310</td>
<td valign="top" align="center">0.260</td>
</tr> <tr>
<td valign="top" align="left">NAM</td>
<td valign="top" align="center">0.499</td>
<td valign="top" align="center">0.338</td>
<td valign="top" align="center">0.163</td>
</tr></tbody>
</table>
</table-wrap>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>Three shape functions obtained in Example 3 for the example-based explanation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1506074-g0010.tif"/>
</fig>
</sec></sec>
<sec sec-type="discussion" id="s7">
<title>7 Discussion</title>
<p>Let us analyze advantages and limitations of the proposed methods. First, we consider advantages.</p>
<list list-type="order">
<list-item><p>One of the important advantages is that the proposed methods allow us to replace the perturbation process of feature vectors in the Euclidean space by the uniform generation of points in the unit simplex. Indeed, the perturbation of feature vectors requires to define several parameters, including probability distributions of generation for every feature, and parameters of the distributions. The cases depicted in <xref ref-type="fig" rid="F1">Figure 1</xref> may lead to incorrect predictions and to an incorrect surrogate model. Moreover, if instances are images, then it is difficult to correctly perturb them. Due to the proposed method, the perturbation of feature vectors is avoided, and it is replaced with uniform generation in the unit simplex, which is simple. The dual approach can be applied to the feature-based explanation as well as to the example-based explanation.</p></list-item>
<list-item><p>The dual representation of data can have a smaller dimension than the initial instances. It depends on <italic>K</italic> nearest neighbors around the explained instance. As a result, the constructed surrogate dual model can be simpler than the model trained on the initial training set.</p></list-item>
<list-item><p>The dual approach can be also adapted to SHAP to generate the removed features in a specific way.</p></list-item>
<list-item><p>The proposed methods are flexible. We can change the size of the convex hull by changing the number <italic>K</italic>. It can be applied to different explanation models, for example, to LIME, SHAP, and NAM. The main idea of the adaptation is to use the well-known explanation methods. In particular, LIME can be incorporated into the proposed method by constructing the linear regression for the dual dataset. We can incorporate SHAP for computing the feature contributions of the dual instances <inline-formula><mml:math id="M100"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>. NAM is incorporated to compute the shape functions of features <inline-formula><mml:math id="M101"><mml:msubsup><mml:mrow><mml:mtext>&#x003BB;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>, <italic>k</italic> &#x0003D; 1, ..., <italic>d</italic>. The method can be applied to the local and global explanations. There are different definitions of the global explanation. One of the approaches to define the global explanation, proposed in Ribeiro et al. (<xref ref-type="bibr" rid="B60">2016</xref>), is to compute the average feature importance over the feature importances obtained by means of the local explanation for all instances of the dataset. 
This is a computationally difficult problem due to two main factors: (1) constructing a convex hull on the dataset; (2) solving the local explanation problems for all instances in the training set. The first problem can be solved by dividing the whole dataset into subsets with feature vectors that are close in distance, constructing a convex hull for each subset, and solving the &#x0201C;local&#x0201D; problem of global explanation. This can be done, for example, using a decision tree so that leaves of the tree contain close instances. Another way is clustering, for which the assumption is fulfilled that each cluster also contains close instances. The second problem is computationally intensive. Its efficient solution is one of the important directions for further research.</p></list-item>
</list>
<p>In spite of many advantages of the dual approach, we have to note also its limitations:</p>
<list list-type="order">
<list-item><p>The advantage of the smaller dimensionality in the dual representation is questionable for the feature-based explanation. If we take a number of extreme points smaller than the data dimensionality, then we restrict the set of generated primal points by some subspace of the initial feature space. This can be a reason for incorrect results. Ways to overcome this difficulty are an interesting direction for further research. However, this limitation does not impact the example-based explanation because we actually extend the mixup method and try to find influential instances among nearest neighbors.</p></list-item>
<list-item><p>Another problem is that calculation of vertices of the largest convex hull is a computationally hard problem. This problem does not take place for the example-based explanation when the number of nearest neighbors is smaller than the initial data dimensionality.</p></list-item>
</list>
<p>In spite of the above limitations, the proposed approach has many interesting properties and can be regarded as the first step for developing various algorithms using dual representation. It can have the biggest impact in medicine, where, on the one hand, high-dimensional data are common, and, on the other hand, predictions (diagnoses) need to be explained so that they can be trusted and a desirable treatment can be chosen.</p>
<p>It has been shown in numerical examples with synthetic data that the proposed method outperforms the separate LIME method in terms of accuracy (see, for example, Sections 5.3, 5.4). One of the reasons is that some generated points in LIME may be located outside the training domain. However, LIME can be regarded as a part of the proposed method when it is used for computing coefficients <italic>b</italic> &#x0003D; (<italic>b</italic><sub>1</sub>, ..., <italic>b</italic><sub><italic>d</italic></sub>) in the dual representation. This implies that the computation time for explanation using the proposed method may exceed the LIME time. At the same time, instances in the obtained dual dataset may have the smaller dimensionality in comparison with the initial data. In this case, the computation time of the proposed method can be comparable with the LIME time.</p></sec>
<sec sec-type="conclusions" id="s8">
<title>8 Conclusion</title>
<p>Feature-based and example-based explanation methods in the framework of the dual feature representation have been presented in the study. The methods directly follow from the dual representation. They can be viewed as a basis for their improvement and the development of other methods within the dual representation.</p>
<p>In the example-based explanation, we used NAM as a neural network tool for explaining predictions under condition of considering the dual dataset with new variables (&#x003BB;<sub>1</sub>, ..., &#x003BB;<sub><italic>d</italic></sub>). However, there are effective explanation methods different from NAM, which are based on the gradient boosting machine (Nori et al., <xref ref-type="bibr" rid="B54">2019</xref>; Konstantinov and Utkin, <xref ref-type="bibr" rid="B44">2021</xref>). The combination of the proposed approach with these methods is an interesting direction for further research.</p>
<p>Another interesting direction for further research is to study how the proposed approach adapts to the example-based image explanation when <italic>K</italic> nearest neighbors are not determined by the proximity of original images. The search for efficient adaptation algorithms seems to be a relevant and interesting task.</p>
<p>There are interesting results in the linear programming when the significance of dual variables is related to perturbations of coefficients of the primal constraints (Castillo et al., <xref ref-type="bibr" rid="B18">2006</xref>). This peculiarity can be applied to develop new explanation methods.</p>
<p>It should be noted that many applications have features that are not taken into account in the proposed approach, for example, the presence of multimodal data having different dimensions. Adaptation of the approach and the extensions oriented to specific applications are also important issues for further research. An idea behind the problem solution is to reduce different dimensions to one in the dual data representation.</p>
<p>Adversarial settings can produce a complex cluster structure within the feature space. A significant challenge in such scenarios is addressing out-of-distribution points. The proposed method can handle this problem, unlike LIME. To enhance the robustness, we propose two hyperparameters: the configuration of the Dirichlet distribution and the number of neighbors to construct the convex hull. Proper adjustment of these hyperparameters has the potential to enhance the method&#x00027;s robustness.</p>
<p>The proposed results are fundamental. They are illustrated only with synthetic data or well-known real datasets. Therefore, we do not use personal data which require to implement robust security measures to safeguard sensitive information and prevent unauthorized access. It should be noted that one of the important goals of the proposed results is to provide explanations for the machine learning model decisions and actions making the models transparent. As a result, users have a clear understanding of how the black-box model operates and the factors influencing its outputs. The proposed method belongs to the field of explainable artificial intelligence; thus, we have contributed to the development of transparent and reliable AI systems. Methods of the prediction explanation can improve collaboration between AI developers and domain experts as they can be used to facilitate the feedback exchange between the AI engineer and the expert. Our method can be more useful in domains where the example-based explanations are in demand. The potential risks and biases associated with the proposed method are comparable to those of the LIME method and depend on the data scientist&#x00027;s handling of the data.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s9">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/dataset/294/combined&#x0002B;cycle&#x0002B;power&#x0002B;plant">https://archive.ics.uci.edu/dataset/294/combined&#x0002B;cycle&#x0002B;power&#x0002B;plant</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s10">
<title>Author contributions</title>
<p>AK: Conceptualization, Formal analysis, Resources, Methodology, Writing &#x02013; original draft. BK: Formal analysis, Data curation, Software, Validation, Visualization, Writing &#x02013; review &#x00026; editing. SK: Data curation, Software, Validation, Visualization, Writing &#x02013; review &#x00026; editing. LU: Writing &#x02013; review &#x00026; editing, Conceptualization, Formal analysis, Methodology, Writing &#x02013; original draft. VM: Conceptualization, Formal analysis, Writing &#x02013; review &#x00026; editing, Investigation, Project administration, Resources.</p>
</sec>
<sec sec-type="funding-information" id="s11">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. The research is partially funded by the Ministry of Science and Higher Education of the Russian Federation as part of World-class Research Center Program: Advanced Digital Technologies (Contract No. 075-15-2022-311 dated April 20, 2022).</p>
</sec>
<ack><p>The authors would like to express their appreciation to the referees whose very valuable comments have improved the study.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p></sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Adadi</surname> <given-names>A.</given-names></name> <name><surname>Berrada</surname> <given-names>M.</given-names></name></person-group> (<year>2018</year>). <article-title>Peeking inside the black-box: a survey on explainable artificial intelligence (XAI)</article-title>. <source>IEEE Access</source> <volume>6</volume>, <fpage>52138</fpage>&#x02013;<lpage>52160</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2018.2870052</pub-id><pub-id pub-id-type="pmid">39897059</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Adhikari</surname> <given-names>A.</given-names></name> <name><surname>Tax</surname> <given-names>D.</given-names></name> <name><surname>Satta</surname> <given-names>R.</given-names></name> <name><surname>Faeth</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;LEAFAGE: example-based and feature importance-based explanations for black-box ML models,&#x0201D;</article-title> in <source>2019 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>1</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1109/FUZZ-IEEE.2019.8858846</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Agarwal</surname> <given-names>R.</given-names></name> <name><surname>Melnick</surname> <given-names>L.</given-names></name> <name><surname>Frosst</surname> <given-names>N.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Lengerich</surname> <given-names>B.</given-names></name> <name><surname>Caruana</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Neural additive models: interpretable machine learning with neural nets,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>4699</fpage>&#x02013;<lpage>4711</lpage>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Apley</surname> <given-names>D.</given-names></name> <name><surname>Zhu</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>Visualizing the effects of predictor variables in black box supervised learning models</article-title>. <source>J. R. Stat. Soc</source>. <volume>82</volume>, <fpage>1059</fpage>&#x02013;<lpage>1086</lpage>. <pub-id pub-id-type="doi">10.1111/rssb.12377</pub-id><pub-id pub-id-type="pmid">39217110</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arrieta</surname> <given-names>A.</given-names></name> <name><surname>Diaz-Rodriguez</surname> <given-names>N.</given-names></name> <name><surname>Ser</surname> <given-names>J. D.</given-names></name> <name><surname>Bennetot</surname> <given-names>A.</given-names></name> <name><surname>Tabik</surname> <given-names>S.</given-names></name> <name><surname>Barbado</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Explainable artificial intelligence (XAI): concepts, taxonomies, opportunities and challenges toward responsible AI</article-title>. <source>Inf. Fusion</source> <volume>58</volume>, <fpage>82</fpage>&#x02013;<lpage>115</lpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2019.12.012</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arya</surname> <given-names>V.</given-names></name> <name><surname>Bellamy</surname> <given-names>R.</given-names></name> <name><surname>Chen</surname> <given-names>P.-Y.</given-names></name> <name><surname>Dhurandhar</surname> <given-names>A.</given-names></name> <name><surname>Hind</surname> <given-names>M.</given-names></name> <name><surname>Hoffman</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>One explanation does not fit all: a toolkit and taxonomy of AI explainability techniques</article-title>. <source>ArXiv:1909.03012</source>.</citation>
</ref>
<ref id="B7">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Awasthi</surname> <given-names>P.</given-names></name> <name><surname>Kalantari</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Robust vertex enumeration for convex hulls in high dimensions,&#x0201D;</article-title> in <source>International Conference on Artificial Intelligence and Statistics (PMLR)</source>, <fpage>1387</fpage>&#x02013;<lpage>1396</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Balestriero</surname> <given-names>R.</given-names></name> <name><surname>Pesenti</surname> <given-names>J.</given-names></name> <name><surname>LeCun</surname> <given-names>Y.</given-names></name></person-group> (<year>2021</year>). <article-title>Learning in high dimension always amounts to extrapolation</article-title>. <source>ArXiv:2110.09485v2</source>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Belle</surname> <given-names>V.</given-names></name> <name><surname>Papantonis</surname> <given-names>I.</given-names></name></person-group> (<year>2021</year>). <article-title>Principles and practice of explainable machine learning</article-title>. <source>Front. Big Data</source> <volume>39</volume>:<fpage>688969</fpage>. <pub-id pub-id-type="doi">10.3389/fdata.2021.688969</pub-id><pub-id pub-id-type="pmid">34278297</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Benard</surname> <given-names>C.</given-names></name> <name><surname>Biau</surname> <given-names>G.</given-names></name> <name><surname>Veiga</surname> <given-names>S. D.</given-names></name> <name><surname>Scornet</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;SHAFF: fast and consistent SHApley eFfect estimates via random forests,&#x0201D;</article-title> in <source>International Conference on Artificial Intelligence and Statistics (PMLR)</source>, <fpage>5563</fpage>&#x02013;<lpage>5582</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bennett</surname> <given-names>K.</given-names></name> <name><surname>Bredensteiner</surname> <given-names>E.</given-names></name></person-group> (<year>2000</year>). <article-title>&#x0201C;Duality and geometry in SVM classifiers,&#x0201D;</article-title> in <source>ICML&#x00027;00: Proceedings of the Seventeenth International Conference on Machine Learning</source>, <fpage>57</fpage>&#x02013;<lpage>64</lpage>.</citation>
</ref>
<ref id="B12">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bento</surname> <given-names>J.</given-names></name> <name><surname>Saleiro</surname> <given-names>P.</given-names></name> <name><surname>Cruz</surname> <given-names>A.</given-names></name> <name><surname>Figueiredo</surname> <given-names>M.</given-names></name> <name><surname>Bizarro</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;TimeSHAP: explaining recurrent models through sequence perturbations,&#x0201D;</article-title> in <source>Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &#x00026; Data Mining</source>, <fpage>2565</fpage>&#x02013;<lpage>2573</lpage>. <pub-id pub-id-type="doi">10.1145/3447548.3467166</pub-id><pub-id pub-id-type="pmid">27534393</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bodria</surname> <given-names>F.</given-names></name> <name><surname>Giannotti</surname> <given-names>F.</given-names></name> <name><surname>Guidotti</surname> <given-names>R.</given-names></name> <name><surname>Naretto</surname> <given-names>F.</given-names></name> <name><surname>Pedreschi</surname> <given-names>D.</given-names></name> <name><surname>Rinzivillo</surname> <given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>Benchmarking and survey of explanation methods for black box models</article-title>. <source>Data Min. Knowl. Disc</source>. <volume>37</volume>, <fpage>1719</fpage>&#x02013;<lpage>1778</lpage>. <pub-id pub-id-type="doi">10.1007/s10618-023-00933-9</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bouneder</surname> <given-names>L.</given-names></name> <name><surname>Leo</surname> <given-names>Y.</given-names></name> <name><surname>Lachapelle</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>X-SHAP: towards multiplicative explainability of machine learning</article-title>. <source>ArXiv:2006.04574</source>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Burkart</surname> <given-names>N.</given-names></name> <name><surname>Huber</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>A survey on the explainability of supervised machine learning</article-title>. <source>J. Artif. Intell. Res</source>. <volume>70</volume>, <fpage>245</fpage>&#x02013;<lpage>317</lpage>. <pub-id pub-id-type="doi">10.1613/jair.1.12228</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Cai</surname> <given-names>C.</given-names></name> <name><surname>Jongejan</surname> <given-names>J.</given-names></name> <name><surname>Holbrook</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;The effects of example-based explanations in a machine learning interface,&#x0201D;</article-title> in <source>Proceedings of the 24th International Conference on Intelligent User Interfaces</source>, <fpage>258</fpage>&#x02013;<lpage>262</lpage>. <pub-id pub-id-type="doi">10.1145/3301275.3302289</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carvalho</surname> <given-names>D.</given-names></name> <name><surname>Pereira</surname> <given-names>E.</given-names></name> <name><surname>Cardoso</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>Machine learning interpretability: a survey on methods and metrics</article-title>. <source>Electronics</source> <volume>8</volume>, <fpage>1</fpage>&#x02013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.3390/electronics8080832</pub-id></citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Castillo</surname> <given-names>E.</given-names></name> <name><surname>Conejo</surname> <given-names>A.</given-names></name> <name><surname>Castillo</surname> <given-names>C.</given-names></name> <name><surname>M&#x000ED;nguez</surname> <given-names>R.</given-names></name> <name><surname>Ortigosa</surname> <given-names>D.</given-names></name></person-group> (<year>2006</year>). <article-title>Perturbation approach to sensitivity analysis in mathematical programming</article-title>. <source>J. Optim. Theory Appl</source>. <volume>128</volume>, <fpage>49</fpage>&#x02013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1007/s10957-005-7557-y</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chau</surname> <given-names>A.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Yu</surname> <given-names>W.</given-names></name></person-group> (<year>2013</year>). <article-title>Large data sets classification using convex-concave hull and support vector machine</article-title>. <source>Soft Comput</source>. <volume>17</volume>, <fpage>793</fpage>&#x02013;<lpage>804</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-012-0954-x</pub-id></citation>
</ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Vaughan</surname> <given-names>J.</given-names></name> <name><surname>Nair</surname> <given-names>V.</given-names></name> <name><surname>Sudjianto</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Adaptive explainable neural networks (AxNNs)</article-title>. <source>ArXiv:2004.02353v2</source>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chong</surname> <given-names>P.</given-names></name> <name><surname>Cheung</surname> <given-names>N.</given-names></name> <name><surname>Elovici</surname> <given-names>Y.</given-names></name> <name><surname>Binder</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Toward scalable and unified example-based explanation and outlier detection</article-title>. <source>IEEE Trans. Image Process</source>. <volume>31</volume>, <fpage>525</fpage>&#x02013;<lpage>540</lpage>. <pub-id pub-id-type="doi">10.1109/TIP.2021.3127847</pub-id><pub-id pub-id-type="pmid">34793299</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dhurandhar</surname> <given-names>A.</given-names></name> <name><surname>Chen</surname> <given-names>P.-Y.</given-names></name> <name><surname>Luss</surname> <given-names>R.</given-names></name> <name><surname>Tu</surname> <given-names>C.-C.</given-names></name> <name><surname>Ting</surname> <given-names>P.</given-names></name> <name><surname>Shanmugam</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Explanations based on the missing: towards contrastive explanations with pertinent negatives</article-title>. <source>ArXiv:1802.07623v2</source>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dinh</surname> <given-names>N.</given-names></name> <name><surname>Jeyakumar</surname> <given-names>V.</given-names></name></person-group> (<year>2014</year>). <article-title>Farkas&#x00027; lemma: three decades of generalizations for mathematical optimization</article-title>. <source>Top</source> <volume>22</volume>, <fpage>1</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1007/s11750-014-0319-y</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Du</surname> <given-names>M.</given-names></name> <name><surname>Liu</surname> <given-names>N.</given-names></name> <name><surname>Hu</surname> <given-names>X.</given-names></name></person-group> (<year>2019</year>). <article-title>Techniques for interpretable machine learning</article-title>. <source>Commun. ACM</source> <volume>63</volume>, <fpage>68</fpage>&#x02013;<lpage>77</lpage>. <pub-id pub-id-type="doi">10.1145/3359786</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>El Mrabti</surname> <given-names>S.</given-names></name> <name><surname>El Mekkaoui</surname> <given-names>J.</given-names></name> <name><surname>Hachmoud</surname> <given-names>A.</given-names></name> <name><surname>Lazaar</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>An explainable machine learning model for sentiment analysis of online reviews</article-title>. <source>Knowl. Based Syst</source>. <volume>302</volume>:<fpage>112348</fpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2024.112348</pub-id><pub-id pub-id-type="pmid">35627070</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ergen</surname> <given-names>T.</given-names></name> <name><surname>Pilanci</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Convex duality of deep neural networks,&#x0201D;</article-title> in <source>Proceedings of the 37th International Conference on Machine Learning</source> (<publisher-loc>PMLR</publisher-loc>), <fpage>108</fpage>.<pub-id pub-id-type="pmid">38294919</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Ergen</surname> <given-names>T.</given-names></name> <name><surname>Pilanci</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Convex geometry and duality of over-parameterized neural networks</article-title>. <source>J. Mach. Learn. Res</source>. <volume>22</volume>, <fpage>1</fpage>&#x02013;<lpage>63</lpage>. Available at: <ext-link ext-link-type="uri" xlink:href="https://typeset.io/pdf/convex-geometryand-duality-of-over-parameterizedneural-3w1w4fo6ik.pdf">https://typeset.io/pdf/convex-geometryand-duality-of-over-parameterizedneural-3w1w4fo6ik.pdf</ext-link> (accessed January 29, 2025).</citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Farnia</surname> <given-names>F.</given-names></name> <name><surname>Tse</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;A convex duality framework for gans,&#x0201D;</article-title> in <source>32nd Conference on Neural Information Processing Systems (NeurIPS 2018)</source>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>.</citation>
</ref>
<ref id="B29">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Fong</surname> <given-names>R.</given-names></name> <name><surname>Vedaldi</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Interpretable explanations of black boxes by meaningful perturbation,&#x0201D;</article-title> in <source>Proceedings of the IEEE International Conference on Computer Vision</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>3429</fpage>&#x02013;<lpage>3437</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV.2017.371</pub-id><pub-id pub-id-type="pmid">38257608</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Fong</surname> <given-names>R.</given-names></name> <name><surname>Vedaldi</surname> <given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Explanations for attributing deep neural network predictions,&#x0201D;</article-title> in <source>Explainable AI</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>149</fpage>&#x02013;<lpage>167</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-28954-6_8</pub-id></citation>
</ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Friedman</surname> <given-names>J.</given-names></name></person-group> (<year>2001</year>). <article-title>Greedy function approximation: a gradient boosting machine</article-title>. <source>Ann. Stat</source>. <volume>29</volume>, <fpage>1189</fpage>&#x02013;<lpage>1232</lpage>. <pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id><pub-id pub-id-type="pmid">38281721</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Garreau</surname> <given-names>D.</given-names></name> <name><surname>von Luxburg</surname> <given-names>U.</given-names></name></person-group> (<year>2020a</year>). <article-title>&#x0201C;Explaining the explainer: a first theoretical analysis of LIME,&#x0201D;</article-title> in <source>International Conference on Artificial Intelligence and Statistics</source> (<publisher-loc>PMLR</publisher-loc>), <fpage>1287</fpage>&#x02013;<lpage>1296</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garreau</surname> <given-names>D.</given-names></name> <name><surname>von Luxburg</surname> <given-names>U.</given-names></name></person-group> (<year>2020b</year>). <article-title>Looking deeper into tabular LIME</article-title>. <source>ArXiv:2008.11092</source>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghalebikesabi</surname> <given-names>S.</given-names></name> <name><surname>Ter-Minassian</surname> <given-names>L.</given-names></name> <name><surname>Diaz-Ordaz</surname> <given-names>K.</given-names></name> <name><surname>Holmes</surname> <given-names>C.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;On locality of local explanation models,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>18395</fpage>&#x02013;<lpage>18407</lpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Greenwell</surname> <given-names>B.</given-names></name> <name><surname>Boehmke</surname> <given-names>B.</given-names></name> <name><surname>McCarthy</surname> <given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>A simple and effective model-based variable importance measure</article-title>. <source>ArXiv:1805.04755</source>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gu</surname> <given-names>X.</given-names></name> <name><surname>Chung</surname> <given-names>F.-L.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>Extreme vector machine for training on large data</article-title>. <source>Int. J. Mach. Learn. Cyber</source>. <volume>11</volume>, <fpage>33</fpage>&#x02013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1007/s13042-019-00936-3</pub-id></citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guidotti</surname> <given-names>R.</given-names></name> <name><surname>Monreale</surname> <given-names>A.</given-names></name> <name><surname>Ruggieri</surname> <given-names>S.</given-names></name> <name><surname>Turini</surname> <given-names>F.</given-names></name> <name><surname>Giannotti</surname> <given-names>F.</given-names></name> <name><surname>Pedreschi</surname> <given-names>D.</given-names></name></person-group> (<year>2019</year>). <article-title>A survey of methods for explaining black box models</article-title>. <source>ACM Comput. Surv</source>. <volume>51</volume>:<fpage>93</fpage>. <pub-id pub-id-type="doi">10.1145/3236009</pub-id></citation>
</ref>
<ref id="B38">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hastie</surname> <given-names>T.</given-names></name> <name><surname>Tibshirani</surname> <given-names>R.</given-names></name></person-group> (<year>1990</year>). <source>Generalized Additive Models</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>CRC Press</publisher-name>.</citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Q.</given-names></name> <name><surname>Yamada</surname> <given-names>M.</given-names></name> <name><surname>Tian</surname> <given-names>Y.</given-names></name> <name><surname>Singh</surname> <given-names>D.</given-names></name> <name><surname>Yin</surname> <given-names>D.</given-names></name> <name><surname>Chang</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>GraphLIME: local interpretable model explanations for graph neural networks</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>35</volume>, <fpage>6968</fpage>&#x02013;<lpage>6972</lpage>. <pub-id pub-id-type="doi">10.1109/TKDE.2022.3187455</pub-id></citation>
</ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Islam</surname> <given-names>M.</given-names></name> <name><surname>Ahmed</surname> <given-names>M.</given-names></name> <name><surname>Barua</surname> <given-names>S.</given-names></name> <name><surname>Begum</surname> <given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>A systematic review of explainable artificial intelligence in terms of different application domains and tasks</article-title>. <source>Appl. Sci</source>. <volume>12</volume>, <fpage>1</fpage>&#x02013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.3390/app12031353</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Crabbe</surname> <given-names>J.</given-names></name> <name><surname>Qian</surname> <given-names>Z.</given-names></name> <name><surname>Imrie</surname> <given-names>F.</given-names></name> <name><surname>van der Schaar</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Explaining latent representations with a corpus of examples,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>12154</fpage>&#x02013;<lpage>12166</lpage>.</citation>
</ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jethani</surname> <given-names>N.</given-names></name> <name><surname>Sudarshan</surname> <given-names>M.</given-names></name> <name><surname>Covert</surname> <given-names>I.</given-names></name> <name><surname>Lee</surname> <given-names>S.-I.</given-names></name> <name><surname>Ranganath</surname> <given-names>R.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;FastSHAP: real-time shapley value estimation,&#x0201D;</article-title> in <source>The Tenth International Conference on Learning Representations, ICLR 2022</source>, <fpage>1</fpage>&#x02013;<lpage>23</lpage>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khosravani</surname> <given-names>H.</given-names></name> <name><surname>Ruano</surname> <given-names>A.</given-names></name> <name><surname>Ferreira</surname> <given-names>P.</given-names></name></person-group> (<year>2016</year>). <article-title>A convex hull-based data selection method for data driven models</article-title>. <source>Appl. Soft Comput</source>. <volume>47</volume>, <fpage>515</fpage>&#x02013;<lpage>533</lpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2016.06.014</pub-id></citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Konstantinov</surname> <given-names>A.</given-names></name> <name><surname>Utkin</surname> <given-names>L.</given-names></name></person-group> (<year>2021</year>). <article-title>Interpretable machine learning with an ensemble of gradient boosting machines</article-title>. <source>Knowl. Based Syst</source>. <volume>222</volume>, <fpage>1</fpage>&#x02013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2021.106993</pub-id></citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kovalev</surname> <given-names>M.</given-names></name> <name><surname>Utkin</surname> <given-names>L.</given-names></name> <name><surname>Kasimov</surname> <given-names>E.</given-names></name></person-group> (<year>2020</year>). <article-title>SurvLIME: a method for explaining machine learning survival models</article-title>. <source>Knowl. Based Syst</source>. <volume>203</volume>:<fpage>106164</fpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2020.106164</pub-id></citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Xiong</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Wu</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Interpretable deep learning: Interpretations, interpretability, trustworthiness, and beyond</article-title>. <source>Knowl. Inf. Syst</source>. <volume>64</volume>, <fpage>3197</fpage>&#x02013;<lpage>3234</lpage>. <pub-id pub-id-type="doi">10.1007/s10115-022-01756-8</pub-id><pub-id pub-id-type="pmid">36990051</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Yan</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Jiang</surname> <given-names>C.</given-names></name></person-group> (<year>2021</year>). <article-title>Explaining the black-box model: a survey of local interpretation methods for deep neural networks</article-title>. <source>Neurocomputing</source> <volume>419</volume>, <fpage>168</fpage>&#x02013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2020.08.011</pub-id></citation>
</ref>
<ref id="B48">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>S.</given-names></name> <name><surname>Lee</surname> <given-names>S.-I.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;A unified approach to interpreting model predictions,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>4765</fpage>&#x02013;<lpage>4774</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Molnar</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <source>Interpretable Machine Learning: A Guide for Making Black Box Models Explainable</source>. Available at: <ext-link ext-link-type="uri" xlink:href="https://christophm.github.io/interpretable-ml-book/">https://christophm.github.io/interpretable-ml-book/</ext-link> (accessed January 29, 2025).</citation>
</ref>
<ref id="B50">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Molnar</surname> <given-names>C.</given-names></name> <name><surname>K&#x000F6;nig</surname> <given-names>G.</given-names></name> <name><surname>Herbinger</surname> <given-names>J.</given-names></name> <name><surname>Freiesleben</surname> <given-names>T.</given-names></name> <name><surname>Dandl</surname> <given-names>S.</given-names></name> <name><surname>Scholbeck</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;General pitfalls of model-agnostic interpretation methods for machine learning models,&#x0201D;</article-title> in <source>International Workshop on Extending Explainable AI Beyond Deep Models and Classifiers</source> (<publisher-loc>Springer</publisher-loc>), <fpage>39</fpage>&#x02013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-04083-2_4</pub-id></citation>
</ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Murdoch</surname> <given-names>W.</given-names></name> <name><surname>Singh</surname> <given-names>C.</given-names></name> <name><surname>Kumbier</surname> <given-names>K.</given-names></name> <name><surname>Abbasi-Asl</surname> <given-names>R.</given-names></name> <name><surname>Yu</surname> <given-names>B.</given-names></name></person-group> (<year>2019</year>). <article-title>Interpretable machine learning: definitions, methods, and applications</article-title>. <source>ArXiv:1901.04592</source>.<pub-id pub-id-type="pmid">31619572</pub-id></citation></ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nemirko</surname> <given-names>A.</given-names></name> <name><surname>Dula</surname> <given-names>J.</given-names></name></person-group> (<year>2021a</year>). <article-title>Machine learning algorithm based on convex hull analysis</article-title>. <source>Procedia Comput. Sci</source>. <volume>186</volume>, <fpage>381</fpage>&#x02013;<lpage>386</lpage>. <pub-id pub-id-type="doi">10.1016/j.procs.2021.04.160</pub-id></citation>
</ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nemirko</surname> <given-names>A.</given-names></name> <name><surname>Dula</surname> <given-names>J.</given-names></name></person-group> (<year>2021b</year>). <article-title>Nearest convex hull classification based on linear programming</article-title>. <source>Patt. Recogn. Image Anal</source>. <volume>31</volume>, <fpage>205</fpage>&#x02013;<lpage>211</lpage>. <pub-id pub-id-type="doi">10.1134/S1054661821020139</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nori</surname> <given-names>H.</given-names></name> <name><surname>Jenkins</surname> <given-names>S.</given-names></name> <name><surname>Koch</surname> <given-names>P.</given-names></name> <name><surname>Caruana</surname> <given-names>R.</given-names></name></person-group> (<year>2019</year>). <article-title>InterpretML: a unified framework for machine learning interpretability</article-title>. <source>ArXiv:1909.09223</source>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ottmann</surname> <given-names>T.</given-names></name> <name><surname>Schuierer</surname> <given-names>S.</given-names></name> <name><surname>Soundaralakshmi</surname> <given-names>S.</given-names></name></person-group> (<year>2001</year>). <article-title>Enumerating extreme points in higher dimensions</article-title>. <source>Nordic J. Comput</source>. <volume>8</volume>, <fpage>179</fpage>&#x02013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1007/3-540-59042-0_105</pub-id></citation>
</ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Petsiuk</surname> <given-names>V.</given-names></name> <name><surname>Das</surname> <given-names>A.</given-names></name> <name><surname>Saenko</surname> <given-names>K.</given-names></name></person-group> (<year>2018</year>). <article-title>RISE: Randomized input sampling for explanation of black-box models</article-title>. <source>ArXiv:1806.07421</source>.</citation>
</ref>
<ref id="B57">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Rabold</surname> <given-names>J.</given-names></name> <name><surname>Deininger</surname> <given-names>H.</given-names></name> <name><surname>Siebers</surname> <given-names>M.</given-names></name> <name><surname>Schmid</surname> <given-names>U.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Enriching visual with verbal explanations for relational concepts: combining LIME with Aleph,&#x0201D;</article-title> in <source>Machine Learning and Knowledge Discovery in Databases: International Workshops of ECML PKDD 2019</source> (<publisher-loc>Springer</publisher-loc>), <fpage>180</fpage>&#x02013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-43823-4_16</pub-id></citation>
</ref>
<ref id="B58">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ras</surname> <given-names>G.</given-names></name> <name><surname>Xie</surname> <given-names>N.</given-names></name> <name><surname>Van Gerven</surname> <given-names>M.</given-names></name> <name><surname>Doran</surname> <given-names>D.</given-names></name></person-group> (<year>2022</year>). <article-title>Explainable deep learning: a field guide for the uninitiated</article-title>. <source>J. Artif. Intell. Res</source>. <volume>73</volume>, <fpage>329</fpage>&#x02013;<lpage>396</lpage>. <pub-id pub-id-type="doi">10.1613/jair.1.13200</pub-id></citation>
</ref>
<ref id="B59">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Renwang</surname> <given-names>S.</given-names></name> <name><surname>Baiqian</surname> <given-names>Y.</given-names></name> <name><surname>Hui</surname> <given-names>S.</given-names></name> <name><surname>Lei</surname> <given-names>Y.</given-names></name> <name><surname>Zengshou</surname> <given-names>D.</given-names></name></person-group> (<year>2022</year>). <article-title>Support vector machine fault diagnosis based on sparse scaling convex hull</article-title>. <source>Measur. Sci. Technol</source>. <volume>34</volume>:<fpage>035101</fpage>. <pub-id pub-id-type="doi">10.1088/1361-6501/aca217</pub-id></citation>
</ref>
<ref id="B60">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ribeiro</surname> <given-names>M.</given-names></name> <name><surname>Singh</surname> <given-names>S.</given-names></name> <name><surname>Guestrin</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;&#x02018;Why should I trust You?&#x02019; Explaining the predictions of any classifier,&#x0201D;</article-title> in <source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>, <fpage>1135</fpage>&#x02013;<lpage>1144</lpage>. <pub-id pub-id-type="doi">10.1145/2939672.2939778</pub-id></citation>
</ref>
<ref id="B61">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ribeiro</surname> <given-names>M.</given-names></name> <name><surname>Singh</surname> <given-names>S.</given-names></name> <name><surname>Guestrin</surname> <given-names>C.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Anchors: high-precision model-agnostic explanations,&#x0201D;</article-title> in <source>AAAI Conference on Artificial Intelligence</source>, 1527&#x02013;1535. <pub-id pub-id-type="doi">10.1609/aaai.v32i1.11491</pub-id></citation>
</ref>
<ref id="B62">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Rockafellar</surname> <given-names>R.</given-names></name></person-group> (<year>1970</year>). <source>Convex Analysis</source>. <publisher-loc>Princeton, NJ</publisher-loc>: <publisher-name>Princeton University Press</publisher-name>. <pub-id pub-id-type="doi">10.1515/9781400873173</pub-id></citation>
</ref>
<ref id="B63">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rossignol</surname> <given-names>H.</given-names></name> <name><surname>Minotakis</surname> <given-names>M.</given-names></name> <name><surname>Cobelli</surname> <given-names>M.</given-names></name> <name><surname>Sanvito</surname> <given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>Machine-learning-assisted construction of ternary convex hull diagrams</article-title>. <source>J. Chem. Inf. Model</source>. <volume>64</volume>, <fpage>1828</fpage>&#x02013;<lpage>1840</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.3c01391</pub-id><pub-id pub-id-type="pmid">38271693</pub-id></citation></ref>
<ref id="B64">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Rubinstein</surname> <given-names>R.</given-names></name> <name><surname>Kroese</surname> <given-names>D.</given-names></name></person-group> (<year>2008</year>). <source>Simulation and the Monte Carlo Method, 2nd Edition</source>. <publisher-loc>New Jersey</publisher-loc>: <publisher-name>Wiley</publisher-name>. <pub-id pub-id-type="doi">10.1002/9780470230381</pub-id></citation>
</ref>
<ref id="B65">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rudin</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead</article-title>. <source>Nat. Mach. Intell</source>. <volume>1</volume>, <fpage>206</fpage>&#x02013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-019-0048-x</pub-id><pub-id pub-id-type="pmid">35603010</pub-id></citation></ref>
<ref id="B66">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rudin</surname> <given-names>C.</given-names></name> <name><surname>Chen</surname> <given-names>C.</given-names></name> <name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>Huang</surname> <given-names>H.</given-names></name> <name><surname>Semenova</surname> <given-names>L.</given-names></name> <name><surname>Zhong</surname> <given-names>C.</given-names></name></person-group> (<year>2021</year>). <article-title>Interpretable machine learning: fundamental principles and 10 grand challenges</article-title>. <source>ArXiv:2103.11251</source>. <pub-id pub-id-type="doi">10.1214/21-SS133</pub-id></citation>
</ref>
<ref id="B67">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Shankaranarayana</surname> <given-names>S. M.</given-names></name> <name><surname>Runje</surname> <given-names>D.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Alime: autoencoder based approach for local interpretability,&#x0201D;</article-title> in <source>Intelligent Data Engineering and Automated Learning-IDEAL 2019: 20th International Conference, Manchester, UK, November 14-16, 2019, Proceedings, Part I 20</source> (<publisher-loc>Springer</publisher-loc>), <fpage>454</fpage>&#x02013;<lpage>463</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-33607-3_49</pub-id></citation>
</ref>
<ref id="B68">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Shapley</surname> <given-names>L.</given-names></name></person-group> (<year>1953</year>). <article-title>&#x0201C;A value for n-person games,&#x0201D;</article-title> in <source>Contributions to the Theory of Games</source> (<publisher-loc>Princeton</publisher-loc>: <publisher-name>Princeton University Press</publisher-name>), <fpage>307</fpage>&#x02013;<lpage>317</lpage>. <pub-id pub-id-type="doi">10.1515/9781400881970-018</pub-id></citation>
</ref>
<ref id="B69">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>V.</given-names></name> <name><surname>Kumar</surname> <given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>Chelm: Convex hull based extreme learning machine for salient object detection</article-title>. <source>Multimed. Tools Appl</source>. <volume>80</volume>, <fpage>13535</fpage>&#x02013;<lpage>13558</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-020-10374-x</pub-id></citation>
</ref>
<ref id="B70">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Smith</surname> <given-names>N.</given-names></name> <name><surname>Tromble</surname> <given-names>R.</given-names></name></person-group> (<year>2004</year>). <source>Sampling uniformly from the unit simplex</source>. Technical Report 29, Johns Hopkins University.</citation>
</ref>
<ref id="B71">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Strumbelj</surname> <given-names>E.</given-names></name> <name><surname>Kononenko</surname> <given-names>I.</given-names></name></person-group> (<year>2010</year>). <article-title>An efficient explanation of individual classifications using game theory</article-title>. <source>J. Mach. Learn. Res</source>. <volume>11</volume>, <fpage>1</fpage>&#x02013;<lpage>18</lpage>. Available at: <ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/pdf/10.5555/1756006.1756007">https://dl.acm.org/doi/pdf/10.5555/1756006.1756007</ext-link> (accessed January 29, 2025).<pub-id pub-id-type="pmid">34893693</pub-id></citation></ref>
<ref id="B72">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Strumbelj</surname> <given-names>E.</given-names></name> <name><surname>Kononenko</surname> <given-names>I.</given-names></name></person-group> (<year>2011</year>). <article-title>&#x0201C;A general method for visualizing and explaining black-box regression models,&#x0201D;</article-title> in <source>Adaptive and Natural Computing Algorithms. ICANNGA 2011</source> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>21</fpage>&#x02013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-20267-4_3</pub-id></citation>
</ref>
<ref id="B73">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Strumbelj</surname> <given-names>E.</given-names></name> <name><surname>Kononenko</surname> <given-names>I.</given-names></name></person-group> (<year>2014</year>). <article-title>Explaining prediction models and individual predictions with feature contributions</article-title>. <source>Knowl. Inf. Syst</source>. <volume>41</volume>, <fpage>647</fpage>&#x02013;<lpage>665</lpage>. <pub-id pub-id-type="doi">10.1007/s10115-013-0679-x</pub-id></citation>
</ref>
<ref id="B74">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sundararajan</surname> <given-names>M.</given-names></name> <name><surname>Taly</surname> <given-names>A.</given-names></name> <name><surname>Yan</surname> <given-names>Q.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Axiomatic attribution for deep networks,&#x0201D;</article-title> in <source>34th International Conference on Machine Learning, ICML</source>, <fpage>5109</fpage>&#x02013;<lpage>5118</lpage>.</citation>
</ref>
<ref id="B75">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Teso</surname> <given-names>S.</given-names></name> <name><surname>Bontempelli</surname> <given-names>A.</given-names></name> <name><surname>Giunchiglia</surname> <given-names>F.</given-names></name> <name><surname>Passerini</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Interactive label cleaning with example-based explanations,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>12966</fpage>&#x02013;<lpage>12977</lpage>.</citation>
</ref>
<ref id="B76">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Utkin</surname> <given-names>L.</given-names></name> <name><surname>Konstantinov</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Ensembles of random SHAPs</article-title>. <source>Algorithms</source> <volume>15</volume>, <fpage>1</fpage>&#x02013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.3390/a15110431</pub-id></citation>
</ref>
<ref id="B77">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Vaidya</surname> <given-names>P.</given-names></name></person-group> (<year>1989</year>). <article-title>&#x0201C;Speeding-up linear programming using fast matrix multiplication,&#x0201D;</article-title> in <source>30th Annual Symposium on Foundations of Computer Science</source> (<publisher-loc>IEEE Computer Society</publisher-loc>), <fpage>332</fpage>&#x02013;<lpage>337</lpage>. <pub-id pub-id-type="doi">10.1109/SFCS.1989.63499</pub-id></citation>
</ref>
<ref id="B78">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vu</surname> <given-names>M.</given-names></name> <name><surname>Nguyen</surname> <given-names>T.</given-names></name> <name><surname>Phan</surname> <given-names>N.</given-names></name> <name><surname>Gera</surname> <given-names>R.</given-names></name> <name><surname>Thai</surname> <given-names>M. T.</given-names></name></person-group> (<year>2019</year>). <article-title>Evaluating explainers via perturbation</article-title>. <source>ArXiv:1906.02032</source>.</citation>
</ref>
<ref id="B79">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Qiao</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>B.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name></person-group> (<year>2013</year>). <article-title>Online support vector machine based on convex hull vertices selection</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>24</volume>, <fpage>593</fpage>&#x02013;<lpage>609</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2013.2238556</pub-id><pub-id pub-id-type="pmid">24808380</pub-id></citation></ref>
<ref id="B80">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Inouye</surname> <given-names>D.</given-names></name></person-group> (<year>2021</year>). <article-title>Shapley explanation networks</article-title>. <source>ArXiv:2104.02297</source>.</citation>
</ref>
<ref id="B81">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>A.</given-names></name> <name><surname>Sudjianto</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>GAMI-Net: An explainable neural network based on generalized additive models with structured interactions</article-title>. <source>Pattern Recogn</source>. <volume>120</volume>:<fpage>108192</fpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2021.108192</pub-id></citation>
</ref>
<ref id="B82">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yao</surname> <given-names>D.</given-names></name> <name><surname>Zhao</surname> <given-names>P.</given-names></name> <name><surname>Pham</surname> <given-names>T.-A.</given-names></name> <name><surname>Cong</surname> <given-names>G.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;High-dimensional similarity learning via dual-sparse random projection,&#x0201D;</article-title> in <source>Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence (IJCAI-18)</source>, 3005&#x02013;3011. <pub-id pub-id-type="doi">10.24963/ijcai.2018/417</pub-id></citation>
</ref>
<ref id="B83">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yousefzadeh</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Deep learning generalization and the convex hull of training sets,&#x0201D;</article-title> in <source>NeurIPS 2020 Workshop: Deep Learning through Information Geometry</source>, <fpage>1</fpage>&#x02013;<lpage>10</lpage>.</citation>
</ref>
<ref id="B84">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zablocki</surname> <given-names>E.</given-names></name> <name><surname>Ben-Younes</surname> <given-names>H.</given-names></name> <name><surname>Perez</surname> <given-names>P.</given-names></name> <name><surname>Cord</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Explainability of vision-based autonomous driving systems: review and challenges</article-title>. <source>ArXiv:2101.05307</source>.</citation>
</ref>
<ref id="B85">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Cisse</surname> <given-names>M.</given-names></name> <name><surname>Dauphin</surname> <given-names>Y.</given-names></name> <name><surname>Lopez-Paz</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Mixup: beyond empirical risk minimization,&#x0201D;</article-title> in <source>Proceedings of ICLR</source>, <fpage>1</fpage>&#x02013;<lpage>13</lpage>.</citation>
</ref>
<ref id="B86">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>T.</given-names></name></person-group> (<year>2002</year>). <article-title>On the dual formulation of regularized linear systems with convex risks</article-title>. <source>Mach. Learn</source>. <volume>46</volume>, <fpage>91</fpage>&#x02013;<lpage>129</lpage>. <pub-id pub-id-type="doi">10.1023/A:1012498226479</pub-id></citation>
</ref>
<ref id="B87">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Fan</surname> <given-names>X.</given-names></name></person-group> (<year>2021</year>). <article-title>Convex hull-based distance metric learning for image classification</article-title>. <source>Comput. Appl. Math</source>. <volume>40</volume>, <fpage>1</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1007/s40314-021-01482-x</pub-id></citation>
</ref>
<ref id="B88">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Ti&#x00148;o</surname> <given-names>P.</given-names></name> <name><surname>Leonardis</surname> <given-names>A.</given-names></name> <name><surname>Tang</surname> <given-names>K.</given-names></name></person-group> (<year>2021</year>). <article-title>A survey on neural network interpretability</article-title>. <source>IEEE Trans. Emer. Topics Comput. Intell</source>. <volume>5</volume>, <fpage>726</fpage>&#x02013;<lpage>742</lpage>. <pub-id pub-id-type="doi">10.1109/TETCI.2021.3100641</pub-id></citation>
</ref>
</ref-list>
</back>
</article> 