<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Sports Act. Living</journal-id>
<journal-title>Frontiers in Sports and Active Living</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Sports Act. Living</abbrev-journal-title>
<issn pub-type="epub">2624-9367</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fspor.2025.1504362</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Sports and Active Living</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Toward interpretable expected goals modeling using Bayesian mixed models</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Iapteff</surname><given-names>Lo&#x00EF;c</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2604929/overview"/><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/></contrib>
<contrib contrib-type="author"><name><surname>Le Coz</surname><given-names>Sebastian</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Rioland</surname><given-names>Maxime</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/resources/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Houde</surname><given-names>Titouan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Carling</surname><given-names>Christopher</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1302914/overview" /><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Imbach</surname><given-names>Frank</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/1137976/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/project-administration/"/><role content-type="https://credit.niso.org/contributor-roles/resources/"/><role content-type="https://credit.niso.org/contributor-roles/software/"/><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/validation/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
</contrib-group>
<aff id="aff1"><label><sup>1</sup></label><institution>Seenovate</institution>, <addr-line>Montpellier</addr-line>, <country>France</country></aff>
<aff id="aff2"><label><sup>2</sup></label><institution>Universit&#x00E9; de Lyon, Lyon2</institution>, <addr-line>Bron</addr-line>, <country>France</country></aff>
<aff id="aff3"><label><sup>3</sup></label><institution>Laboratoire Sport, Expertise and Performance INSEP</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff>
<aff id="aff4"><label><sup>4</sup></label><institution>DMeM, Univ Montpellier, INRAe</institution>, <addr-line>Montpellier</addr-line>, <country>France</country></aff>
<author-notes>
<fn fn-type="edited-by"><p><bold>Edited by:</bold> Jos&#x00E9; Eduardo Teixeira, Instituto Polit&#x00E9;cnico da Guarda, Portugal</p></fn>
<fn fn-type="edited-by"><p><bold>Reviewed by:</bold> Jea-Woog Lee, Chung-Ang University, Republic of Korea</p>
<p>Christophe Ley, University of Luxembourg, Luxembourg</p></fn>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Frank Imbach <email>frank.imbach@umontpellier.fr</email></corresp>
</author-notes>
<pub-date pub-type="epub"><day>23</day><month>04</month><year>2025</year></pub-date>
<pub-date pub-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1504362</elocation-id>
<history>
<date date-type="received"><day>30</day><month>09</month><year>2024</year></date>
<date date-type="accepted"><day>31</day><month>03</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 Iapteff, Le Coz, Rioland, Houde, Carling and Imbach.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>Iapteff, Le Coz, Rioland, Houde, Carling and Imbach</copyright-holder><license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Empowered by technological progress, sports teams and bookmakers strive to understand relationships between player and team activity and match outcomes. For this purpose, the probability that an event succeeds (e.g., the probability that a goal is scored, namely, xG for eXpected Goals) provides insightful information on team and player performance and helps statistical and machine learning approaches predict match outcomes. However, recent approaches require powerful but complex models that lack inherent interpretability for practitioners. This study uses a Bayesian generalized linear mixed-effects model to introduce a simple and interpretable xG modeling approach. The model provided similar performance when compared to the StatsBomb model (property of the StatsBomb company) using only seven variables relating to shot type and position, and surrounding opponents (AUC = 0.781 and 0.801, respectively). Pre-trained models through transfer learning are suitable for identifying teams&#x2019; strengths and weaknesses using small sample sizes and enable interpretation of the model&#x2019;s predictions.</p>
</abstract>
<kwd-group>
<kwd>soccer</kwd>
<kwd>expected goals</kwd>
<kwd>Bayesian inference</kwd>
<kwd>generalized linear mixed model</kwd>
<kwd>transfer learning</kwd>
</kwd-group><counts>
<fig-count count="4"/>
<table-count count="4"/><equation-count count="96"/><ref-count count="39"/><page-count count="10"/><word-count count="0"/></counts><custom-meta-wrap><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Sports Science, Technology and Engineering</meta-value></custom-meta></custom-meta-wrap>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Football is a globally popular sport and its financial and social impact attracts researchers whose main aim is to increase comprehension of training and match-play performance (<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B3">3</xref>). Thanks to technological and analytical evolutions, new performance-oriented research perspectives have emerged from the analysis of player performance. Both training and match data, collected from football players using global navigation satellite systems (<xref ref-type="bibr" rid="B4">4</xref>) and markerless optical tracking systems (<xref ref-type="bibr" rid="B5">5</xref>), have become more plentiful and increasingly accurate. The information enables the development of advanced statistical and machine learning approaches to help analyze and subsequently optimize football performance and attempt predictions of match outcomes.</p>
<p>A popular performance metric in football is expected goals (xG). This metric represents the probability of a shot resulting in a goal. It was first introduced in football by Green (<xref ref-type="bibr" rid="B6">6</xref>) with the aim of identifying the key factors underpinning how goals are scored and has become a valuable objective measure of an individual player&#x2019;s performance that can also be extended to the team level (<xref ref-type="bibr" rid="B7">7</xref>&#x2013;<xref ref-type="bibr" rid="B10">10</xref>). To date, xG models typically account for spatio-temporal information, such as the time, the distance and angle between the player and the goal at the shooting time, the type of shot, and the preceding event (i.e., the last action such as a low pass or a high pass). Beyond these data, previous studies have reported different approaches and model architectures (<xref ref-type="bibr" rid="B11">11</xref>&#x2013;<xref ref-type="bibr" rid="B16">16</xref>). Modeling xG requires multiple features as the complexity of the task and its variability calls into question its predictability when using a restricted set of game features (e.g., the goal distance) (<xref ref-type="bibr" rid="B15">15</xref>). However, Umami et al. (<xref ref-type="bibr" rid="B16">16</xref>) reported that a logistic regression using only a few features (the distance and angle to the goal, and whether the shot is headed or not) provided convincing results. Alternatively, more complex architectures have been employed to attempt to better estimate xG (<xref ref-type="bibr" rid="B14">14</xref>). In effect, the authors in the latter study compared a logistic regression with non-linear ensemble learning algorithms (random forest and adaptive boosting) to predict the match score by summing the estimated xG of each shot opportunity. According to their results, the random forest algorithm provided the best model performance. 
In another study, Anzer and Bauer (<xref ref-type="bibr" rid="B11">11</xref>) used advanced features such as the height of the ball when the shot was attempted and analysis of the player&#x2019;s movement at the time of the shot. By comparing several supervised machine learning models, a gradient boosting model that accounted for the type of shot (header, leg kick, and direct free kick) provided the best performance in predicting the number of goals scored.</p>
<p>Most studies that have attempted to model xG have focused on fitting the best model using non-linear and complex model architectures, at the expense of model interpretability. These studies aimed to achieve the best performance in predicting that the shot will be converted into a goal. However, complex models are difficult to interpret. There are methods for explaining such models, such as the use of Shapley values (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>), but these approaches are still criticized today (<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B20">20</xref>), questioning the ability of one to fully master the complexity of Shapley value calculations. There are a few studies that have focused on xG modeling while preserving interpretability. To build a model that identifies key factors influencing xG, Decroos and Davis (<xref ref-type="bibr" rid="B13">13</xref>) and Bransen and Davis (<xref ref-type="bibr" rid="B12">12</xref>) proposed the use of a generalized additive model (GAM). The studies show that GAMs provide comparable results to a more complex gradient boosting model while retaining the advantage of interpretability. To further improve the interpretability of the model and since logistic regression has proved effective in modeling xG (<xref ref-type="bibr" rid="B16">16</xref>), one should consider the relationships between features and xG to be linear and consequently should utilize a generalized linear model instead of a GAM. One also assumes that soccer games evolve and that patterns are slightly different across seasons and competitions. As such, the present authors made a choice to investigate a Bayesian framework with mixed effects. Very recently, Scholtes and Karaku&#x015F; (<xref ref-type="bibr" rid="B21">21</xref>) also proposed a hierarchical Bayesian approach to model xG. 
They used this model to determine whether the individual player or their positional role impacted xG. The main strength of their work was the identification of specific player abilities through an interpretable model and a rigorous assessment of prediction uncertainty. However, the potential of the Bayesian framework was arguably not fully exploited and further research is warranted.</p>
<p>In the present article, an alternative prior specification method is proposed to model xG: an interpretable Bayesian generalized linear mixed-effects model (<xref ref-type="bibr" rid="B22">22</xref>). This model might achieve better estimation quality, and its benefits are potentially numerous. First, the linear structure makes the model very easy to interpret by analyzing the model&#x2019;s coefficients. Second, the inclusion of random effects means that intra- and inter-player/team variability can be considered. This enables interpretation of the strength of players and/or teams in specific game situations. Third, using the Bayesian framework enables the utilization of limited training data while incorporating expert prior knowledge. Such a model is not new and is used in other fields and applications. Yet, it was unseen in xG modeling until the recent work of Scholtes and Karaku&#x015F; (<xref ref-type="bibr" rid="B21">21</xref>), which also demonstrates the aforementioned advantages. In our study, we propose to further exploit the Bayesian contribution with a transfer learning strategy to build a highly informative prior. The prior is built using past competitions, rather than relying on a hand-constructed prior, which is inevitably less informative. A study on the impact of prior choice on the posterior is presented using the Wasserstein Impact Measure (WIM) (<xref ref-type="bibr" rid="B23">23</xref>), showing the benefits of choosing the prior we propose (more informative prior and better predictive performance). Furthermore, practical explainability of the model&#x2019;s outcomes is provided through SHapley Additive exPlanations (SHAP) estimates, which can, according to the literature (<xref ref-type="bibr" rid="B24">24</xref>), benefit football analysts and coaches in their decision-making processes.</p>
<p>Following the presentation of the data and the features used to evaluate xG (<xref ref-type="sec" rid="s2a">Section 2.1</xref>), we will define our xG model (<xref ref-type="sec" rid="s2b">Section 2.2</xref>) and then present the results (<xref ref-type="sec" rid="s3">Section 3</xref>). Thereafter, we will engage in a discussion of the results and benefits of the model within the context of the existing literature (<xref ref-type="sec" rid="s4">Section 4</xref>).</p>
</sec>
<sec id="s2"><label>2</label><title>Material and methods</title>
<sec id="s2a"><label>2.1</label><title>Dataset description</title>
<p>Here, we used the StatsBomb open dataset (<xref ref-type="bibr" rid="B25">25</xref>) that includes 460 matches with 63,177 shots from 11 different competitions including the FIFA World Cup, Women&#x2019;s World Cup, UEFA Euro, UEFA Women&#x2019;s Euro, Indian Super League, NWSL, and Premier League between 2003 and 2022. Among these shots, we retained 59,417 from open-play situations and discarded 3,760 from set-play actions (penalty, free kick, or corner). Hence, only shots from open play were considered. Shot locations and outcome occurrences are displayed in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>.</p>
<fig id="F1" position="float"><label>Figure 1</label>
<caption><p>Shot outcomes. On the left, dots represent shot locations on the field and the gradient color denotes the frequency of scored shots per location. On the right, all outcomes and their occurrences are listed.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1504362-g001.tif"/>
</fig>
<p>Shot data include the ball&#x2019;s location, the player&#x2019;s location, and the time of the shot. From these spatio-temporal data, we extracted features for subsequent modeling. An exhaustive list of the features is provided in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>. Many of these features are well-known and are common to previous studies (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B26">26</xref>). However, subtle features such as the <italic>Best&#x005F;angle</italic> may provide relevant information. Since our dataset includes quantitative and qualitative features (<italic>Position, Body&#x005F;part</italic>, and <italic>Last&#x005F;action</italic>), the latter have been one-hot encoded for the modeling. To reduce dimensionality and prevent collinearity, we performed a feature selection as described in <xref ref-type="sec" rid="s2b">Section 2.2</xref>.</p>
<table-wrap id="T1" position="float"><label>Table 1</label>
<caption><p>Feature descriptions and value range.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Feature</th>
<th valign="top" align="center">Description</th>
<th valign="top" align="center">Values</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Minute</td>
<td valign="top" align="left">Time of the shot in minutes</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM1"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>128</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Distance</td>
<td valign="top" align="left">The distance between the location of the shot and the center of the goal</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM2"><mml:mo stretchy="false">[</mml:mo><mml:mn>0.4</mml:mn><mml:mo>,</mml:mo><mml:mn>93</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Angle</td>
<td valign="top" align="left">The angle of the shot (ball, center of the goal, and center of the field). Its value is 0 when the shot is aligned with the center of the goal and 90 when the shot is from the back line.</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM3"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>90</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Distance&#x005F;goalkeeper</td>
<td valign="top" align="left">The distance between the goalkeeper and the center of the goal</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM4"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>118</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Goalkeeper&#x005F;on&#x005F;traj</td>
<td valign="top" align="left">If the goalkeeper is on the trajectory (in the triangle formed by the two posts and the shot location)</td>
<td valign="top" align="left">&#x007B;1, 0&#x007D; for &#x007B;True, False&#x007D;</td>
</tr>
<tr>
<td valign="top" align="left">Nb&#x005F;opponent&#x005F;on&#x005F;traj</td>
<td valign="top" align="left">The number of opponents, excluding the goalkeeper, on the trajectory</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM5"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>10</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Closest&#x005F;opponent</td>
<td valign="top" align="left">The distance between the shot location and the closest opponent</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM6"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>92</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Opponent&#x005F;nearby</td>
<td valign="top" align="left">The number of opponents closer than 3&#x2009;m</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM7"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>9</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Teammate&#x005F;front</td>
<td valign="top" align="left">The number of teammates more advanced in the field</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM8"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>10</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Best&#x005F;angle</td>
<td valign="top" align="left">The best shot angle (the angle formed by the left post, the shot location, and the right post if there is neither opponent nor goalkeeper on the trajectory, else the largest space)</td>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM9"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>180</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">Under&#x005F;pressure</td>
<td valign="top" align="left">StatsBomb feature estimating whether the shooter is under pressure or not during the shot</td>
<td valign="top" align="left">&#x007B;1, 0&#x007D; for &#x007B;True, False&#x007D;</td>
</tr>
<tr>
<td valign="top" align="left">Position</td>
<td valign="top" align="left">The position of the shooter</td>
<td valign="top" align="left">e.g., &#x201C;Goalkeeper,&#x201D; &#x201C;Right Attacking Midfield,&#x201D; &#x201C;Center Forward,&#x201D; <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM10"><mml:mo>&#x2026;</mml:mo></mml:math></inline-formula> 25 different instances</td>
</tr>
<tr>
<td valign="top" align="left">Body&#x005F;part</td>
<td valign="top" align="left">The part of the body used for the shot</td>
<td valign="top" align="left">&#x007B;&#x201C;Left Foot,&#x201D; &#x201C;Right Foot,&#x201D; &#x201C;Head,&#x201D; or &#x201C;Other&#x201D;&#x007D;</td>
</tr>
<tr>
<td valign="top" align="left">Last&#x005F;action</td>
<td valign="top" align="left">The action preceding the shot</td>
<td valign="top" align="left">&#x007B;&#x201C;Regular Play,&#x201D; &#x201C;From Throw In,&#x201D; &#x201C;From Keeper,&#x201D; &#x201C;From Corner,&#x201D; &#x201C;From Counter,&#x201D; &#x201C;From Free Kick,&#x201D; &#x201C;From Goal Kick,&#x201D; or &#x201C;From Kick Off&#x201D;&#x007D;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The StatsBomb dataset provides an estimate of xG, which we will refer to as StatsBomb xG. Their model, which remains undetailed, has most likely been trained on their full dataset of matches instead of the open-source dataset used in our study. Furthermore, some features such as shot impact height are used and not shared in the open-source dataset. This StatsBomb xG estimate, which has proven its performance (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B28">28</xref>), will be considered as a baseline model for comparison.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Model definition</title>
<p>In the literature, many statistical and machine learning methods have been tested to model xG (presented in <xref ref-type="sec" rid="s1">Section 1</xref>), most of them focusing on the best predictive performance at the expense of model interpretability. In this study, we propose a model allowing a detailed analysis of the shot quality. We built a Bayesian logistic regression with mixed effects, defined in <xref ref-type="disp-formula" rid="disp-formula1">Equations 1</xref> and <xref ref-type="disp-formula" rid="disp-formula2">2</xref>. Normal and Gamma prior distributions (see <xref ref-type="disp-formula" rid="disp-formula3">Equation 3</xref>) have been set for fixed and random effects, with adjusted means and standard deviations accordingly (the method is described hereafter).</p>
<p>Let us define <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM11"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi></mml:mrow><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM12"><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>, respectively, as the vector of feature values and the shot result for observation <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM13"><mml:mi>i</mml:mi></mml:math></inline-formula> (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM14"><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM15"><mml:mn>1</mml:mn></mml:math></inline-formula> represents a goal and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM16"><mml:mn>0</mml:mn></mml:math></inline-formula> otherwise). 
Hence, we define <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM17"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula> as the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM18"><mml:mi>k</mml:mi></mml:math></inline-formula>th predicted xG for team <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM19"><mml:mi>i</mml:mi></mml:math></inline-formula>, as follows:<disp-formula id="disp-formula1"><label>(1)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM1"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:msup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03F5;</mml:mi><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>&#x03F5;</mml:mi><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>&#x223C;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msubsup><mml:mi>&#x03C3;</mml:mi><mml:mi>i</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:math></disp-formula><disp-formula id="disp-formula2"><label>(2)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" 
id="DM2"><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>+</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mspace width="thinmathspace" /><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>p</mml:mi></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>&#x03B7;</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>&#x03B7;</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msubsup><mml:mi>&#x03B7;</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>q</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>&#x223C;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msubsup><mml:mi>&#x03C9;</mml:mi><mml:mi>q</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:math></disp-formula><disp-formula id="disp-formula3"><label>(3)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM3"><mml:mtable columnalign="right left" rowspacing=".5em" columnspacing="thickmathspace" displaystyle="true"><mml:mtr><mml:mtd><mml:mi>&#x03B1;</mml:mi></mml:mtd><mml:mtd><mml:mo>&#x223C;</mml:mo><mml:mrow><mml:mi 
mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>&#x03B1;</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msubsup><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B2;</mml:mi></mml:mrow><mml:mo>&#x223C;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03BC;</mml:mi></mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">&#x03A3;</mml:mi></mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo>&#x223C;</mml:mo><mml:mi mathvariant="normal">&#x0393;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>&#x03C3;</mml:mi></mml:mtd><mml:mtd><mml:mo>&#x223C;</mml:mo><mml:mi mathvariant="normal">&#x0393;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr></mml:mtable><mml:mo>.</mml:mo></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM20"><mml:mi>q</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" 
stretchy="false">{</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>p</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> are the fixed effects parameters to be identified, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM21"><mml:mi>p</mml:mi></mml:math></inline-formula> is the number of features, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM22"><mml:mi>i</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mi>N</mml:mi><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM23"><mml:mi>N</mml:mi></mml:math></inline-formula> is the number of teams, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM24"><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> is the number of observations for team <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM25"><mml:mi>i</mml:mi></mml:math></inline-formula>. 
Moreover, the parameters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM26"><mml:mi>q</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>p</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> on which random effects <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM27"><mml:msubsup><mml:mi>&#x03B7;</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>q</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> are applied must be determined. The model selection is performed using Pareto-smoothed importance sampling-leave-one-out (PSIS-LOO) (<xref ref-type="bibr" rid="B29">29</xref>). This Bayesian approach to model selection enables us to select the model that offers the best predictive accuracy in a robust way thanks to its leave-one-out strategy. In addition, the combination of Hamiltonian Monte Carlo and approximate cross-validation enables highly efficient model fitting and score estimation for a wide range of model selection contexts (<xref ref-type="bibr" rid="B30">30</xref>). Thus, not all <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM28"><mml:msubsup><mml:mi>&#x03B7;</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>q</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> will be considered a random variable but some will be fixed to 0.</p>
<p>The choice of the prior distribution is a crucial step in the model fitting process (choice of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM29"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>&#x03B1;</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mi>&#x03B1;</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03BC;</mml:mi></mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">&#x03A3;</mml:mi></mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:math></inline-formula>). Alternatively, two different approaches can be considered instead of a non-informative prior, with (i) a prior identification from expert knowledge, and (ii) a prior identification using a <italic>baseline</italic> model, computed on different data. Scholtes and Karaku&#x015F; (<xref ref-type="bibr" rid="B21">21</xref>) used the first method while the latter method has been employed in this study, thus considering a different training dataset. Hence, estimating prior distributions using a separate dataset ensures unbiased model training and model generalization. Our method therefore uses a transfer learning strategy (i.e., the reuse of a set of functions or knowledge learned from a source task). A model is fitted for each competition, the prior being built from the other competitions. 
In other words, let us assume that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM30"><mml:mi>c</mml:mi></mml:math></inline-formula> is the competition of interest and that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM31"><mml:mover><mml:mi>c</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> are the other competitions available in our dataset. The <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM32"><mml:mover><mml:mi>c</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> are used to fit a model <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM33"><mml:mrow><mml:mi mathvariant="script">M</mml:mi></mml:mrow></mml:math></inline-formula> without informative prior knowledge. The model of interest, fitted on <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM34"><mml:mi>c</mml:mi></mml:math></inline-formula>, then benefits from the posterior knowledge identified in <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM35"><mml:mrow><mml:mi mathvariant="script">M</mml:mi></mml:mrow></mml:math></inline-formula>.</p>
<p>A Bayesian model with flat prior and without random effect was fitted on the shots from <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM36"><mml:mover><mml:mi>c</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula>. The posterior distribution obtained is then used to build the prior for the model on competition <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM37"><mml:mi>c</mml:mi></mml:math></inline-formula>, and is defined as<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM1"><mml:mi>&#x03B1;</mml:mi><mml:mo>&#x223C;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>&#x03B1;</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mover><mml:mi>c</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>g</mml:mi><mml:mo>.</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>&#x03C3;</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mover><mml:mi>c</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:msub></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B2;</mml:mi></mml:mrow><mml:mo>&#x223C;</mml:mo><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B2;</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mover><mml:mi>c</mml:mi><mml:mo 
accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>g</mml:mi><mml:mo>.</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">&#x03A3;</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mover><mml:mi>c</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>.</mml:mo></mml:math></disp-formula>The coefficient <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM38"><mml:mi>g</mml:mi></mml:math></inline-formula> is chosen so that the mean of the standard deviations for the fixed effects is equal to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM39"><mml:mn>0.5</mml:mn></mml:math></inline-formula> to ensure that the prior information is neither too strong nor too weak. Regarding the parameters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM40"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM41"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>q</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>p</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>, the priors were fixed to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM42"><mml:mi>&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo><mml:mi mathvariant="normal">&#x0393;</mml:mi><mml:mo 
stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>0.5</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo>&#x223C;</mml:mo><mml:mi mathvariant="normal">&#x0393;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>0.3</mml:mn><mml:mo>,</mml:mo><mml:mn>0.1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. The variance of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM43"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> prior distribution is large, while <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM44"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>q</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>p</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> is more restrictive. This allows us to avoid the high values obtained when teams are under-represented in the studied competition, and when observations are not necessarily representative of actual team performance.</p>
<p>Posterior distributions have been estimated through Markov chain Monte Carlo with a Hamiltonian Monte Carlo algorithm [No-U-Turn Sampler, (<xref ref-type="bibr" rid="B31">31</xref>)]. The No-U-Turn sampler simplifies the tuning of the standard Hamiltonian Monte Carlo method while achieving similarly efficient performance. For sampling, we used 10,000 iterations and four chains, with a burn-in period of 1,000 samples. All the analyses were conducted using the latest version of the Python library PyMC (<xref ref-type="bibr" rid="B32">32</xref>). Once the sampling from the posterior distribution was complete, the model&#x2019;s parameters were estimated according to the maximum <italic>a posteriori</italic> for further predictions.</p>
<p>For a given competition, 70&#x0025; of the shots were used as a training set while the remaining 30&#x0025; were used as a test set. All the results presented in <xref ref-type="sec" rid="s3">Section 3</xref> stem from the test set. Model selection was carried out using PSIS-LOO for each competition to select <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM45"><mml:mi>q</mml:mi></mml:math></inline-formula> parameters on which random effects are applied. Consequently, the random effect structure could vary across competitions.</p>
<p>As the work of Scholtes and Karaku&#x015F; (<xref ref-type="bibr" rid="B21">21</xref>) is analogous to ours, we also fitted their model to our data and features (selected in <xref ref-type="sec" rid="s2c">Section 2.3</xref>) to emphasize the benefits of using an informative prior in small datasets. The priors presented below correspond to the priors used to reproduce their work. The same features used to fit our model were considered, but the priors were chosen as proposed in their paper. Since some features are shared between our respective works, the priors chosen for the associated parameters are identical (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM46"><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow></mml:math></inline-formula> stands for normal distribution and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM47"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mi mathvariant="script">N</mml:mi></mml:mrow></mml:math></inline-formula> for skew-normal distribution):
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM48"><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>p</mml:mi><mml:mi>t</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM49"><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p></list-item>
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM50"><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>g</mml:mi><mml:mi>o</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>k</mml:mi><mml:mi>e</mml:mi><mml:mi>e</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM51"><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p></list-item>
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM52"><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM53"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p></list-item>
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM54"><mml:mi>b</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>y</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>p</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>t</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM55"><mml:mrow><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p></list-item>
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM56"><mml:mi>n</mml:mi><mml:mi>b</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM57"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p></list-item>
</list>For the other features, we applied the same method for their selection and finally used the following priors:</p>
<p>
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM58"><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mi>l</mml:mi><mml:mi>e</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM59"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p>
<p>This feature is similar to the shot&#x005F;angle feature used in Scholtes and Karaku&#x015F;&#x2019;s paper, which is the angle formed by the left post, the shot location, and the right post. The prior is therefore the same as that used in their article for the shot&#x005F;angle feature.</p></list-item>
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM60"><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mi>l</mml:mi><mml:mi>e</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM61"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p>
<p>The angle we use represents alignment with the goal. A higher value represents a less centered positioning, which translates into a lower score probability and explains the choice of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM62"><mml:mi>&#x03BC;</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM63"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula>.</p></list-item>
<list-item><label>&#x2022;</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM64"><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:math></inline-formula> : <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM65"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mi mathvariant="script">N</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>5</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></p>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM66"><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM67"><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> because the closer the opponent, the greater the pressure.</p></list-item>
</list>In the following, we will refer to this model as Scholtes&#x2019; model.</p>
</sec>
<sec id="s2c"><label>2.3</label><title>Feature selection and performance criteria</title>
<p>To use our approach, the model used to build the prior must be fitted using the same features as the model of interest. Thus, feature selection was conducted using the training set of all competitions in a <italic>global</italic> model. A frequentist logistic regression was performed, discarding random effects and any prior knowledge. For feature selection, we performed a forward selection starting with the best features to model xG and then added features one by one until the area under the curve (AUC) reached a plateau (see <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>). At each step, all unused features were tested.</p>
<fig id="F2" position="float"><label>Figure 2</label>
<caption><p>AUC score evolution of the fivefold cross-validation. On the <italic>x</italic>-axis, the best features are added one by one. The red dotted line represents the last selected feature, where the AUC reaches a plateau.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1504362-g002.tif"/>
</fig>
<p>Seven features were selected from the fivefold cross-validation (<xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>), with (1) best&#x005F;angle, (2) distance&#x005F;goalkeeper, (3) distance, (4) body&#x005F;part&#x005F;head (one-hot-encoded feature, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM68"><mml:mn>1</mml:mn></mml:math></inline-formula> if the shooter struck the ball with their head, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM69"><mml:mn>0</mml:mn></mml:math></inline-formula> otherwise), (5) angle, (6) nb&#x005F;opponent&#x005F;traj, and (7) closest&#x005F;opponent. They constitute the set of predictors for subsequent modeling.</p>
<p>The area under the curve, balanced accuracy, precision, recall, specificity, and F1 score were used to measure the efficiency of the models.</p>
<p>Each competition was used for model fitting, leading to models with heterogeneous fixed parameter estimates based on the prior information.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<p>The comparison of the performance of the proposed model (i.e., a Bayesian mixed effect logistic regression, namely, <italic>Bayesian xG</italic>) with that of StatsBomb&#x2019;s and Scholtes&#x2019; models is reported in <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>. Considering all competitions, the results show that model performances differed slightly, but similar performance was obtained with the Bayesian approach and the StatsBomb model. Scholtes&#x2019; model seemed to suffer from less informative priors when fitted on small data samples. This is supported by a lower AUC score than the other models, and the xG predictions tend to be systematically higher (higher recall with a smaller threshold).</p>
<table-wrap id="T2" position="float"><label>Table 2</label>
<caption><p>Model performance of the Bayesian model, StatsBomb xG, and Scholtes&#x2019; model.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Threshold</th>
<th valign="top" align="left">Score</th>
<th valign="top" align="center">Bayesian xG</th>
<th valign="top" align="center">StatsBomb xG</th>
<th valign="top" align="center">Scholtes xG</th>
</tr>
<tr>
<th valign="top" align="left">AUC</th>
<th valign="top" align="center">0.781</th>
<th valign="top" align="center">0.801</th>
<th valign="top" align="center">0.578</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Balanced accuracy</td>
<td valign="top" align="center">0.517</td>
<td valign="top" align="center">0.520</td>
<td valign="top" align="center">0.553</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Precision</td>
<td valign="top" align="center">0.754</td>
<td valign="top" align="center">0.845</td>
<td valign="top" align="center">0.159</td>
</tr>
<tr>
<td valign="top" align="left">0.8</td>
<td valign="top" align="left">Recall</td>
<td valign="top" align="center">0.036</td>
<td valign="top" align="center">0.041</td>
<td valign="top" align="center">0.238</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Specificity</td>
<td valign="top" align="center">0.999</td>
<td valign="top" align="center">0.999</td>
<td valign="top" align="center">0.868</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">F1 score</td>
<td valign="top" align="center">0.068</td>
<td valign="top" align="center">0.078</td>
<td valign="top" align="center">0.191</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Balanced accuracy</td>
<td valign="top" align="center">0.559</td>
<td valign="top" align="center">0.558</td>
<td valign="top" align="center">0.504</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Precision</td>
<td valign="top" align="center">0.563</td>
<td valign="top" align="center">0.608</td>
<td valign="top" align="center">0.095</td>
</tr>
<tr>
<td valign="top" align="left">0.5</td>
<td valign="top" align="left">Recall</td>
<td valign="top" align="center">0.129</td>
<td valign="top" align="center">0.124</td>
<td valign="top" align="center">0.944</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Specificity</td>
<td valign="top" align="center">0.988</td>
<td valign="top" align="center">0.991</td>
<td valign="top" align="center">0.064</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">F1 score</td>
<td valign="top" align="center">0.211</td>
<td valign="top" align="center">0.207</td>
<td valign="top" align="center">0.173</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Balanced accuracy</td>
<td valign="top" align="center">0.710</td>
<td valign="top" align="center">0.735</td>
<td valign="top" align="center">0.502</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Precision</td>
<td valign="top" align="center">0.228</td>
<td valign="top" align="center">0.250</td>
<td valign="top" align="center">0.095</td>
</tr>
<tr>
<td valign="top" align="left">0.2</td>
<td valign="top" align="left">Recall</td>
<td valign="top" align="center">0.687</td>
<td valign="top" align="center">0.716</td>
<td valign="top" align="center">0.996</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Specificity</td>
<td valign="top" align="center">0.733</td>
<td valign="top" align="center">0.754</td>
<td valign="top" align="center">0.007</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">F1 score</td>
<td valign="top" align="center">0.344</td>
<td valign="top" align="center">0.372</td>
<td valign="top" align="center">0.173</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn1"><p>The results are displayed according to three thresholds.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Having obtained a reasonable model, we will now illustrate the advantages of the proposed approach and focus on a given international football competition: the FIFA World Cup 2022. First, to underline the impact of the prior used for the <italic>Bayesian xG</italic> model compared to the prior proposed by Scholtes, we calculated the WIM. This measure is used to compare two distributions, and in the Bayesian framework, to compare two posterior distributions obtained with distinct priors. As suggested in the original paper, we computed the WIM using the uniform prior. The aim of the approach is to evaluate the quantity of information provided by the prior: the higher the WIM between a given prior and the uniform prior, the more informative the prior is. The WIM was computed using the samples obtained with the MCMC algorithm, and resulted in a WIM almost four times higher for the <italic>Bayesian xG</italic> prior against the uniform prior than for the Scholtes prior against the uniform prior (<xref ref-type="table" rid="T3">Table&#x00A0;3</xref>).</p>
<table-wrap id="T3" position="float"><label>Table 3</label>
<caption><p>Wasserstein Impact Measure using the priors proposed for the Bayesian model and Scholtes&#x2019; model and using a uniform prior for all parameters.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center">Uniform prior</th>
<th valign="top" align="center">Scholtes&#x2019; prior</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Bayesian xG prior</td>
<td valign="top" align="center">0.69</td>
<td valign="top" align="center">0.66</td>
</tr>
<tr>
<td valign="top" align="left">Scholtes&#x2019; prior</td>
<td valign="top" align="center">0.18</td>
<td valign="top" align="center"/>
</tr>
</tbody>
</table>
</table-wrap>
<p>To focus on the <italic>Bayesian xG</italic> model, the maximum <italic>a posteriori</italic> parameter estimation showed that the <italic>distance</italic> feature (i.e., the distance between the shot location and the center of the goal) had the greatest negative influence on the predicted xG (see <xref ref-type="table" rid="T4">Table&#x00A0;4</xref>). Furthermore, the optimal model structure retained for xG prediction includes a random intercept <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM70"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and two random slopes (on <italic>angle</italic> and <italic>closest&#x005F;opponent</italic> parameters, denoted by <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM71"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>5</mml:mn></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM72"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>7</mml:mn></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula>, respectively). Since <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM73"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>5</mml:mn></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM74"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>7</mml:mn></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula> differ from 0, they highlight team-specific traits and relationships between these predictors and the predicted xG. Accordingly, such a model structure allows for consideration of inter-team variability. 
The univariate posterior and prior distributions for the fixed effects showed the differences between the Bayesian model calibrated on the 2022 FIFA World Cup and the <italic>global</italic> model fitted on other competitions (see <xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>). For this competition, the features <italic>nb&#x005F;opponent&#x005F;on&#x005F;traj</italic>, <italic>angle</italic>, and <italic>body&#x005F;part&#x005F;head</italic> had a notably stronger negative impact than in the <italic>global</italic> model. This means that in this competition and situation, the players were less likely to score than usual. However, the optimal model retained a random slope on the <italic>angle</italic> feature, meaning that some teams were significantly better than others at dealing with bad angles.</p>
<table-wrap id="T4" position="float"><label>Table 4</label>
<caption><p>Maximum <italic>a posteriori</italic> parameter estimation for the FIFA World Cup 2022.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM75"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM76"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM77"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM78"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>3</mml:mn></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM79"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>4</mml:mn></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM80"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>5</mml:mn></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM81"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>6</mml:mn></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM82"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>7</mml:mn></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM83"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>&#x03B1;</mml:mi></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM84"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>5</mml:mn></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula></th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM85"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>7</mml:mn></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM86"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>2.58</td>
<td valign="top" align="center">0.393</td>
<td valign="top" align="center">0.397</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM87"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>1.296</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM88"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>0.501</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM89"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>0.449</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM90"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>0.703</td>
<td valign="top" align="center">0.378</td>
<td valign="top" align="center">0.427</td>
<td valign="top" align="center">0.364</td>
<td valign="top" align="center">0.326</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn2"><p>The <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM91"><mml:mi>&#x03B2;</mml:mi></mml:math></inline-formula>&#x2019;s are defined as the parameters for (1) best&#x005F;angle, (2) distance&#x005F;goalkeeper, (3) distance, (4) body&#x005F;part&#x005F;head, (5) angle, (6) nb&#x005F;opponent&#x005F;traj, and (7) closest&#x005F;opponent.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="F3" position="float"><label>Figure 3</label>
<caption><p>Univariate posterior and prior distribution of the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM92"><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B2;</mml:mi></mml:mrow></mml:math></inline-formula> parameters for the 2022 FIFA World Cup.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1504362-g003.tif"/>
</fig>
<p>Computing SHAP values allowed us to explain the model predictions and, for a given shot, to interpret the impact of each feature&#x2019;s value on the prediction. In addition, we can examine poorly predicted shots to understand where the failure arises. For example, <xref ref-type="fig" rid="F4">Figure&#x00A0;4</xref> presents two poorly predicted shots and their respective feature contributions. The first shot has a very high xG value because it is close, central, and there is no opponent on its trajectory. However, the pressure exerted by the nearest opponent likely exceeded the model&#x2019;s prediction, resulting in a missed shot. The second shot was challenging, characterized by its considerable distance and lateral displacement, with two opposing players present on its trajectory. Nevertheless, the player successfully converted his attempt into a goal.</p>
<fig id="F4" position="float"><label>Figure 4</label>
<caption><p>SHAP waterfall plots for two separate shots. The shot on the left has a high Bayesian xG prediction (0.787) and was missed; the one on the right has a low Bayesian xG prediction (0.026) and was scored.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1504362-g004.tif"/>
</fig>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>The present xG mixed-effect model, based on a dataset from the 2022 FIFA World Cup, provided an opportunity to study teams&#x2019; goal scoring through strengths and weaknesses in an interpretable and explainable way. The results are arguably of major interest to coaching practitioners, sports scientists, and researchers interested in determining the influence of player and team actions on scoring goals and match outcomes. For practical usage, the models were implemented in a Streamlit web application (<xref ref-type="bibr" rid="B33">33</xref>).</p>
<p>Using transfer learning, our model further advances the recent work of Scholtes and Karaku&#x015F; (<xref ref-type="bibr" rid="B21">21</xref>). It can notably undergo an initial pre-training phase on a more extensive and historical dataset and a secondary training phase on a smaller, recent dataset to refine its predictive capabilities. The application of transfer learning within a Bayesian framework yielded results that were comparable to those of a commercial xG model (developed by StatsBomb), despite the utilization of a reduced dataset (see <xref ref-type="table" rid="T2">Table&#x00A0;2</xref> for a summary). Furthermore, the interpretability of our linear model is a notable advantage as each model parameter can be analyzed and compared with one another. For instance, the <italic>distance</italic> from the goal had the most significant influence on xG predictions (over the shot <italic>angle</italic>) and should, consequently, be considered in any training drills and in-game tactical decision-making.</p>
<p>The Bayesian framework has several advantages. Building an efficient predictive model in a frequentist way implies training a model over a large enough dataset (<xref ref-type="bibr" rid="B34">34</xref>). In their study, Robberechts and Davis (<xref ref-type="bibr" rid="B35">35</xref>) concluded that five seasons of data were needed to fit an accurate frequentist and non-parametric xG model. Since Bayesian inference comes with prior distributions, fewer training observations are needed to fit a model correctly. In addition, our approach addressed confidentiality issues. Identifying a prior distribution implies learning a first function that approximates xG using a separate data sample (in our case, another competition was used to build the <italic>baseline</italic> model). Through transfer learning, models trained on multiple competitions can be reused for the targeted competition, reducing the computation time and improving model performance. Furthermore, retraining the baseline model on the competition of interest helps identify differences and similarities between the competitions.</p>
<p>In this work, we focused on open-play phases only, while overall, football performance should be modeled from all playing phases. However, the method is transferable to any playing phases where models could be built separately or combined. As mentioned above, player characteristics have significant importance in any football performance modeling. Player-specific data might improve model accuracy and outperform other commercial xG models. Gender has also shown some importance, as mentioned by Bransen and Davis (<xref ref-type="bibr" rid="B12">12</xref>). Considering gender in xG modeling or building gender- or youth-specific models is recommended. Since football data on the women&#x2019;s game are generally less abundant, the proposed approach based on transfer learning should address this issue as an optimal model would be built for a given competition while benefiting from broader information.</p>
<p>Even though sports performance modeling remains challenging due to its inherent complexity, analyzing players&#x2019; and teams&#x2019; characteristics provides essential information for this task. In this study, we considered individuals as teams since the data did not allow us to consider each player individually. Hence, the cross-random effects from the mixed logistic regression highlighted singularities between football teams, particularly regarding the <italic>body&#x005F;part&#x005F;head</italic> and <italic>nb&#x005F;opponent&#x005F;traj</italic> features. However, one may note that player-level information could significantly increase the performance of xG predictions, as players have strengths and weaknesses of their own. Beyond this, the model could allow for adapting a pre-game team strategy or making on-field decisions, to optimize the efficiency of the team.</p>
<p>Expert knowledge can be included in the model as priors. Nevertheless, if the number of observations is insufficient to construct the prior or if the prior information reflects extreme parameter values, there is a risk of bias and identification failure of posterior distributions (<xref ref-type="bibr" rid="B36">36</xref>&#x2013;<xref ref-type="bibr" rid="B38">38</xref>). This phenomenon can be observed in our dataset when a comparison is made between the results of Scholtes&#x2019; model and the Bayesian xG model that we propose. Indeed, fitting the model by competition results in a smaller dataset than that employed in the aforementioned work. Furthermore, the selection of priors has led to a model with a higher xG prediction than that of StatsBomb and our own model. Conversely, when a sufficient number of observations have been accumulated, the incorporation of expert knowledge as a prior enables coaches to exert direct influence on predictions and decisions, thereby facilitating the provision of valuable real-time feedback. To illustrate, a coach&#x2019;s knowledge of the game could be incorporated into the prior to facilitate meaningful insight into the players. Knowledge of the strengths of the starting team he has chosen, or of the opposing team, could lead to a manual shift in the value of the parameters influenced. Coaches could then readily utilize the model&#x2019;s parameters to optimize in-game strategies, such as identifying the optimal shooting distance for individual players and offering practical insights for enhancing overall team performance.</p>
<p>The proposed method may also be of particular interest for modeling expected goals on target (xGOT). xGOT is a post-shot metric that uses the position at which the ball enters the goal and whether it is saved or scored. The xGOT is primarily employed to assess a team&#x2019;s finishing proficiency by comparing xG with xGOT and to evaluate the performance of goalkeepers according to the quality of the shot. The methodology delineated in this paper also permits the identification of scenarios wherein specific teams exhibit superior finishing proficiency or more efficacious goalkeeping and the characterization of these scenarios. However, although our approach is generalizable to many cases, it is possible that predictive quality may be reduced in tasks that are too complex, such as expected threat. Maintaining high interpretability and, therefore, simple models for such tasks would potentially lead to a greater loss of predictive quality.</p>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusion</title>
<p>The development of interpretable and, more widely, explainable artificial intelligence represents a pivotal area of research within the field of computer science and subsequently sports science. This approach facilitates the extraction of novel insights from complex sports data, thereby empowering practitioners to make well-informed decisions (<xref ref-type="bibr" rid="B39">39</xref>). Our approach, based on a Bayesian mixed logistic regression model, is aligned with the principles of reproducibility and interpretability. Furthermore, it achieves comparable predictive performance to that of more complex models, despite the utilization of a limited sample of competition data. It also addresses practical concerns such as the identification of team strengths and weaknesses, and could be further extended to model xG from individual characteristics in a straightforward, accessible, and reliable manner.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/Supplementary Material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions"><title>Author contributions</title>
<p>LI: Conceptualization, Formal analysis, Investigation, Methodology, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing, Data curation. SC: Formal analysis, Methodology, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. MR: Data curation, Resources, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. TH: Data curation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. CC: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing, Formal analysis. FI: Conceptualization, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="s8" sec-type="funding-information"><title>Funding</title>
<p>The author(s) declare that no financial support was received for the research and/or publication of this article.</p>
</sec>
<sec id="s9" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec id="s11" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ali</surname><given-names>A</given-names></name></person-group>. <article-title>Measuring soccer skill performance: a review</article-title>. <source>Scand J Med Sci Sports</source>. (<year>2011</year>) <volume>21</volume>:<fpage>170</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1111/j.1600-0838.2010.01256.x</pub-id><pub-id pub-id-type="pmid">21210855</pub-id></citation></ref>
<ref id="B2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sarmento</surname><given-names>H</given-names></name><name><surname>Clemente</surname><given-names>FM</given-names></name><name><surname>Afonso</surname><given-names>J</given-names></name><name><surname>Ara&#x00FA;jo</surname><given-names>D</given-names></name><name><surname>Fachada</surname><given-names>M</given-names></name><name><surname>Nobre</surname><given-names>P</given-names></name></person-group>, et al. <article-title>Match analysis in team ball sports: an umbrella review of systematic reviews and meta-analyses</article-title>. <source>Sports Med Open</source>. (<year>2022</year>) <volume>8</volume>:<fpage>66</fpage>. <pub-id pub-id-type="doi">10.1186/s40798-022-00454-7</pub-id><pub-id pub-id-type="pmid">35553279</pub-id></citation></ref>
<ref id="B3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sarmento</surname><given-names>H</given-names></name><name><surname>Marcelino</surname><given-names>R</given-names></name><name><surname>Anguera</surname><given-names>MT</given-names></name><name><surname>Campani&#x00E7;o</surname><given-names>J</given-names></name><name><surname>Matos</surname><given-names>N</given-names></name><name><surname>Leit&#x00E3;o</surname><given-names>JC</given-names></name></person-group>. <article-title>Match analysis in football: a systematic review</article-title>. <source>J Sports Sci</source>. (<year>2014</year>) <volume>32</volume>:<fpage>1831</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1080/02640414.2014.898852</pub-id><pub-id pub-id-type="pmid">24787442</pub-id></citation></ref>
<ref id="B4"><label>4.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shergill</surname><given-names>AS</given-names></name><name><surname>Twist</surname><given-names>C</given-names></name><name><surname>Highton</surname><given-names>J</given-names></name></person-group>. <article-title>Importance of GNSS data quality assessment with novel control criteria in professional soccer match-play</article-title>. <source>Int J Perform Anal Sport</source>. (<year>2021</year>) <volume>21</volume>:<fpage>820</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1080/24748668.2021.1947017</pub-id></citation></ref>
<ref id="B5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Agethen</surname><given-names>P</given-names></name><name><surname>Otto</surname><given-names>M</given-names></name><name><surname>Mengel</surname><given-names>S</given-names></name><name><surname>Rukzio</surname><given-names>E</given-names></name></person-group>. <article-title>Using marker-less motion capture systems for walk path analysis in paced assembly flow lines</article-title>. <source>Procedia Cirp</source>. (<year>2016</year>) <volume>54</volume>:<fpage>152</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1016/j.procir.2016.04.125</pub-id></citation></ref>
<ref id="B6"><label>6.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Green</surname><given-names>S</given-names></name></person-group>. <article-title>Assessing the performance of premier league goalscorers</article-title>. <source>OptaPro Blog</source>. (<year>2012</year>).</citation></ref>
<ref id="B7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brechot</surname><given-names>M</given-names></name><name><surname>Flepp</surname><given-names>R</given-names></name></person-group>. <article-title>Dealing with randomness in match outcomes: how to rethink performance evaluation in European club football using expected goals</article-title>. <source>J Sports Econom</source>. (<year>2020</year>) <volume>21</volume>:<fpage>335</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1177/1527002519897962</pub-id></citation></ref>
<ref id="B8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cardoso</surname><given-names>FDS</given-names></name><name><surname>Gonz&#x00E1;lez-V&#x00ED;llora</surname><given-names>S</given-names></name><name><surname>Guilherme</surname><given-names>J</given-names></name><name><surname>Teoldo</surname><given-names>I</given-names></name></person-group>. <article-title>Young soccer players with higher tactical knowledge display lower cognitive effort</article-title>. <source>Percept Mot Skills</source>. (<year>2019</year>) <volume>126</volume>:<fpage>499</fpage>&#x2013;<lpage>514</lpage>. <pub-id pub-id-type="doi">10.1177/0031512519826437</pub-id><pub-id pub-id-type="pmid">30744488</pub-id></citation></ref>
<ref id="B9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kharrat</surname><given-names>T</given-names></name><name><surname>McHale</surname><given-names>IG</given-names></name><name><surname>Pe&#x00F1;a</surname><given-names>JL</given-names></name></person-group>. <article-title>Plus&#x2013;minus player ratings for soccer</article-title>. <source>Eur J Oper Res</source>. (<year>2020</year>) <volume>283</volume>:<fpage>726</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1016/j.ejor.2019.11.026</pub-id></citation></ref>
<ref id="B10"><label>10.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Spearman</surname><given-names>W</given-names></name></person-group>. <article-title>Beyond expected goals</article-title>. In: <source>Proceedings of the 12th MIT Sloan Sports Analytics Conference</source>. (<year>2018</year>). p. <fpage>1</fpage>&#x2013;<lpage>17</lpage>.</citation></ref>
<ref id="B11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anzer</surname><given-names>G</given-names></name><name><surname>Bauer</surname><given-names>P</given-names></name></person-group>. <article-title>A goal scoring probability model for shots based on synchronized positional and event data in football (Soccer)</article-title>. <source>Front Sports Act Living</source>. (<year>2021</year>) <volume>3</volume>:<comment>624475</comment>. <pub-id pub-id-type="doi">10.3389/fspor.2021.624475</pub-id></citation></ref>
<ref id="B12"><label>12.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Bransen</surname><given-names>L</given-names></name><name><surname>Davis</surname><given-names>J</given-names></name></person-group>. <article-title>Women&#x2019;s football analyzed: interpretable expected goals models for women</article-title>. In: <source>Proceedings of the AI for Sports Analytics (AISA) Workshop at IJCAI</source>. (<year>2021</year>). Vol. <volume>2021</volume>.</citation></ref>
<ref id="B13"><label>13.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Decroos</surname><given-names>T</given-names></name><name><surname>Davis</surname><given-names>J</given-names></name></person-group>. <article-title>Interpretable prediction of goals in soccer</article-title>. In: <source>Proceedings of the AAAI-20 Workshop on Artificial Intelligence in Team Sports</source>. (<year>2019</year>).</citation></ref>
<ref id="B14"><label>14.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Eggels</surname><given-names>H</given-names></name><name><surname>Van Elk</surname><given-names>R</given-names></name><name><surname>Pechenizkiy</surname><given-names>M</given-names></name></person-group>. <article-title>Explaining soccer match outcomes with goal scoring opportunities predictive analytics</article-title>. In: <source>3rd Workshop on Machine Learning and Data Mining for Sports Analytics (MLSA 2016)</source>. <comment>CEUR-WS.org (2016)</comment>.</citation></ref>
<ref id="B15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rathke</surname><given-names>A</given-names></name></person-group>. <article-title>An examination of expected goals and shot efficiency in soccer</article-title>. <source>J Hum Sport Exerc</source>. (<year>2017</year>) <volume>12</volume>:<fpage>514</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.14198/jhse.2017.12.Proc2.05</pub-id></citation></ref>
<ref id="B16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Umami</surname><given-names>I</given-names></name><name><surname>Gautama</surname><given-names>DH</given-names></name><name><surname>Hatta</surname><given-names>HR</given-names></name></person-group>. <article-title>Implementing the expected goal (xG) model to predict scores in soccer matches</article-title>. <source>Int J Inform Inf Syst</source>. (<year>2021</year>) <volume>4</volume>:<fpage>38</fpage>&#x2013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.47738/ijiis.v4i1</pub-id></citation></ref>
<ref id="B17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fryer</surname><given-names>D</given-names></name><name><surname>Str&#x00FC;mke</surname><given-names>I</given-names></name><name><surname>Nguyen</surname><given-names>H</given-names></name></person-group>. <article-title>Shapley values for feature selection: the good, the bad, and the axioms</article-title>. <source>IEEE Access</source>. (<year>2021</year>) <volume>9</volume>:<fpage>144352</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3119110</pub-id></citation></ref>
<ref id="B18"><label>18.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lundberg</surname><given-names>SM</given-names></name><name><surname>Lee</surname><given-names>S-I</given-names></name></person-group>. <article-title>A unified approach to interpreting model predictions</article-title>. <source>Adv Neural Inf Process Syst</source>. (<year>2017</year>) <volume>30</volume>. <pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id></citation></ref>
<ref id="B19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arenas</surname><given-names>M</given-names></name><name><surname>Barcel&#x00F3;</surname><given-names>P</given-names></name><name><surname>Bertossi</surname><given-names>L</given-names></name><name><surname>Monet</surname><given-names>M</given-names></name></person-group>. <article-title>On the complexity of Shap-score-based explanations: tractability via knowledge compilation and non-approximability results</article-title>. <source>J Mach Learn Res</source>. (<year>2023</year>) <volume>24</volume>:<fpage>1</fpage>&#x2013;<lpage>58</lpage>.</citation></ref>
<ref id="B20"><label>20.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname><given-names>X</given-names></name><name><surname>Marques-Silva</surname><given-names>J</given-names></name></person-group>. <article-title>On the failings of Shapley values for explainability</article-title>. <source>Int J Approx Reason</source>. (<year>2024</year>) <volume>171</volume>:<fpage>109112</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijar.2023.109112</pub-id></citation></ref>
<ref id="B21"><label>21.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Scholtes</surname><given-names>A</given-names></name><name><surname>Karaku&#x015F;</surname><given-names>O</given-names></name></person-group>. <article-title>Bayes-xG: player and position correction on expected goals (xG) using Bayesian hierarchical approach</article-title>. <source>Front Sports Act Living</source>. (<year>2024</year>) <volume>6</volume>:<fpage>1348983</fpage>. <pub-id pub-id-type="doi">10.3389/fspor.2024.1348983</pub-id><pub-id pub-id-type="pmid">38947867</pub-id></citation></ref>
<ref id="B22"><label>22.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname><given-names>SY</given-names></name></person-group>. <article-title>Bayesian nonlinear models for repeated measurement data: an overview, implementation, and applications</article-title>. <source>Mathematics</source>. (<year>2022</year>) <volume>10</volume>:<fpage>898</fpage>. <pub-id pub-id-type="doi">10.3390/math10060898</pub-id></citation></ref>
<ref id="B23"><label>23.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghaderinezhad</surname><given-names>F</given-names></name><name><surname>Ley</surname><given-names>C</given-names></name><name><surname>Serrien</surname><given-names>B</given-names></name></person-group>. <article-title>The Wasserstein impact measure (WIM): a practical tool for quantifying prior impact in Bayesian statistics</article-title>. <source>Comput Stat Data Anal</source>. (<year>2022</year>) <volume>174</volume>:<fpage>107352</fpage>. <pub-id pub-id-type="doi">10.1016/j.csda.2021.107352</pub-id></citation></ref>
<ref id="B24"><label>24.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Plakias</surname><given-names>S</given-names></name><name><surname>Kokkotis</surname><given-names>C</given-names></name><name><surname>Mitrotasios</surname><given-names>M</given-names></name><name><surname>Armatas</surname><given-names>V</given-names></name><name><surname>Tsatalas</surname><given-names>T</given-names></name><name><surname>Giakas</surname><given-names>G</given-names></name></person-group>. <article-title>Identifying key factors for securing a Champions League position in French Ligue 1 using explainable machine learning techniques</article-title>. <source>Appl Sci</source>. (<year>2024</year>) <volume>14</volume>(<issue>18</issue>):<fpage>8375</fpage>. <pub-id pub-id-type="doi">10.3390/app14188375</pub-id></citation></ref>
<ref id="B25"><label>25.</label><citation citation-type="other"><collab>StatsBomb</collab>. <article-title>Data from: StatsBomb open data (2023)</article-title>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://github.com/statsbomb/open-data">https://github.com/statsbomb/open-data</ext-link></comment>.</citation></ref>
<ref id="B26"><label>26.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Cavus</surname><given-names>M</given-names></name><name><surname>Biecek</surname><given-names>P</given-names></name></person-group>. <article-title>Explainable expected goal models for performance analysis in football analytics</article-title>. In: <source>2022 IEEE 9th International Conference on Data Science and Advanced Analytics (DSAA)</source>. IEEE (<year>2022</year>). p. <fpage>1</fpage>&#x2013;<lpage>9</lpage>.</citation></ref>
<ref id="B27"><label>27.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Hewitt</surname><given-names>JH</given-names></name><name><surname>Karaku&#x015F;</surname><given-names>O</given-names></name></person-group>. <article-title>A machine learning approach for player and position adjusted expected goals in football (soccer)</article-title>. <comment><italic>arXiv</italic> [Preprint]. <italic>arXiv:2301.13052</italic> (2023)</comment>.</citation></ref>
<ref id="B28"><label>28.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Van Roy</surname><given-names>M</given-names></name><name><surname>Robberechts</surname><given-names>P</given-names></name><name><surname>Yang</surname><given-names>W-C</given-names></name><name><surname>De Raedt</surname><given-names>L</given-names></name><name><surname>Davis</surname><given-names>J</given-names></name></person-group>. <article-title>Leaving goals on the pitch: evaluating decision making in soccer</article-title>. <comment><italic>arXiv</italic> [Preprint]. <italic>arXiv:2104.03252</italic> (2021)</comment>.</citation></ref>
<ref id="B29"><label>29.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vehtari</surname><given-names>A</given-names></name><name><surname>Gelman</surname><given-names>A</given-names></name><name><surname>Gabry</surname><given-names>J</given-names></name></person-group>. <article-title>Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC</article-title>. <source>Stat Comput</source>. (<year>2017</year>) <volume>27</volume>:<fpage>1413</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1007/s11222-016-9696-4</pub-id></citation></ref>
<ref id="B30"><label>30.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yates</surname><given-names>LA</given-names></name><name><surname>Aandahl</surname><given-names>Z</given-names></name><name><surname>Richards</surname><given-names>SA</given-names></name><name><surname>Brook</surname><given-names>BW</given-names></name></person-group>. <article-title>Cross validation for model selection: a review with examples from ecology</article-title>. <source>Ecol Monogr</source>. (<year>2023</year>) <volume>93</volume>:<fpage>e1557</fpage>. <pub-id pub-id-type="doi">10.1002/ecm.1557</pub-id></citation></ref>
<ref id="B31"><label>31.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoffman</surname><given-names>MD</given-names></name><name><surname>Gelman</surname><given-names>A</given-names></name></person-group>. <article-title>The No-U-turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo</article-title>. <source>J Mach Learn Res</source>. (<year>2014</year>) <volume>15</volume>:<fpage>1593</fpage>&#x2013;<lpage>623</lpage>.</citation></ref>
<ref id="B32"><label>32.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Salvatier</surname><given-names>J</given-names></name><name><surname>Wiecki</surname><given-names>TV</given-names></name><name><surname>Fonnesbeck</surname><given-names>C</given-names></name></person-group>. <article-title>Probabilistic programming in Python using PyMC3</article-title>. <source>PeerJ Comput Sci</source>. (<year>2016</year>) <volume>2</volume>:<fpage>e55</fpage>. <pub-id pub-id-type="doi">10.7717/peerj-cs.55</pub-id></citation></ref>
<ref id="B33"><label>33.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Imbach</surname><given-names>F</given-names></name></person-group>. <article-title>Data from: Bayesian modelling of expected goals (2024)</article-title>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://github.com/fimbach/xG_stats">https://github.com/fimbach/xG&#x005F;stats</ext-link></comment>.</citation></ref>
<ref id="B34"><label>34.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Andreon</surname><given-names>S</given-names></name><name><surname>Weaver</surname><given-names>B</given-names></name></person-group>. <source>Bayesian vs Simple Methods</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name> (<year>2015</year>). <comment>p. 207&#x2013;28</comment>.</citation></ref>
<ref id="B35"><label>35.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Robberechts</surname><given-names>P</given-names></name><name><surname>Davis</surname><given-names>J</given-names></name></person-group>. <article-title>How data availability affects the ability to learn good xG models</article-title>. In: <source>Machine Learning and Data Mining for Sports Analytics: 7th International Workshop, MLSA 2020, Co-located with ECML/PKDD 2020, Ghent, Belgium, September 14&#x2013;18, 2020, Proceedings 7</source>. <publisher-name>Springer</publisher-name> (<year>2020</year>). p. <fpage>17</fpage>&#x2013;<lpage>27</lpage>.</citation></ref>
<ref id="B36"><label>36.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gifford</surname><given-names>JA</given-names></name><name><surname>Swaminathan</surname><given-names>H</given-names></name></person-group>. <article-title>Bias and the effect of priors in Bayesian estimation of parameters of item response models</article-title>. <source>Appl Psychol Meas</source>. (<year>1990</year>) <volume>14</volume>:<fpage>33</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1177/014662169001400104</pub-id></citation></ref>
<ref id="B37"><label>37.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Smid</surname><given-names>SC</given-names></name><name><surname>Winter</surname><given-names>SD</given-names></name></person-group>. <article-title>Dangers of the defaults: a tutorial on the impact of default priors when using Bayesian SEM with small samples</article-title>. <source>Front Psychol</source>. (<year>2020</year>) <volume>11</volume>:<fpage>611963</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2020.611963</pub-id><pub-id pub-id-type="pmid">33362673</pub-id></citation></ref>
<ref id="B38"><label>38.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Van der Vaart</surname><given-names>AW</given-names></name></person-group>. <source>Asymptotic Statistics</source>. <publisher-name>Cambridge University Press</publisher-name> (<year>2000</year>). <comment>Vol. 3</comment>. <pub-id pub-id-type="doi">10.1017/CBO9780511802256</pub-id></citation></ref>
<ref id="B39"><label>39.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brocherie</surname><given-names>F</given-names></name><name><surname>Chassard</surname><given-names>T</given-names></name><name><surname>Toussaint</surname><given-names>J-F</given-names></name><name><surname>Sedeaud</surname><given-names>A</given-names></name></person-group>. <article-title>Comment on: black box prediction methods in sports medicine deserve a red card for reckless practice: a change of tactics is needed to advance athlete care</article-title>. <source>Sports Med</source>. (<year>2022</year>) <volume>52</volume>:<fpage>2797</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1007/s40279-022-01736-6</pub-id><pub-id pub-id-type="pmid">35870106</pub-id></citation></ref></ref-list>
</back>
</article>