<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Sports Act. Living</journal-id>
<journal-title>Frontiers in Sports and Active Living</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Sports Act. Living</abbrev-journal-title>
<issn pub-type="epub">2624-9367</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fspor.2025.1596196</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Sports and Active Living</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Quantifying future Olympic sport selection: a data-driven framework for SDE evaluation and selection</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Song</surname><given-names>Yunkun</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/><role content-type="https://credit.niso.org/contributor-roles/validation/"/><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/project-administration/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/resources/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/software/"/></contrib>
<contrib contrib-type="author"><name><surname>Dai</surname><given-names>Rui</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="an1"><sup>&#x2020;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3031638/overview"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/validation/"/><role content-type="https://credit.niso.org/contributor-roles/project-administration/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/software/"/><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/resources/"/></contrib>
<contrib contrib-type="author"><name><surname>Zhang</surname><given-names>Qiaoyi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/software/"/><role content-type="https://credit.niso.org/contributor-roles/project-administration/"/><role content-type="https://credit.niso.org/contributor-roles/resources/"/><role content-type="https://credit.niso.org/contributor-roles/validation/"/><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Sun</surname><given-names>Yizhuo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3008863/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/></contrib>
</contrib-group>
<aff id="aff1"><label><sup>1</sup></label><institution>Sendelta International Academy</institution>, <addr-line>Shenzhen, Guangdong</addr-line>, <country>China</country></aff>
<aff id="aff2"><label><sup>2</sup></label><institution>School of Media, Yangtze University</institution>, <addr-line>Jingzhou, Hubei</addr-line>, <country>China</country></aff>
<aff id="aff3"><label><sup>3</sup></label><institution>Department of Earth and Space Sciences, Southern University of Science and Technology</institution>, <addr-line>Shenzhen, Guangdong</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p><bold>Edited by:</bold> Xin Long Xu, Hunan Normal University, China</p></fn>
<fn fn-type="edited-by"><p><bold>Reviewed by:</bold> Kamilla Swart, Hamad bin Khalifa University, Qatar</p>
<p>Yannis Theodorakis, University of Thessaly, Greece</p></fn>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Yizhuo Sun <email>joe.sun@sendelta.com</email></corresp>
<fn fn-type="present-address" id="an1"><label><sup>&#x2020;</sup></label><p><bold>Present Address:</bold> Rui Dai, School of Culture and Creative Arts, University of Glasgow, Glasgow, United Kingdom</p></fn>
</author-notes>
<pub-date pub-type="epub"><day>29</day><month>07</month><year>2025</year></pub-date>
<pub-date pub-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1596196</elocation-id>
<history>
<date date-type="received"><day>19</day><month>03</month><year>2025</year></date>
<date date-type="accepted"><day>04</day><month>07</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 Song, Dai, Zhang and Sun.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>Song, Dai, Zhang and Sun</copyright-holder><license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>The Olympic Games are the world&#x2019;s foremost sporting event, with over 200 countries participating. As the Games evolve, sports, disciplines, and events (SDEs) are periodically added or removed. The selection process for Olympic sports is inherently subjective, as seen with breakdancing&#x2019;s inclusion in the 2024 Paris Olympics and exclusion from the 2028 Los Angeles Olympics. Thus, developing a quantitative decision-making model is crucial for the International Olympic Committee (IOC). This study evaluates IOC criteria for new sports by considering factors such as social media engagement, TV viewership across demographics, affordability, gender equity, youth appeal, cultural diversity, and global involvement. Our model employs a scoring and labelling system based on the Analytic Hierarchy Process (AHP), which calculates the relative importance of each factor. Using Principal Component Analysis (PCA) for feature extraction, we apply a <italic>k</italic>-nearest neighbour (KNN) classifier for further evaluation. We apply this model to assess potential SDEs for the 2032 Brisbane Olympics, considering their popularity in Australia and alignment with Olympic criteria. Our findings suggest that Esports, Australian rules football, and pickleball are the top candidates for inclusion, while tug of war, bowling, and chess are also recommended based on their historical relevance and global popularity.</p>
</abstract>
<kwd-group>
<kwd>the olympic games and SDEs</kwd>
<kwd>scoring and labelling system</kwd>
<kwd>analytic hierarchy process</kwd>
<kwd>principal component analysis</kwd>
<kwd><italic>k</italic>-nearest neighbour classifier</kwd>
</kwd-group><counts>
<fig-count count="10"/>
<table-count count="6"/><equation-count count="121"/><ref-count count="34"/><page-count count="15"/><word-count count="0"/></counts><custom-meta-wrap><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Sports Politics, Policy and Law</meta-value></custom-meta></custom-meta-wrap>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<sec id="s1a" sec-type="background"><label>1.1</label><title>Background</title>
<p>The Olympic Games are the world&#x2019;s largest and most prestigious sporting celebration, uniting over 200 countries to compete in a diverse array of events (<xref ref-type="bibr" rid="B1">1</xref>). Over more than a century, the Olympic program has undergone significant transformation. Traditionally, sports, disciplines, and events (SDEs) such as the marathon, gymnastics, and swimming have been the cornerstone of the Games (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). These long-standing events embody the core Olympic values of excellence, friendship, and respect. Yet, as global society evolves, the need for the Olympic program to remain relevant and dynamic has become increasingly apparent.</p>
<p>In recent years, the International Olympic Committee (IOC) has actively sought to modernize the Games by adapting its program to the interests of a younger and more diverse audience (<xref ref-type="bibr" rid="B3">3</xref>). This strategic shift is exemplified by the Tokyo 2020 Olympics, where sports like karate, sport climbing, surfing, and skateboarding were introduced for the first time, signalling an effort to engage contemporary audiences and to reflect modern cultural trends (<xref ref-type="bibr" rid="B4">4</xref>). Moreover, the debut of breakdancing as an Olympic sport in Paris 2024 further underscores the IOC&#x2019;s commitment to embracing unconventional and urban sports that resonate with today&#x2019;s youth (<xref ref-type="bibr" rid="B5">5</xref>). Meanwhile, traditional events continue to be reviewed and adjusted, ensuring that the overall program remains vibrant, competitive, and reflective of current global interests.</p>
<p>Despite these progressive changes, the process of including or excluding sports remains complex and often subjective. The decision-making process is not only influenced by the sport&#x2019;s global appeal but also by its relevance to the host country. For instance, the inclusion of baseball and softball at the Tokyo 2020 Olympics was partly driven by Japan&#x2019;s deep cultural connection to these sports (<xref ref-type="bibr" rid="B6">6</xref>), whereas in previous editions, the removal of events such as wrestling or baseball/softball has sparked debates over fairness and transparency in the selection process (<xref ref-type="bibr" rid="B6">6</xref>). This subjectivity poses a significant challenge for the IOC as it strives to balance tradition with innovation in a rapidly evolving global sports landscape.</p>
<p>The growing complexity of the global sports ecosystem&#x2014;with its myriad SDEs vying for recognition&#x2014;has heightened the need for a more quantitative and systematic approach to evaluating potential Olympic events. It is imperative for the IOC to assess each proposed sport based on objective and measurable criteria rather than relying solely on subjective opinions or the preferences of individual stakeholders. A transparent and data-driven decision-making model would not only streamline the evaluation process but also bolster the legitimacy of the selection decisions.</p>
<p>Under the core policies of the IOC, factors such as global popularity, gender parity, youth engagement, and stringent anti-doping measures are paramount (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B8">8</xref>). Popularity, which can be measured through metrics such as television viewership, social media engagement, and overall public interest, is a critical factor (<xref ref-type="bibr" rid="B9">9</xref>). Similarly, gender parity ensures that both male and female athletes are provided equal opportunities, reinforcing the ideals of inclusivity and fairness (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B10">10</xref>). Youth engagement is crucial for sustaining the long-term appeal of the Games, as evidenced by the strategic inclusion of sports that attract younger audiences, like skateboarding and surfing (<xref ref-type="bibr" rid="B11">11</xref>). Additionally, robust anti-doping policies are essential to maintain the integrity and fairness of competition (<xref ref-type="bibr" rid="B12">12</xref>).</p>
<p>To address the inherent subjectivity and complexity of current decision-making processes, a more rational approach that leverages quantitative methods and data-driven insights is necessary. Developing a comprehensive scoring system&#x2014;integrated within a decision-making model that accounts for these core criteria&#x2014;would enable more objective assessments of potential new sports. Such a framework promises to reduce bias, provide clarity in the evaluation process, and ensure that the Olympic Games continue to serve as a fair and inclusive platform for athletes worldwide.</p>
<p>In this study, we present a scoring and classification framework that combines expert-informed weighting with data-driven techniques. Specifically, we employ Principal Component Analysis (PCA) and <italic>k</italic>-Nearest Neighbour (KNN) algorithms to support decision-making. PCA is a mathematical technique that simplifies complex datasets by identifying the most important patterns and reducing the number of variables, while preserving the essence of the data. In our case, it allows us to condense multiple evaluation criteria into a few key dimensions that reveal how different sports compare. KNN, on the other hand, is a straightforward method for classification. It operates on the principle that similar sports&#x2014;those with comparable features&#x2014;tend to belong to the same category. To determine whether a new sport aligns with current Olympic trends, KNN looks at the &#x201C;nearest&#x201D; existing sports and assigns a label based on the majority of their classifications. Together, these methods help minimize subjectivity in sport evaluation, enhance model transparency, and offer a replicable, data-supported approach to Olympic program planning.</p>
<p>A transparent and data-driven model is essential for streamlining the evaluation of potential Olympic sports and enhancing the credibility of selection decisions. However, beyond technical criteria, Olympic programme planning must also align with the broader mission of the Olympic Movement. As emphasized in the Olympic Charter and recent IOC initiatives like the Hamburg Declaration (<xref ref-type="bibr" rid="B13">13</xref>), the Games serve not only as a stage for elite competition but also as a global platform to promote values such as excellence, friendship, and respect (<xref ref-type="bibr" rid="B14">14</xref>), while addressing pressing global issues like physical inactivity and environmental sustainability (<xref ref-type="bibr" rid="B15">15</xref>). These principles underscore the need to balance sporting performance with long-term societal benefits in sport selection.</p>
</sec>
<sec id="s1b"><label>1.2</label><title>Problem restatement</title>
<p>The essential question is to develop a model to quantitatively evaluate each SDE according to the established criteria, providing further recommendations for the future Olympic programme. The question can be broken down into the following subquestions:
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p>Problem 1: We need to determine the key factors that need to be considered when evaluating SDEs based on the IOC&#x2019;s criteria. These factors may be quantitative or qualitative. It is also necessary for us to collect relevant data of identified factors.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Problem 2: Using the identified factors in Problem 1, we need to construct a mathematical model to evaluate SDEs within the scope of the Olympic criteria. The proposed model should be applicable to evaluate different SDEs and can return the most suitable ones that align well with the criteria.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Problem 3: We need to test our model with various SDEs, focusing especially on those which were added or removed from recent Olympics or that have continuously been in the Olympic program since the 1988 Games or earlier. In addition, we should also highlight how the model applies to these diverse SDEs and discuss how it supports or refutes their current Olympic status.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Problem 4: We aim to identify three additional SDEs for the 2032 Olympics based on their scores. Furthermore, it is also interesting to provide recommendations for new SDEs that can be considered for inclusion in the Olympics for 2036 or beyond as well.</p></list-item>
</list></p>
</sec>
</sec>
<sec id="s2"><label>2</label><title>Assumptions and notations</title>
<sec id="s2a"><label>2.1</label><title>Assumptions and justifications</title>
<p>To help determine the model scope, in this paper we adopt several assumptions, listed below:
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p>Assumption 1: For each SDE, we mainly focus on the representative leagues or the most famous events and players. This is because the largest events and most popular athletes typically have the greatest impact and the most complete data statistics, and provide the most significant insights for modelling.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Assumption 2: The location where the SDE is held does not have a major impact on the decision-making process. Decision-making of SDEs is typically accomplished and centralized by the IOC. Besides, SDEs held in different regions often share similar features in terms of the Olympic criteria, while athletes and audiences are provided with similar facilities and accommodations.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Assumption 3: The basic rules of SDEs remain unchanged in a certain period of time. While it is true that some SDEs may introduce trial innovations, the basic rules of SDEs are highly unlikely to change significantly. This is because athletes are trained based on stable rules, and such consistency ensures that audiences will remain engaged.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Assumption 4: The SDEs should provide equal chance for men and women to participate. Following the gender-equal principles and the Olympic Agenda 2020&#x002B;5 (<xref ref-type="bibr" rid="B16">16</xref>), it is reasonable to assume that all the SDEs should prioritize gender equity and ensure equal representation of men and women.</p></list-item>
</list></p>
</sec>
<sec id="s2b"><label>2.2</label><title>Symbols and notations</title>
<p>In this paper, we mainly use lowercase letters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM1"><mml:mi>a</mml:mi></mml:math></inline-formula> for scalars, boldface lowercase letters <bold>a</bold> for vectors, and boldface uppercase letters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM2"><mml:mrow><mml:mi mathvariant="bold">A</mml:mi></mml:mrow></mml:math></inline-formula> for matrices. More details of symbols and notations used in our paper are listed in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>.</p>
<table-wrap id="T1" position="float"><label>Table 1</label>
<caption><p>Symbols and notations used in the paper and explanations.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Notation</th>
<th valign="top" align="center">Explanation</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">ASMF</td>
<td valign="top" align="left">Average social media followers of top five players or athletes (m)</td>
</tr>
<tr>
<td valign="top" align="left">EL</td>
<td valign="top" align="left">Expense level</td>
</tr>
<tr>
<td valign="top" align="left">MR</td>
<td valign="top" align="left">Male ratio</td>
</tr>
<tr>
<td valign="top" align="left">FR</td>
<td valign="top" align="left">Female ratio</td>
</tr>
<tr>
<td valign="top" align="left">T5MAI</td>
<td valign="top" align="left">Top-five male athletes&#x2019; average income (m)</td>
</tr>
<tr>
<td valign="top" align="left">T5FAI</td>
<td valign="top" align="left">Top-five female athletes&#x2019; average income (m)</td>
</tr>
<tr>
<td valign="top" align="left">WHIL</td>
<td valign="top" align="left">Whether the SDE has an international league</td>
</tr>
<tr>
<td valign="top" align="left">CP</td>
<td valign="top" align="left">Countries play</td>
</tr>
<tr>
<td valign="top" align="left">APOP</td>
<td valign="top" align="left">Age proportion of players</td>
</tr>
<tr>
<td valign="top" align="left">ALCR</td>
<td valign="top" align="left">Safety level considering risk of injury, training requirements and protection</td>
</tr>
<tr>
<td valign="top" align="left">FPL</td>
<td valign="top" align="left">Fair play (level)</td>
</tr>
<tr>
<td valign="top" align="left">SL</td>
<td valign="top" align="left">Sustainability (level)</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM3"><mml:mi>D</mml:mi></mml:math></inline-formula></td>
<td valign="top" align="left">Dataset of original SDEs with feature vectors and labels</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM4"><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Dataset of extracted features after PCA transformation</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM5"><mml:mi>N</mml:mi></mml:math></inline-formula></td>
<td valign="top" align="left">Number of samples in the dataset</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM6"><mml:mi>d</mml:mi></mml:math></inline-formula></td>
<td valign="top" align="left">Dimension of feature vector</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM7"><mml:mi>k</mml:mi></mml:math></inline-formula></td>
<td valign="top" align="left">Number of nearest neighbours</td>
</tr>
<tr>
<td valign="top" align="left">x</td>
<td valign="top" align="left">Constructed feature vector of length d</td>
</tr>
<tr>
<td valign="top" align="left">X</td>
<td valign="top" align="left">Constructed feature matrix of size <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM8"><mml:mi>d</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>N</mml:mi></mml:math></inline-formula>, each column represents a feature vector</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM9"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Element in the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM10"><mml:mi>i</mml:mi></mml:math></inline-formula>-th row and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM11"><mml:mi>j</mml:mi></mml:math></inline-formula>-th column of the feature matrix</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM12"><mml:mrow><mml:mover><mml:mi>X</mml:mi><mml:mo>&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula></td>
<td valign="top" align="left">Feature matrix after data rescaling and normalization</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM13"><mml:msub><mml:mrow><mml:mover><mml:mi>X</mml:mi><mml:mo>&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">centered</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Feature matrix after data centralization</td>
</tr>
<tr>
<td valign="top" align="left">C</td>
<td valign="top" align="left">Covariance matrix</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM14"><mml:msub><mml:mi>U</mml:mi><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">PCA projection matrix consisting of top <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM15"><mml:mi>p</mml:mi></mml:math></inline-formula> eigenvectors</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM16"><mml:msub><mml:mrow><mml:mover><mml:mi>X</mml:mi><mml:mo>&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Matrix representing the linear transformation after PCA projection</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM17"><mml:msub><mml:mi>x</mml:mi><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Query feature vector of test SDE data</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM18"><mml:mrow><mml:mi mathvariant="normal">dist</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula></td>
<td valign="top" align="left">The Euclidean distance between two feature vectors <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM19"><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM20"><mml:msub><mml:mi>x</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:math></inline-formula></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s3"><label>3</label><title>Methodology</title>
<sec id="s3a"><label>3.1</label><title>Model overview</title>
<p>In this paper, to address the concerns of the IOC, we propose a scoring-based classification model, which consists of two main building blocks, namely &#x201C;the scoring and labelling system,&#x201D; and the &#x201C;feature extraction and classification phase.&#x201D; The flowchart of our model is illustrated in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>.</p>
<fig id="F1" position="float"><label>Figure 1</label>
<caption><p>Flowchart of the proposed scoring-based classification model.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g001.tif"><alt-text content-type="machine-generated">Flowchart of a scoring and labeling system. SDEs (Training) undergo feature engineering based on six criteria: Popularity, Gender Equity, Sustainability, Inclusivity, Youth Appeal, and Safety. These are scored using the AHP method, leading to a labeling process classifying items as High, Moderate, or Low. SDEs (Testing) involve feature extraction with PCA producing a projection matrix. This matrix is used by a KNN classifier to predict labels.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3b"><label>3.2</label><title>Scoring and labelling</title>
<p>To assign scores to different SDEs, we develop a scoring and labelling system, including four consecutive steps: identification of important factors, feature engineering, the AHP method and the labelling process.</p>
<sec id="s3b1"><label>3.2.1</label><title>Identification of important factors: question 1</title>
<p>In this section, we investigate and identify important factors related to the IOC criteria of new SDE inclusion for the Olympic Games. Our data are collected from publicly available databases, reports and research papers (<xref ref-type="bibr" rid="B17">17</xref>&#x2013;<xref ref-type="bibr" rid="B20">20</xref>).</p>
<p>
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p>Popularity and accessibility. To measure popularity, we use the average number of social media followers of the top five athletes, which is a quantitative and deterministic variable measured in millions. For accessibility, we consider the affordability level in terms of the costs of equipment and new construction, which is a qualitative variable ranging from 1 to 5. The larger the value, the higher the accessibility. For example, benefiting from a huge fan base and relatively low cost, football, basketball and table tennis are considered highly popular and accessible. While the average number of social media followers among top athletes provides a proxy for global visibility and youth engagement, it may not fully represent grassroots or amateur-level popularity. Actual participation metrics, such as the number of registered athletes or nationwide participation rates, could serve as valuable complements, but are not uniformly available across sports. This limitation is noted, and future research may incorporate broader participation indicators when data access improves.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Gender equity. The indicators for gender equality in SDEs lie in two different aspects. First, we use the ratio of professional female players to male players in major leagues/events to determine the participation factor, which is a quantitative and deterministic variable. Second, since gender inequality is also evident in terms of the income gap, it is measured by the income ratio of the top five male and female athletes.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Sustainability. Sports events and activities may inevitably produce carbon emissions and resource waste. Besides, the new construction of sports facilities is also associated with significant environmental impact. Therefore, we can use a qualitative variable to rate the sustainability level from 1 to 5. The sustainability of an SDE is considered better with higher ratings.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Inclusivity. To evaluate the inclusivity of a given SDE, we consider two factors: the number of countries that frequently host or broadcast the competition, and whether it has famous leagues or events in at least 4 continents. We can use qualitative variables to rate these two factors. We recognize that measuring inclusivity solely through the international presence of broadcasting and hosting may bias toward media-oriented sports. Therefore, while this approach captures global visibility and infrastructure readiness, it does not fully account for community-level engagement. The inclusion of metrics such as the number of participating countries in international federations or athlete registration data would provide a more comprehensive view and should be considered in future model updates.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Relevance and innovation. As youth appeal and engagement are playing an increasingly important role in the Olympic Games, we investigate and analyze the TV audiences, which are divided into different age groups. We use the ratio of audiences under 35 years as an indicator for this criterion, which is a quantitative and deterministic variable.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Safety and fair play. For the safety level, we consider the risk of injury, training requirements and protective measures. For the fair play level, we collect historical data of the doping records. These two factors are evaluated using qualitative variables ranging from 1 to 5.</p></list-item>
</list><xref ref-type="table" rid="T2">Table&#x00A0;2</xref> lists statistics of 6 representative sports according to different criteria and identified factors. From <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>, we make several interesting observations, as follows:
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p>Basketball has the highest popularity and accessibility in terms of a large audience base and relatively low cost compared to more expensive sports such as sailing and golf, in that basketball is played in almost every country with famous international leagues and organizations like FIBA and NBA. Besides, basketball has simple equipment requirements, making it affordable for many ordinary people.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Gymnastics enjoys the highest level of gender equity as it includes distinct sets of apparatuses and events tailored to female players, which increases their media attention and income.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Golf has the lowest sustainability score among all sports, since it has high water consumption and occupies vast amounts of land, which may eventually lead to deforestation or destruction of natural habitats. Besides, as the maintenance and construction of golf courses will generate high carbon emissions, it is also considered energy-intensive.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Golf, sailing and shooting have lower youth appeal because the pace of the game is slow, and the cost of specialized equipment can be very high.</p></list-item>
</list></p>
<table-wrap id="T2" position="float"><label>Table 2</label>
<caption><p>Statistics of identified factors of representative sports.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Criteria</th>
<th valign="top" align="left">Factor</th>
<th valign="top" align="center">Basketball</th>
<th valign="top" align="center">Shooting</th>
<th valign="top" align="center">Sailing</th>
<th valign="top" align="center">Gymnastics</th>
<th valign="top" align="center">Tennis</th>
<th valign="top" align="center">Golf</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Popularity and accessibility</td>
<td valign="top" align="left">Social media followers (m)</td>
<td valign="top" align="center">120</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">15</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Affordability (level)</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Gender equity</td>
<td valign="top" align="left">Participant ratio (f/m)</td>
<td valign="top" align="center">0.053</td>
<td valign="top" align="center">0.429</td>
<td valign="top" align="center">0.251</td>
<td valign="top" align="center">1.50</td>
<td valign="top" align="center">0.668</td>
<td valign="top" align="center">0.251</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Income ratio (f/m)</td>
<td valign="top" align="center">0.152</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.668</td>
<td valign="top" align="center">2.06</td>
<td valign="top" align="center">0.471</td>
<td valign="top" align="center">0.268</td>
</tr>
<tr>
<td valign="top" align="left">Sustainability</td>
<td valign="top" align="left">Environmental impact (level)</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Inclusivity</td>
<td valign="top" align="left">Number of countries</td>
<td valign="top" align="center">200</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">150</td>
<td valign="top" align="center">150</td>
<td valign="top" align="center">120</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Famous leagues in at least 4 continents (1 = yes, 0 = no)</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Relevance and innovation</td>
<td valign="top" align="left">Youth appeal</td>
<td valign="top" align="center">0.60</td>
<td valign="top" align="center">0.30</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">0.55</td>
<td valign="top" align="center">0.6</td>
<td valign="top" align="center">0.25</td>
</tr>
<tr>
<td valign="top" align="left">Safety and fair play</td>
<td valign="top" align="left">Safety (level)</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">5</td>
</tr>
<tr>
<td valign="top" align="left"/>
<td valign="top" align="left">Fair play (level)</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">5</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The summary statistics of the relevant factors can help us better understand the development and operations of different SDEs.</p>
</sec>
<sec id="s3b2"><label>3.2.2</label><title>AHP method: question 2</title>
<p>To address the concerns of the IOC for SDE evaluations, we propose to adopt the AHP method (<xref ref-type="bibr" rid="B21">21</xref>) to complete our scoring system. Based on our analysis using the AHP method, we identify the top 5 SDEs which align best with the IOC criteria: football (soccer), basketball, gymnastics, tennis and volleyball. <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref> shows the calculated scores of the top 20 SDEs, and more details are given in this section.</p>
<fig id="F2" position="float"><label>Figure 2</label>
<caption><p>Results of top 20 SDEs based on our scoring scheme.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g002.tif"><alt-text content-type="machine-generated">Bar chart ranking sports based on scores. Football leads with 0.1436, followed by Basketball (0.0772), and Tennis (0.0676). Lowest are Triathlon (0.0363) and Handball (0.0362). Scores gradually decrease from top to bottom.</alt-text>
</graphic>
</fig>
<p><bold>Defining the criteria.</bold> Based on the Olympic Agenda 2020 (<xref ref-type="bibr" rid="B16">16</xref>), the goal of the IOC is to prioritize youth engagement, gender balance, and innovation. Hence, as illustrated in <xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>, we can divide the six decisive criteria into two categories, namely the main criteria and the sub-criteria. Briefly, the main criteria include popularity and accessibility, gender equity, and relevance and innovation, while the sub-criteria consist of sustainability, inclusivity, and safety and fair play.</p>
<fig id="F3" position="float"><label>Figure 3</label>
<caption><p>List of IOC criteria.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g003.tif"><alt-text content-type="machine-generated">Flowchart showing "6 IOC criteria" with two branches: Main Influencing Variables and Secondary Influencing Variables. Main Influencing Variables include Popularity and Accessibility, Gender Equity, and Relevance Innovation. Secondary Influencing Variables include Sustainability, Inclusivity, and Safety and Fair Play.</alt-text>
</graphic>
</fig>
<p>More specifically, as Andrew Moore notes, &#x201C;The Olympics are unlike any other sporting event in the world because of their capacity to unite people through a shared enthusiasm for sport on a global scale,&#x201D; which underlines the importance of popularity and accessibility. Besides, Paris 2024 sets a milestone as the first Olympic Games to achieve full gender parity (<xref ref-type="bibr" rid="B22">22</xref>), which indicates the significance of gender equity for SDEs. In addition, as the Olympic Games tend to introduce more SDEs related to young people, such as the breaking, BMX freestyle, skateboarding and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM21"><mml:mn>3</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula> basketball events, innovation and youth appeal also play an important role. For the sub-criteria, safety and fair play should be regarded as the one with the highest weight among the three, since an Olympian&#x2019;s mindset is to exhibit integrity and positive character in all aspects of sport and in life. Therefore, the ordering of the weights of these six criteria is popularity and accessibility <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM22"><mml:mo>&#x227B;</mml:mo></mml:math></inline-formula> gender equity <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM23"><mml:mo>&#x227B;</mml:mo></mml:math></inline-formula> relevance and innovation <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM24"><mml:mo>&#x227B;</mml:mo></mml:math></inline-formula> safety and fair play <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM25"><mml:mo>&#x227B;</mml:mo></mml:math></inline-formula> sustainability <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM26"><mml:mo>&#x227B;</mml:mo></mml:math></inline-formula> inclusivity.
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p>Hierarchy structure. The AHP method allows us to assess the relative weight of multiple criteria against given criteria in an intuitive manner. A hierarchy structure of variables is illustrated in <xref ref-type="fig" rid="F4">Figure&#x00A0;4</xref>.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Pairwise comparison matrix. To perform pair-wise comparison, we construct a 6 by 6 comparison matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM27"><mml:mrow><mml:mi mathvariant="bold">A</mml:mi></mml:mrow></mml:math></inline-formula> to characterize relative preference in each compared pair using a 1&#x2013;5 scale for relative importance. For example, if popularity and accessibility is regarded more significant than inclusivity, then the popularity and accessibility-inclusivity value will be 4, which indicates that popularity and accessibility is considered 4 times as important as inclusivity. <xref ref-type="table" rid="T3">Table&#x00A0;3</xref> presents details of the constructed comparison matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM28"><mml:mrow><mml:mi mathvariant="bold">A</mml:mi></mml:mrow></mml:math></inline-formula>.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Priority vector. After establishing the comparison matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM29"><mml:mrow><mml:mi mathvariant="bold">A</mml:mi></mml:mrow></mml:math></inline-formula>, we then calculate the priority vector <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM30"><mml:mrow><mml:mi mathvariant="bold">w</mml:mi></mml:mrow></mml:math></inline-formula>, which represents the relative weights of the criteria. Mathematically, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM31"><mml:mrow><mml:mi mathvariant="bold">w</mml:mi></mml:mrow></mml:math></inline-formula> can be obtained via<disp-formula id="disp-formula1"><label>(1)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM1"><mml:mrow><mml:mi mathvariant="bold">A</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="bold">w</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">max</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mi mathvariant="bold">w</mml:mi></mml:mrow></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM32"><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">max</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:math></inline-formula> is the largest eigenvalue of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM33"><mml:mrow><mml:mi mathvariant="bold">A</mml:mi></mml:mrow></mml:math></inline-formula>. Then, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM34"><mml:mrow><mml:mi mathvariant="bold">w</mml:mi></mml:mrow></mml:math></inline-formula> is further normalized so that the sum of its elements equals 1. 
The resulting weights of the criteria, obtained following <xref ref-type="disp-formula" rid="disp-formula1">Equation 1</xref>, are illustrated in <xref ref-type="fig" rid="F5">Figure&#x00A0;5</xref>.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Consistency check. Once weights are obtained, it is necessary to check the consistency. Inevitably, the final matrix of criteria may be subject to inconsistency to varying degrees, because the numerical values are derived from subjective preferences. Therefore, to ensure consistency, we compute the consistency index (CI) via<disp-formula id="disp-formula2"><label>(2)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM2"><mml:mi>C</mml:mi><mml:mi>I</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">max</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mo>,</mml:mo></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM35"><mml:mi>n</mml:mi></mml:math></inline-formula> refers to the number of criteria. Based on the CI from <xref ref-type="disp-formula" rid="disp-formula2">Equation 2</xref> and the Random Index (RI), the consistency ratio (CR) can be calculated via<disp-formula id="disp-formula3"><label>(3)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM3"><mml:mi>C</mml:mi><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>C</mml:mi><mml:mi>I</mml:mi></mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mi>I</mml:mi></mml:mrow></mml:mfrac></mml:mrow><mml:mo>.</mml:mo></mml:math></disp-formula>If <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM36"><mml:mi>C</mml:mi><mml:mi>R</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.1</mml:mn></mml:math></inline-formula>, then the comparisons are considered consistent. The random consistency index values for matrix sizes from 3 to 16 are listed in <xref ref-type="table" rid="T4">Table&#x00A0;4</xref>. 
By applying SPSSAU (<xref ref-type="bibr" rid="B23">23</xref>), we confirm that our CR value calculated based on <xref ref-type="disp-formula" rid="disp-formula3">Equation 3</xref> is much lower than 0.1, thereby demonstrating the consistency of the constructed comparison matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM37"><mml:mrow><mml:mi mathvariant="bold">A</mml:mi></mml:mrow></mml:math></inline-formula>.</p></list-item>
</list></p>
<fig id="F4" position="float"><label>Figure 4</label>
<caption><p>Illustration of different criteria and hierarchy structure.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g004.tif"><alt-text content-type="machine-generated">Flowchart depicting criteria to select a sport matching IOC standards. Main criteria include popularity, gender equity, inclusivity, relevance, safety, and sustainability. Sub-criteria listed as abbreviations like ASMF, EL, and MR. Each sub-criterion links to a "Score of Each SDE". Color-coded sections guide the organization: Goal in blue, Criteria in red, Sub-Criteria in green, Result in yellow.</alt-text>
</graphic>
</fig>
<table-wrap id="T3" position="float"><label>Table 3</label>
<caption><p>Constructed comparison matrix based on pair-wise relationship among different variables.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Criteria</th>
<th valign="top" align="center">Popularity and accessibility</th>
<th valign="top" align="center">Gender equity</th>
<th valign="top" align="center">Sustainability</th>
<th valign="top" align="center">Inclusivity</th>
<th valign="top" align="center">Relevance and innovation</th>
<th valign="top" align="center">Safety and fair play</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Popularity and accessibility</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">2.00</td>
<td valign="top" align="center">3.00</td>
<td valign="top" align="center">4.00</td>
<td valign="top" align="center">3.00</td>
<td valign="top" align="center">2.00</td>
</tr>
<tr>
<td valign="top" align="left">Gender equity</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">3.00</td>
<td valign="top" align="center">4.00</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">2.00</td>
</tr>
<tr>
<td valign="top" align="left">Sustainability</td>
<td valign="top" align="center">0.33</td>
<td valign="top" align="center">0.33</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">2.00</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">1.00</td>
</tr>
<tr>
<td valign="top" align="left">Inclusivity</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">0.33</td>
<td valign="top" align="center">1.00</td>
</tr>
<tr>
<td valign="top" align="left">Relevance and innovation</td>
<td valign="top" align="center">0.33</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">2.00</td>
<td valign="top" align="center">3.00</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">2.00</td>
</tr>
<tr>
<td valign="top" align="left">Safety and fair play</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">1.00</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F5" position="float"><label>Figure 5</label>
<caption><p>Illustration of weights on different criteria.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g005.tif"><alt-text content-type="machine-generated">Donut chart illustrating six categories with percentages: "Popularity &#x0026; Accessibility" at 33.08%, "Gender Equity" at 21.68%, "Relevance &#x0026; Innovation" at 18.03%, "Safety &#x0026; Fair Play" at 10.36%, "Sustainability" at 9.7%, and "Inclusivity" at 6.88%. Each segment is distinctly colored.</alt-text>
</graphic>
</fig>
<table-wrap id="T4" position="float"><label>Table 4</label>
<caption><p>Random consistency index.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM38"><mml:mi>n</mml:mi></mml:math></inline-formula></th>
<th valign="top" align="center">3</th>
<th valign="top" align="center">4</th>
<th valign="top" align="center">5</th>
<th valign="top" align="center">6</th>
<th valign="top" align="center">7</th>
<th valign="top" align="center">8</th>
<th valign="top" align="center">9</th>
<th valign="top" align="center">10</th>
<th valign="top" align="center">11</th>
<th valign="top" align="center">12</th>
<th valign="top" align="center">13</th>
<th valign="top" align="center">14</th>
<th valign="top" align="center">15</th>
<th valign="top" align="center">16</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM39"><mml:mi>R</mml:mi><mml:mi>I</mml:mi></mml:math></inline-formula></td>
<td valign="top" align="center">0.52</td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="center">1.12</td>
<td valign="top" align="center">1.26</td>
<td valign="top" align="center">1.36</td>
<td valign="top" align="center">1.41</td>
<td valign="top" align="center">1.46</td>
<td valign="top" align="center">1.49</td>
<td valign="top" align="center">1.52</td>
<td valign="top" align="center">1.54</td>
<td valign="top" align="center">1.56</td>
<td valign="top" align="center">1.58</td>
<td valign="top" align="center">1.59</td>
<td valign="top" align="center">1.5943</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec><sec id="s3b3"><label>3.2.3</label><title>Feature engineering</title>
<p>Although the scoring mechanism may be useful to rate certain SDEs, it can be affected by subjectivity and noise in data collection and weight decision. As can be seen from <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>, SDEs of similar scores may share common features. Therefore, based on the 10 identified factors of IOC criteria listed in previous sections and <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>, we can construct feature vectors for SDEs. Each SDE can be described as a feature vector x of length 10, and all feature vectors are then stacked into a large feature matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM40"><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mi>N</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mn>10</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mi>N</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, as presented in <xref ref-type="table" rid="T2">Table&#x00A0;2</xref> and <xref ref-type="fig" rid="F6">Figure&#x00A0;6a</xref>. 
From <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>, the factors are measured on different scales, thus to get rid of the scale and let the model focus on patterns in data, we apply row-wise normalization via<disp-formula id="disp-formula4"><label>(4)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM4"><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width=".1em"/><mml:mo>&#x003A;</mml:mo><mml:mspace width=".1em"/><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width=".1em"/><mml:mo>&#x003A;</mml:mo><mml:mspace width=".1em"/><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mspace width=".1em"/><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo>,</mml:mo><mml:mspace width="1em"/><mml:mi mathvariant="normal">&#x2200;</mml:mi><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mn>10</mml:mn></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM41"><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width=".1em"/><mml:mo>&#x003A;</mml:mo><mml:mspace width=".1em"/><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" 
id="IM42"><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width=".1em"/><mml:mo>&#x003A;</mml:mo><mml:mspace width=".1em"/><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> represent the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM43"><mml:mi>i</mml:mi></mml:math></inline-formula>-th row of the original and the normalized data, respectively. <xref ref-type="disp-formula" rid="disp-formula4">Equation 4</xref> rescales each factor such that the sum of its resulting elements is 1, which converts raw frequencies or values into discrete probability distributions. The data rescaling and normalization process is illustrated in <xref ref-type="fig" rid="F6">Figure&#x00A0;6b</xref>.</p>
<fig id="F6" position="float"><label>Figure 6</label>
<caption><p>Illustration of the feature engineering process. <bold>(a)</bold> SDE feature matrix and <bold>(b)</bold> Data rescaling and normalization.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g006.tif"><alt-text content-type="machine-generated">On the left, a schematic SDE feature matrix with colored circles representing data points, labeled from Factor 1 to Factor 10 and SDE 1 to SDE N. On the right, two scatter plots: one labeled "Before normalization" showing data points scattered randomly, and the other labeled "After normalization" with points confined within a one-by-one square. A blue arrow between the plots indicates the transformation.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3b4"><label>3.2.4</label><title>Labelling</title>
<p>After obtaining the weighted scores of SDEs, we can create labels accordingly. Briefly, we rank the scores in descending order and classify SDEs in three different categories of priority: High, Moderate and Low. The corresponding labels for High, Moderate and Low ratings are 1, 0, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM44"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>1, respectively. Therefore, the SDE dataset can be represented by <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM45"><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mi>N</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>N</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM46"><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> is the corresponding label.</p>
<p>
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p>High. The top 12 SDEs are labelled &#x201C;High,&#x201D; which reflects their widespread global appeal across different age groups, significant media coverage and importance in the Olympics. Sports such as swimming, gymnastics, and basketball fall in this category.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Moderate. SDEs ranked between 13 and 27 are labelled &#x201C;Moderate,&#x201D; which describes SDEs that are popular but may not have as global a reach or as large a fan base as the top SDEs. The Moderate SDEs include judo, handball and archery.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Low. The rest are categorized as &#x201C;Low,&#x201D; due to their limited international participation, gender inequality or doping concerns. For example, flag football and fencing have a more regional following than basketball. In addition, as weightlifting suffers from doping and corruption scandals, it is also rated low by our scoring system.</p></list-item>
</list>The scoring and labelling system provides us with training/testing samples and corresponding labels that can be used to learn patterns and relationships in the data. Therefore, we convert the original problem into a classification task with three different classes or categories: High, Moderate and Low.</p>
</sec>
</sec>
<sec id="s3c"><label>3.3</label><title>Feature extraction and classification</title>
<sec id="s3c1"><label>3.3.1</label><title>Feature extraction via principal component analysis</title>
<p>Given the scores of SDEs, a straightforward way to determine the category of a new SDE is to directly compare its score with those of the existing SDEs. However, such a naive comparison can be easily affected by changes in the data and by variations in the criteria and weights. To reduce subjectivity, we utilize principal component analysis (PCA) (<xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>) to capture the most important features. Specifically, we start by centering the normalized data <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM47"><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> via<disp-formula id="disp-formula5"><label>(5)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM5"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">centered</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03BC;</mml:mi></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM48"><mml:mi>&#x03BC;</mml:mi></mml:math></inline-formula> is the mean vector calculated as<disp-formula id="disp-formula6"><label>(6)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM6"><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac></mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo 
stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub></mml:math></disp-formula>Then, according to <xref ref-type="disp-formula" rid="disp-formula5">Equations 5</xref> and <xref ref-type="disp-formula" rid="disp-formula6">6</xref>, we can obtain the covariance matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM49"><mml:mrow><mml:mi mathvariant="bold">C</mml:mi></mml:mrow></mml:math></inline-formula> that characterizes pair-wise relationships via<disp-formula id="disp-formula7"><label>(7)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM7"><mml:mrow><mml:mi mathvariant="bold">C</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac></mml:mrow><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">centered</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x002A;</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">centered</mml:mi></mml:mrow></mml:mrow><mml:mi>T</mml:mi></mml:msubsup></mml:math></disp-formula>Following the results of <xref ref-type="disp-formula" rid="disp-formula7">Equation 7</xref>, we can obtain the eigenvectors <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM50"><mml:mrow><mml:mi mathvariant="bold">U</mml:mi></mml:mrow></mml:math></inline-formula> and eigenvalues <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM51"><mml:mi>&#x03BB;</mml:mi></mml:math></inline-formula> of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM52"><mml:mrow><mml:mi mathvariant="bold">C</mml:mi></mml:mrow></mml:math></inline-formula> by 
applying PCA. After sorting the eigenvalues in descending order, we then select the top <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM53"><mml:mi>p</mml:mi></mml:math></inline-formula> eigenvectors as the projection matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM54"><mml:msub><mml:mrow><mml:mi mathvariant="bold">U</mml:mi></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mn>10</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mi>p</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> via<disp-formula id="disp-formula8"><label>(8)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM8"><mml:msub><mml:mrow><mml:mi mathvariant="bold">U</mml:mi></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">u</mml:mi></mml:mrow><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">u</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold">u</mml:mi></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></disp-formula>The low-dimensional feature embeddings <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM55"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mspace 
width=".1em"/><mml:mi>p</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>N</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> are obtained by projecting the normalized data <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM56"><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> onto the selected principal components <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM57"><mml:msub><mml:mrow><mml:mi mathvariant="bold">U</mml:mi></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> from <xref ref-type="disp-formula" rid="disp-formula8">Equation 8</xref> via<disp-formula id="disp-formula9"><label>(9)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM9"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi mathvariant="bold">U</mml:mi></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow><mml:mi>T</mml:mi></mml:msubsup><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></disp-formula>Note that PCA is an unsupervised feature extraction method; thus, when new SDEs are considered, they can also be included to update the projection matrix in an incremental manner to improve the quality of feature learning and dimensionality reduction.</p>
</sec>
<sec id="s3c2"><label>3.3.2</label><title>K-nearest neighbour classifier</title>
<p>Based on the features <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM58"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">X</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula> extracted by PCA, the two-dimensional and three-dimensional feature embeddings of SDEs are visualized in <xref ref-type="fig" rid="F7">Figure&#x00A0;7</xref>. It can be seen that SDEs falling into the High and Low categories are separated, while the majority of Moderate samples are close to each other. Furthermore, the decision boundary between classes is highly irregular and non-linear; thus, in addition to the SDE scores, we also take advantage of the K-nearest neighbour (KNN) classifier (<xref ref-type="bibr" rid="B26">26</xref>) to determine the class label of test SDEs.</p>
<fig id="F7" position="float"><label>Figure 7</label>
<caption><p>Visualization of PCA feature embeddings of SDEs. <bold>(a)</bold> 2D feature embedding and <bold>(b)</bold> 3D feature embedding.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g007.tif"><alt-text content-type="machine-generated">Two scatter plots comparing feature embeddings. The left plot shows a 2D feature embedding with points marked as high (red circles), moderate (green diamonds), and low (blue triangles). The right plot displays a 3D feature embedding with the same color and shape coding. Both plots illustrate the spatial distribution of feature categories.</alt-text>
</graphic>
</fig>
<p>Specifically, given the PCA-based training data <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM59"><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:msub><mml:mi>p</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:msub><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:msub><mml:mi>p</mml:mi><mml:mi>N</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>N</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> and a query SDE feature vector <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM60"><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mi>q</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mi 
mathvariant="double-struck">R</mml:mi></mml:mrow></mml:mrow><mml:mn>10</mml:mn></mml:msup></mml:math></inline-formula>, we normalize the query vector and obtain its PCA projection <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM61"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> based on <xref ref-type="disp-formula" rid="disp-formula4">Equations 4</xref> and <xref ref-type="disp-formula" rid="disp-formula9">9</xref>, respectively. Then we calculate the similarity between <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM62"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula> and each data point <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM63"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula> using the Euclidean distance metric <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM64"><mml:mrow><mml:mi mathvariant="normal">dist</mml:mi></mml:mrow></mml:math></inline-formula> via<disp-formula id="disp-formula10"><label>(10)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM10"><mml:mrow><mml:mi mathvariant="normal">dist</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi 
mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:msub><mml:msub><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mn>2</mml:mn></mml:msub></mml:math></disp-formula>Then, following <xref ref-type="disp-formula" rid="disp-formula10">Equation 10</xref>, we are able to extract from <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM65"><mml:msub><mml:mi>D</mml:mi><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula> the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM66"><mml:mi>k</mml:mi></mml:math></inline-formula>-nearest neighbours of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM67"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula>, which are denoted by <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" 
id="IM68"><mml:msub><mml:mi>N</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>. 
Based on the identified <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM69"><mml:mi>k</mml:mi></mml:math></inline-formula> neighbours, we can assign a label <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM70"><mml:msub><mml:mi>y</mml:mi><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula> to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM71"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula> by performing majority voting that returns the class with the most votes.</p>
</sec>
<sec id="s3c3"><label>3.3.3</label><title>Complexity analysis</title>
<p>The feature extraction and classification method is summarized in <xref ref-type="table" rid="A1">Algorithm 1</xref>. The computational burden of the proposed method lies mainly in two parts, namely the PCA feature extraction and the KNN classifier. The computational complexity of PCA is <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM72"><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>N</mml:mi><mml:msup><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mi>p</mml:mi><mml:mn>3</mml:mn></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, which consists of deriving the covariance matrix and its eigenvectors. The computational complexity of KNN is <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM73"><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>N</mml:mi><mml:mi>p</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, which involves calculating Euclidean distances for all <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM74"><mml:mi>N</mml:mi></mml:math></inline-formula> samples. Therefore, the total computational complexity is <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM75"><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>N</mml:mi><mml:msup><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mi>p</mml:mi><mml:mn>3</mml:mn></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for the proposed method. Since the matrix multiplication and KNN neighbour search can both be parallelized, the algorithm can be made more efficient by adopting parallel computing techniques.</p>
<table-wrap id="A1" position="float"><label>Algorithm 1</label>
<caption><p>The PCA-based KNN classifier.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
</colgroup>
<tbody>
<tr>
<td valign="top" align="left">
<list list-type="simple">
<list-item><label>&#x2022;</label>
<p>Input: SDE dataset <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM76"><mml:msub><mml:mi>D</mml:mi><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula>, PCA matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM77"><mml:msub><mml:mrow><mml:mi mathvariant="bold">U</mml:mi></mml:mrow><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula>, query SDE feature vector <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM78"><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula>, number of neighbours <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM79"><mml:mi>k</mml:mi></mml:math></inline-formula>.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Output: Estimated label <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM80"><mml:msub><mml:mi>y</mml:mi><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula>.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Step 1 (PCA projection): Calculate the extracted feature <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM81"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula> via <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM82"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi mathvariant="bold">U</mml:mi></mml:mrow><mml:mrow><mml:mspace width=".1em"/><mml:mi>p</mml:mi></mml:mrow><mml:mi>T</mml:mi></mml:msubsup><mml:msub><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula>.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Step 2 (KNN search): Calculate the Euclidean distance between <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM83"><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mi mathvariant="bold">x</mml:mi></mml:mrow><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula> and all samples of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM84"><mml:msub><mml:mi>D</mml:mi><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula> according to <xref ref-type="disp-formula" rid="disp-formula10">Equation 10</xref>, and then identify its <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM85"><mml:mi>k</mml:mi></mml:math></inline-formula> nearest neighbours <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM86"><mml:msub><mml:mi>N</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula>.</p></list-item>
<list-item><label>&#x2022;</label>
<p>Step 3 (Majority voting): The predicted class label <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM87"><mml:msub><mml:mi>y</mml:mi><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula> is determined by voting among the nearest neighbours <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM88"><mml:msub><mml:mi>N</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula>, and the majority class label is assigned to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM89"><mml:msub><mml:mi>y</mml:mi><mml:mi>q</mml:mi></mml:msub></mml:math></inline-formula>.</p></list-item>
</list></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</sec>
<sec id="s4"><label>4</label><title>Experiments</title>
<p>In this section, we mainly report the results of our experiments, which are performed with MATLAB R2024a on a mid-range computer equipped with a Core(TM) i5 processor <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM90"><mml:mrow><mml:mrow><mml:mo mathvariant="monospace">@</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> 2.9&#x2009;GHz and 16&#x2009;GB RAM.</p>
<sec id="s4a"><label>4.1</label><title>Experimental settings</title>
<p>Dataset: We collected a comprehensive dataset D consisting of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM91"><mml:mi>N</mml:mi><mml:mo>=</mml:mo><mml:mn>50</mml:mn></mml:math></inline-formula> samples covering a wide range of different SDEs, such as basketball, skating, fencing and cycling.</p>
<p>Parameters: There are several key parameters of the proposed PCA-based KNN classification model. Specifically, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM92"><mml:mi>p</mml:mi></mml:math></inline-formula> determines the dimensionality of the low-dimensional embeddings of feature vectors and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM93"><mml:mi>k</mml:mi></mml:math></inline-formula> determines the number of nearest neighbours of the KNN classifier. In our experiments, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM94"><mml:mi>p</mml:mi></mml:math></inline-formula> is chosen based on 95&#x0025; explained variance, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM95"><mml:mi>k</mml:mi></mml:math></inline-formula> is chosen from 3 to 8 using grid search.</p>
</sec>
<sec id="s4b"><label>4.2</label><title>Model testing: question 3</title>
<sec id="s4b1"><label>4.2.1</label><title>Evaluations of recently added or removed Olympic SDEs</title>
<p>To evaluate SDEs that have been added or removed from recent Olympics, we consider 4 different SDEs as our test set <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM96"><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">test</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:math></inline-formula>, including breakdancing, cricket, flag football and basketball (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM97"><mml:mn>3</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula>), and the rest are used for training. Briefly, breakdancing was introduced in the 2024 Paris Olympics but will be excluded from the 2028 Los Angeles Olympics. Cricket will return at the 2028 Olympics for the first time since its appearance in 1900. Similarly, flag football will also be included, which will mark its Olympic debut. Besides, basketball (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM98"><mml:mn>3</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula>) was introduced in the 2020 Tokyo Olympics and will also be included in the next Olympics.</p>
<p>First, we utilize our scoring system to derive the feature vectors and corresponding scores of the selected 4 SDEs. Then we can apply our PCA-based KNN model to obtain the estimated labels. The results are shown in <xref ref-type="fig" rid="F8">Figure&#x00A0;8a</xref> and <xref ref-type="sec" rid="s11">Supplementary Table S1</xref>. It can be seen that our scoring system and proposed classification method can effectively and accurately characterize the current status of selected SDEs. Specifically, Breakdancing and flag football are labelled &#x201C;Low&#x201D; according to our model&#x2019;s predictions. Interestingly, breakdancing will be removed and flag football is not currently included. Besides, Cricket and Basketball are labelled &#x201C;High&#x201D; since they have a large audience base and enjoy a high level of inclusivity. Note that Basketball (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM99"><mml:mn>3</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula>) was added in the 2020 Tokyo Olympics and Cricket will return to the Olympics in 2028. Our results are consistent with both the development of the SDEs and their current status in modern Olympics.</p>
<fig id="F8" position="float"><label>Figure 8</label>
<caption><p>Statistics of <bold>(a)</bold> recently added or removed SDEs and <bold>(b)</bold> continuous SDEs.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g008.tif"><alt-text content-type="machine-generated">Dual bar charts compare statistics for recently added or removed sports (breakdancing, cricket, flag football, basketball 3x3) with continuous sports (tennis, fencing, judo, weightlifting). Each bar represents criteria like safety, relevance, inclusivity, sustainability, gender equity, and popularity, with varying heights indicating the weight of each criterion. Different colors denote different criteria.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4b2"><label>4.2.2</label><title>Evaluations of continuous Olympic SDEs</title>
<p>To evaluate SDEs that have continuously been in the Olympic programme, we also choose 4 representative sports as our test set: Tennis, Fencing, Judo and Weightlifting. These sports were selected to represent a diverse range of characteristics relevant to the IOC evaluation framework, including differences in gender equity, global inclusivity, youth appeal, and fair play issues. In addition, they have each been continuously included in the Olympic programme for a substantial period, having been introduced or reintroduced in 1988, 1896, 1972, and 1920, respectively. The selected SDEs all have a relatively long history but they also face different challenges. Similarly, the remaining SDEs are used for training the PCA projection matrix and KNN classifier. Following the same feature extraction and classification steps, we report the results in <xref ref-type="fig" rid="F8">Figure&#x00A0;8b</xref> and <xref ref-type="sec" rid="s11">Supplementary Table S2</xref>.</p>
<p>As shown in <xref ref-type="fig" rid="F8">Figure&#x00A0;8b</xref>, Tennis is assigned a much higher score compared to other sports, due to its large audience base, tremendous market value, high level of gender equity and also better projection measurements. According to our system and model, fencing is considered more suitable than Judo for the Olympic programme because it has a higher level of inclusivity and accessibility. Besides, its fast-paced competition also appeals to a younger audience. Interestingly, although weightlifting is considered one of the longest continuous sports, it is assigned a &#x201C;Low&#x201D; label by our system and the proposed model. According to <xref ref-type="fig" rid="F8">Figure&#x00A0;8b</xref>, we notice that the low score of weightlifting results from low gender equity and poor fair play level due to increasing doping concerns (<xref ref-type="bibr" rid="B27">27</xref>).</p>
</sec>
</sec>
<sec id="s4c"><label>4.3</label><title>Future Olympic SDEs: question 4</title>
<p>As society develops, the future Olympics need to adapt to changing global dynamics, audience expectations, and technological advancements. Therefore, for the 2032 Brisbane Olympic Games, we investigate 6 different new SDEs as strong candidates: netball, Australian rules football, Esports, darts, snooker and pickleball. The first two sports&#x2014;netball and Australian rules football&#x2014;were selected based on their strong cultural significance and widespread popularity within Australia, the host nation. To capture a balance between local cultural relevance and international appeal, the remaining four sports were chosen for their alignment with broader global trends: Esports has experienced explosive global growth, particularly among younger audiences, and has established professional leagues worldwide. Darts and snooker maintain large international fan bases, long-standing professional circuits, and strong media appeal. Pickleball, though relatively new, has seen rapid growth in participation across multiple continents and is recognized for its inclusivity and accessibility, especially among diverse age groups.</p>
<p>Following our scoring system, the current state of statistics of selected sports is shown in <xref ref-type="fig" rid="F9">Figure&#x00A0;9a</xref>. It can be seen that Esports has the largest combined score due to its high popularity, inclusivity and youth appeal. Australian rules football has wide audiences and is also competitive in terms of sustainability. Besides, although pickleball is a relatively young sport, it has a high level of gender equity and sustainability. To investigate which sports are more suitable for the 2032 Olympics, we study their change over time. The most important and also most volatile factor is popularity. According to (<xref ref-type="bibr" rid="B28">28</xref>, <xref ref-type="bibr" rid="B29">29</xref>) and also data fetched via social media (<xref ref-type="bibr" rid="B18">18</xref>), the estimated average growth of popularity of the selected SDEs in the past three years is listed in <xref ref-type="table" rid="T5">Table&#x00A0;5</xref>. Based on this observation and prior knowledge, we can apply our scoring system and calculate the estimated scores of different years by taking such variations into consideration. <xref ref-type="fig" rid="F9">Figure&#x00A0;9b</xref> shows the change of the scores over time. It can be seen that the top 3 candidates for the 2032 Brisbane Olympic Games are Esports, Australian rules football and Pickleball. Furthermore, for the 2036 Olympic Games and beyond, we believe that tug of war, speed chess and bowling should be included for their global popularity, gender equity, safety and also appeal to people across all age groups.</p>
<fig id="F9" position="float"><label>Figure 9</label>
<caption><p>Current state and future estimates of 6 candidates for the 2032 Brisbane Olympic Games. <bold>(a)</bold> Current state of statistics of selected SDEs and <bold>(b)</bold> Predicted score of selected SDEs.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g009.tif"><alt-text content-type="machine-generated">Two-part image showing sports data: (a) A stacked bar chart depicting current statistics for netball, Australian rules football, esport, darts, snooker, and pickleball. Categories include safety, relevance, inclusivity, sustainability, gender equity, and popularity. (b) A line graph showing predicted scores for these sports from 2024 to 2032. Esport shows a rising trend, while others vary.</alt-text>
</graphic>
</fig>
<table-wrap id="T5" position="float"><label>Table 5</label>
<caption><p>Average growth rate of 6 selected sports.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">SDE</th>
<th valign="top" align="center">Netball</th>
<th valign="top" align="center">Australian rules football</th>
<th valign="top" align="center">Esports</th>
<th valign="top" align="center">Darts</th>
<th valign="top" align="center">Snooker</th>
<th valign="top" align="center">Pickleball</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Growth rate</td>
<td valign="top" align="center">1.15</td>
<td valign="top" align="center">1.2</td>
<td valign="top" align="center">1.3</td>
<td valign="top" align="center">1.1</td>
<td valign="top" align="center">1.05</td>
<td valign="top" align="center">1.4</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4d"><label>4.4</label><title>Sensitivity analysis: question 5</title>
<p>In our model and experiments, the low-dimensional embedding feature size <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM100"><mml:mi>p</mml:mi></mml:math></inline-formula> and the number of nearest neighbours <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM101"><mml:mi>k</mml:mi></mml:math></inline-formula> play a crucial role; thus, in this section we perform a sensitivity analysis to investigate their impact on the predicted results in terms of classification accuracy. Specifically, we randomly select 80&#x0025; of the data from <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM102"><mml:mi>D</mml:mi></mml:math></inline-formula> as our training set <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM103"><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">train</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:math></inline-formula>, and the remaining 20&#x0025; are used for testing. <xref ref-type="fig" rid="F10">Figure&#x00A0;10</xref> compares the classification accuracy with different <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM104"><mml:mi>p</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM105"><mml:mi>k</mml:mi></mml:math></inline-formula>. Interestingly, it can be seen that increasing the dimension size <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM106"><mml:mi>p</mml:mi></mml:math></inline-formula> does not always bring about benefits as more eigenvectors may capture not only meaningful variance but also noise. 
Similarly, choosing a larger number of neighbours <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM107"><mml:mi>k</mml:mi></mml:math></inline-formula> increases the risk of misclassification, in that it may be difficult to find sufficient neighbours that share similar features. In practice, we can select <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM108"><mml:mi>p</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM109"><mml:mi>k</mml:mi></mml:math></inline-formula> from 5 to 7 for better predictions.</p>
<fig id="F10" position="float"><label>Figure 10</label>
<caption><p>Sensitivity and parameter analysis of feature size <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM110"><mml:mi>p</mml:mi></mml:math></inline-formula> and number of nearest neighbours <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM111"><mml:mi>k</mml:mi></mml:math></inline-formula>. Influence of <bold>(a)</bold> different dimensions <italic>p</italic> and <bold>(b)</bold> number of neighbours <italic>k</italic>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1596196-g010.tif"><alt-text content-type="machine-generated">Two line graphs compare accuracy. Graph (a) shows the influence of different dimensions p on accuracy. Accuracy peaks at 0.7 when p is 5. Graph (b) displays the influence of the number of neighbors k on accuracy. Accuracy reaches 0.65 when k is 6. Both graphs highlight the variation of accuracy with changes in respective parameters.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s5" sec-type="discussion"><label>5</label><title>Discussion</title>
<sec id="s5a"><label>5.1</label><title>Evaluation of the model&#x2019;s performance</title>
<p>In this paper, we have proposed a comprehensive scoring and labelling system for evaluating SDEs for the Olympics, taking into account a variety of criteria such as popularity, gender equity, sustainability, and safety. The system was further integrated into a PCA-based classification model, combining unsupervised learning for feature extraction with a supervised KNN classifier to provide a more robust and objective method for SDE selection. Our experimental results successfully highlighted the current status of a wide range of SDEs in the Olympic context, validating the model&#x2019;s capability to categorize and prioritize sports based on IOC guidelines. Our analysis identified Esports, Australian rules football and pickleball as top contenders. Our framework provides valuable insights for future Olympic event evaluations and can inform decisions for the 2032 Brisbane Olympics. Finally, these findings carry important policy implications that should be considered from multiple perspectives, including those of the IOC, international sport federations, and potential host cities, to ensure balanced, sustainable, and strategically aligned event portfolios.</p>
</sec>
<sec id="s5b"><label>5.2</label><title>Implications for Olympic programme planning</title>
<p>Beyond the technical evaluation of SDEs, the planning of the Olympic programme must be firmly grounded in the broader mission of the Olympic Movement. As articulated in the Olympic Charter and reinforced by recent scholarship, the Olympic Games serve not only as a stage for elite competition but also as a global platform for promoting fundamental values such as excellence, friendship, and respect (<xref ref-type="bibr" rid="B14">14</xref>). These core principles should fundamentally guide decisions on sport inclusion, ensuring that new disciplines enhance public engagement, foster participation, and contribute to the Games&#x2019; lasting social and cultural impact.</p>
<p>Consistent with this mission, the Olympic Movement has long promoted global sport through initiatives like the Olympic Values Education Programme (OVEP), which cultivates moral awareness, cultural understanding, and essential life skills (<xref ref-type="bibr" rid="B30">30</xref>). In recent years, the Olympic Games have also faced heightened scrutiny regarding issues such as safety, pandemic management, environmental sustainability, and gender equality (<xref ref-type="bibr" rid="B14">14</xref>).</p>
<p>The revitalization of OVEP should be guided by the principles of Education for Sustainable Development (ESD), which equip individuals with the knowledge, values, and competencies needed to address global challenges through a sustainability lens (<xref ref-type="bibr" rid="B14">14</xref>, <xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B31">31</xref>). Embedding ESD objectives&#x2014;such as climate literacy, equitable resource access, and environmental responsibility&#x2014;into OVEP&#x2019;s curriculum and pedagogy would allow the programme to foster ethical decision-making, intercultural understanding, and long-term sustainability competencies (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B31">31</xref>). These principles can also be extended beyond education into practice, particularly in the area of sustainable urban planning. For example, Beijing 2008, London 2012, and Sochi 2014 all integrated Olympic investments into long-term infrastructure development, transforming sporting venues and public spaces into lasting community assets (<xref ref-type="bibr" rid="B31">31</xref>). By aligning OVEP with such broader sustainability initiatives, the IOC can ensure that Olympic education not only transmits values but also supports systemic change across social, environmental, and urban domains.</p>
<p>Further reinforcing this commitment, the IOC&#x2019;s endorsement of initiatives like the Hamburg Declaration, in collaboration with the World Health Organization, highlights the role of sport in promoting public health and sustainability (<xref ref-type="bibr" rid="B13">13</xref>). This underscores the need to prioritize sports that encourage daily physical activity and community sport (<xref ref-type="bibr" rid="B32">32</xref>). As Theodorakis et al. (<xref ref-type="bibr" rid="B14">14</xref>) note, maximizing the IOC&#x2019;s prestige and momentum entails selecting disciplines with low barriers to entry (e.g., swimming, cycling, running) and requiring host cities to invest in accessible community sports infrastructure.</p>
<p>While the current model emphasizes quantifiable factors such as media visibility, gender equity, and global reach, future iterations should incorporate indicators related to sustainability, education, and health. By combining rigorous data-driven analysis with ethical and philosophical perspectives, the IOC can more effectively align sport selection with its evolving responsibilities in the 21st century. This holistic approach will help foster a more inclusive, sustainable, and forward-looking Olympic legacy.</p>
</sec>
<sec id="s5c"><label>5.3</label><title>Limitations and future directions</title>
<p>While the proposed method demonstrates effectiveness, there are areas where improvements can be made. For instance, during the scoring and labeling phase, incorporating additional factors such as athleticism, game duration, and historical significance of the sport could lead to a more comprehensive evaluation. Furthermore, the application of more advanced machine learning techniques, such as Support Vector Machines (SVM) (<xref ref-type="bibr" rid="B33">33</xref>) and Deep Learning (DL) (<xref ref-type="bibr" rid="B34">34</xref>), could enhance the accuracy and robustness of the classification model, allowing for better predictions and more nuanced decision-making.</p>
<p>Future work should focus on extending the model by integrating real-time data to capture shifts in public engagement and sport trends, as well as exploring other advanced algorithms to refine the classification process. Additionally, expanding the datasets to include emerging sports will help improve the model&#x2019;s adaptability. Emerging sports are defined here as disciplines that have recently gained international visibility, institutional support, or rapid growth in participation. These sports are not yet part of the official Olympic program. Furthermore, incorporating feedback from stakeholders will also contribute to ensuring the model remains adaptable to the dynamic landscape of the Olympic Games. However, due to limited availability of comprehensive stakeholder data and real-time IOC decisions, our current study does not include external validation based on such inputs. We acknowledge this as a limitation and suggest that future work could strengthen the model&#x2019;s reliability by integrating actual feedback from the IOC, international sport federations, and potential host cities when such data becomes accessible.</p>
<p>In parallel, while this study focuses on the Summer Olympic Games, the proposed framework could be extended to the Winter Games by adapting the evaluation criteria to reflect the distinct characteristics of winter sports, such as climatic dependence, snow- and ice-specific infrastructure, and limited geographic accessibility. Accounting for these factors represents a promising direction for extending and validating the model in broader Olympic contexts.</p>
<p>Finally, we note that some indicators, such as social media metrics or broadcast coverage, emphasize visibility over grassroots participation. While aligned with the IOC&#x2019;s focus on youth engagement and media reach, they may overlook factors like historical significance or adaptability (e.g., the evolution of modern pentathlon). These qualitative aspects were excluded due to difficulties in quantification, but future studies could incorporate them through expert input or case-based methods.</p>
</sec>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary Material</xref>, further inquiries can be directed to the corresponding author/s.</p>
</sec>
<sec id="s7" sec-type="author-contributions"><title>Author contributions</title>
<p>YS: Funding acquisition, Validation, Supervision, Formal analysis, Conceptualization, Project administration, Writing &#x2013; review &#x0026; editing, Data curation, Writing &#x2013; original draft, Resources, Methodology, Visualization, Investigation, Software. RD: Investigation, Methodology, Validation, Project administration, Formal analysis, Funding acquisition, Supervision, Data curation, Visualization, Software, Conceptualization, Writing &#x2013; original draft, Resources. QZ: Funding acquisition, Formal analysis, Software, Project administration, Resources, Validation, Conceptualization, Data curation, Writing &#x2013; original draft, Methodology, Supervision, Visualization, Investigation. YS: Conceptualization, Writing &#x2013; review &#x0026; editing, Supervision, Visualization.</p>
</sec>
<sec id="s8" sec-type="funding-information"><title>Funding</title>
<p>The author(s) declare that no financial support was received for the research and/or publication of this article.</p>
</sec>
<ack><title>Acknowledgments</title>
<p>We would like to express our sincere gratitude to all the authors for their efforts in completing the data collection and analysis, as well as drafting the initial manuscript. Special thanks to Yizhuo Sun for their valuable contributions in refining the text. This study was supported by Sendelta International Academy.</p>
</ack>
<sec id="s9" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declare that Generative AI was used in the creation of this manuscript. Generative AI was used only for grammar checking. It was not used for programming, data processing, or drafting the manuscript.</p>
</sec>
<sec id="s12" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fspor.2025.1596196/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fspor.2025.1596196/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material id="SD1" content-type="local-data">
<media mimetype="application" mime-subtype="pdf" xlink:href="Datasheet1.pdf"/></supplementary-material>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Young</surname><given-names>DC</given-names></name></person-group>. <source>A Brief History of the Olympic Games</source>. <publisher-loc>Malden, MA</publisher-loc>: <publisher-name>John Wiley &#x0026; Sons</publisher-name> (<year>2008</year>).</citation></ref>
<ref id="B2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pop</surname><given-names>C</given-names></name></person-group>. <article-title>The modern Olympic Games&#x2013;a globalised cultural and sporting event</article-title>. <source>Procedia Soc Behav Sci</source>. (<year>2013</year>) <volume>92</volume>:<fpage>728</fpage>&#x2013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.1016/j.sbspro.2013.08.746</pub-id></citation></ref>
<ref id="B3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname><given-names>Z</given-names></name></person-group>. <article-title>Forging a link between competitive gaming, sport and the Olympics: history and new developments</article-title>. <source>Int J Hist Sport</source>. (<year>2022</year>) <volume>39</volume>:<fpage>251</fpage>&#x2013;<lpage>69</lpage>. <pub-id pub-id-type="doi">10.1080/09523367.2022.2061466</pub-id></citation></ref>
<ref id="B4"><label>4.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Dubinsky</surname><given-names>Y</given-names></name></person-group>. <source>Sport-Tech Diplomacy at the Tokyo 2020 Olympic Games. CPD Perspectives on Public Diplomacy</source>. <publisher-loc>Los Angeles</publisher-loc>: <publisher-name>Figueroa Press</publisher-name> (<year>2022</year>). p. <fpage>4</fpage>&#x2013;<lpage>60</lpage>. <comment>Available online at</comment>: <ext-link ext-link-type="uri" xlink:href="https://uscpublicdiplomacy.org/sites/default/files/Sport-Tech%20Diplomacy_11.21.22.pdf">https://uscpublicdiplomacy.org/sites/default/files/Sport-Tech%20Diplomacy_11.21.22.pdf</ext-link> (<comment>Accessed July 24, 2025</comment>).</citation></ref>
<ref id="B5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>Z</given-names></name><name><surname>Bai</surname><given-names>Y</given-names></name><name><surname>Wei</surname><given-names>M</given-names></name></person-group>. <article-title>The importance of creativity in the sportification of breakdance</article-title>. <source>Front Educ</source>. (<year>2022</year>) <volume>7</volume>:<fpage>855724</fpage>. <pub-id pub-id-type="doi">10.3389/feduc.2022.855724</pub-id></citation></ref>
<ref id="B6"><label>6.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ramchandani</surname><given-names>G</given-names></name></person-group>. <article-title>Data from: Home advantage in the Summer Olympic Games: evidence from Tokyo 2020 and prospects for Paris 2024</article-title>. (2022). <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://olympicanalysis.org/section-4/home-advantage-in-the-summer-olympic-games-evidence-from-tokyo-2020-and-prospects-for-paris-2024/">https://olympicanalysis.org/section-4/home-advantage-in-the-summer-olympic-games-evidence-from-tokyo-2020-and-prospects-for-paris-2024/</ext-link> (Accessed June 11, 2025)</comment>.</citation></ref>
<ref id="B7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garcia</surname><given-names>B</given-names></name></person-group>. <article-title>The Olympic movement and cultural policy: historical challenges and ways forward</article-title>. <source>J Olympic Stud</source>. (<year>2022</year>) <volume>3</volume>:<fpage>44</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.5406/26396025.3.2.04</pub-id></citation></ref>
<ref id="B8"><label>8.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Teetzel</surname><given-names>S</given-names></name></person-group>. <article-title>Intersections of gender, doping and sport: the shared implications of anti-doping and sex testing</article-title>. In: <person-group person-group-type="editor"><name><surname>Henning</surname><given-names>A</given-names></name><name><surname>Andreasson</surname><given-names>J</given-names></name></person-group>, editors. <source>Doping in Sport and Fitness</source>. <publisher-loc>Leeds</publisher-loc>: <publisher-name>Emerald Publishing Limited</publisher-name> (<year>2022</year>). <volume>Vol. 16</volume>. p. <fpage>239</fpage>&#x2013;<lpage>52</lpage>.</citation></ref>
<ref id="B9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bauman</surname><given-names>AE</given-names></name><name><surname>Kamada</surname><given-names>M</given-names></name><name><surname>Reis</surname><given-names>RS</given-names></name><name><surname>Troiano</surname><given-names>RP</given-names></name><name><surname>Ding</surname><given-names>D</given-names></name><name><surname>Milton</surname><given-names>K</given-names></name></person-group>, et al. <article-title>An evidence-based assessment of the impact of the Olympic games on population levels of physical activity</article-title>. <source>Lancet</source>. (<year>2021</year>) <volume>398</volume>:<fpage>456</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(21)01165-X</pub-id></citation></ref>
<ref id="B10"><label>10.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Santana</surname><given-names>WF</given-names></name><name><surname>de Oliveira</surname><given-names>MH</given-names></name><name><surname>Uvinha</surname><given-names>RR</given-names></name></person-group>. <article-title>Are the Olympics up-to-date&#x003F; Measures taken by the IOC to enhance gender equality in the Games</article-title>. <source>Olimpianos J Olympic Stud</source>. (<year>2022</year>) <volume>6</volume>:<fpage>234</fpage>&#x2013;<lpage>50</lpage>. <pub-id pub-id-type="doi">10.30937/2526-6314.v6.id156</pub-id></citation></ref>
<ref id="B11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kinoshita</surname><given-names>K</given-names></name><name><surname>MacIntosh</surname><given-names>E</given-names></name><name><surname>Parent</surname><given-names>M</given-names></name></person-group>. <article-title>Social outcomes from participating in the Youth Olympic Games: the role of the service environment</article-title>. <source>Eur Sport Manage Q</source>. (<year>2023</year>) <volume>23</volume>:<fpage>488</fpage>&#x2013;<lpage>507</lpage>. <pub-id pub-id-type="doi">10.1080/16184742.2021.1889636</pub-id></citation></ref>
<ref id="B12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kolliari-Turner</surname><given-names>A</given-names></name><name><surname>Lima</surname><given-names>G</given-names></name><name><surname>Hamilton</surname><given-names>B</given-names></name><name><surname>Pitsiladis</surname><given-names>Y</given-names></name><name><surname>Guppy</surname><given-names>FM</given-names></name></person-group>. <article-title>Analysis of anti-doping rule violations that have impacted medal results at the summer Olympic Games 1968&#x2013;2012</article-title>. <source>Sports Med</source>. (<year>2021</year>) <volume>51</volume>:<fpage>2221</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1007/s40279-021-01463-4</pub-id></citation></ref>
<ref id="B13"><label>13.</label><citation citation-type="other"><collab>International Olympic Committee (IOC)</collab>. <article-title>Data from: IOC reiterates its support for the Hamburg Declaration to tackle physical inactivity</article-title>. (2023) <comment>(Accessed June 16, 2025)</comment>.</citation></ref>
<ref id="B14"><label>14.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Theodorakis</surname><given-names>Y</given-names></name><name><surname>Georgiadis</surname><given-names>K</given-names></name><name><surname>Hassandra</surname><given-names>M</given-names></name></person-group>. <article-title>Evolution of the Olympic movement: adapting to contemporary global challenges</article-title>. <source>Soc Sci</source>. (<year>2024</year>) <volume>13</volume>:<fpage>326</fpage>. <pub-id pub-id-type="doi">10.3390/socsci13070326</pub-id></citation></ref>
<ref id="B15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Park</surname><given-names>S</given-names></name><name><surname>Lim</surname><given-names>D</given-names></name></person-group>. <article-title>Applicability of Olympic values in sustainable development</article-title>. <source>Sustainability</source>. (<year>2022</year>) <volume>14</volume>:<fpage>5921</fpage>. <pub-id pub-id-type="doi">10.3390/su14105921</pub-id></citation></ref>
<ref id="B16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nicoliello</surname><given-names>M</given-names></name></person-group>. <article-title>The new agenda 2020&#x002B; 5 and the future challenges for the Olympic movement</article-title>. <source>Athens J Sports</source>. (<year>2021</year>) <volume>8</volume>:<fpage>121</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.30958/ajspo.8-2-2</pub-id></citation></ref>
<ref id="B17"><label>17.</label><citation citation-type="other"><collab>International Olympic Committee</collab>. <article-title>Data from: Tokyo 2020 event programme</article-title>. (2020) (Accessed February 18, 2025).</citation></ref>
<ref id="B18"><label>18.</label><citation citation-type="other"><collab>Feedspot</collab>. <article-title>Data from: Top influencers, blogs, podcasts &#x0026; youtubers</article-title>. (2025) <comment>(Accessed February 18, 2025)</comment>.</citation></ref>
<ref id="B19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sakanashi</surname><given-names>S</given-names></name><name><surname>Tanaka</surname><given-names>H</given-names></name><name><surname>Yokota</surname><given-names>H</given-names></name><name><surname>Otomo</surname><given-names>Y</given-names></name><name><surname>Masuno</surname><given-names>T</given-names></name><name><surname>Nakano</surname><given-names>K</given-names></name></person-group>, et al. <article-title>Injuries and illness of athletes at the Tokyo 2020 Olympic and Paralympic summer games visiting outside facilities</article-title>. <source>Sports Med Health Sci</source>. (<year>2024</year>) <volume>6</volume>:<fpage>48</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1016/j.smhs.2024.01.003</pub-id></citation></ref>
<ref id="B20"><label>20.</label><citation citation-type="other"><collab>Statista</collab>. <article-title>Data from: Number of doping cases worldwide by sport</article-title>. (2025) <comment>(Accessed February 18, 2025)</comment>.</citation></ref>
<ref id="B21"><label>21.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Podvezko</surname><given-names>V</given-names></name></person-group>. <article-title>Application of AHP technique</article-title>. <source>J Bus Econ Manage</source>. (<year>2009</year>) <volume>10</volume>:<fpage>181</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.3846/1611-1699.2009.10.181-189</pub-id></citation></ref>
<ref id="B22"><label>22.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Galily</surname><given-names>Y</given-names></name><name><surname>Spaaij</surname><given-names>R</given-names></name><name><surname>McGannon</surname><given-names>KR</given-names></name></person-group>. <article-title>Beyond the rings: exploring the cultural and behavioral impact of the 2024 Paris Olympics</article-title>. <source>Am Behav Sci</source>. (<year>2024</year>):<fpage>00027642241261262</fpage>. <pub-id pub-id-type="doi">10.1177/00027642241261262</pub-id></citation></ref>
<ref id="B23"><label>23.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Hou</surname><given-names>Y</given-names></name></person-group>. <article-title>SPSSAU analysis of the application of new media technology in ideological and political theory teaching</article-title>. <comment>In: <italic>2020 International Conference on Information Science and Education (ICISE-IE)</italic>. IEEE (2020). p. 710&#x2013;3</comment>.</citation></ref>
<ref id="B24"><label>24.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ebied</surname><given-names>HM</given-names></name></person-group>. <article-title>Feature extraction using PCA and Kernel-PCA for face recognition</article-title>. <comment>In: <italic>2012 8th International Conference on Informatics and Systems (INFOS)</italic>. IEEE (2012). p. MM&#x2013;72</comment>.</citation></ref>
<ref id="B25"><label>25.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wold</surname><given-names>S</given-names></name><name><surname>Esbensen</surname><given-names>K</given-names></name><name><surname>Geladi</surname><given-names>P</given-names></name></person-group>. <article-title>Principal component analysis</article-title>. <source>Chemometr Intell Lab Syst</source>. (<year>1987</year>) <volume>2</volume>:<fpage>37</fpage>&#x2013;<lpage>52</lpage>. <pub-id pub-id-type="doi">10.1016/0169-7439(87)80084-9</pub-id></citation></ref>
<ref id="B26"><label>26.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liao</surname><given-names>Y</given-names></name><name><surname>Vemuri</surname><given-names>VR</given-names></name></person-group>. <article-title>Use of k-nearest neighbor classifier for intrusion detection</article-title>. <source>Comput Secur</source>. (<year>2002</year>) <volume>21</volume>:<fpage>439</fpage>&#x2013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.1016/S0167-4048(02)00514-X</pub-id></citation></ref>
<ref id="B27"><label>27.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kolliari-Turner</surname><given-names>A</given-names></name><name><surname>Oliver</surname><given-names>B</given-names></name><name><surname>Lima</surname><given-names>G</given-names></name><name><surname>Mills</surname><given-names>JP</given-names></name><name><surname>Wang</surname><given-names>G</given-names></name><name><surname>Pitsiladis</surname><given-names>Y</given-names></name></person-group>, et al. <article-title>Doping practices in international weightlifting: analysis of sanctioned athletes/support personnel from 2008 to 2019 and retesting of samples from the 2008 and 2012 Olympic Games</article-title>. <source>Sports Med Open</source>. (<year>2021</year>) <volume>7</volume>:<fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1186/s40798-020-00293-4</pub-id></citation></ref>
<ref id="B28"><label>28.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Block</surname><given-names>S</given-names></name><name><surname>Haack</surname><given-names>F</given-names></name></person-group>. <article-title>eSports: a new industry</article-title>. <comment>In: <italic>SHS Web of Conferences</italic>. EDP Sciences (2021). Vol. 92. p. 04002</comment>.</citation></ref>
<ref id="B29"><label>29.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gupta</surname><given-names>K</given-names></name></person-group>. <article-title>Understanding the fundamental reasons for the growth of pickleball</article-title>. <source>J Stud Res</source>. (<year>2024</year>) <volume>13</volume>:<fpage>1</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.47611/jsrhs.v13i2.6795</pub-id></citation></ref>
<ref id="B30"><label>30.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Binder</surname><given-names>DL</given-names></name></person-group>. <article-title>Olympic values education: evolution of a pedagogy</article-title>. <source>Educ Rev</source>. (<year>2012</year>) <volume>64</volume>:<fpage>275</fpage>&#x2013;<lpage>302</lpage>. <pub-id pub-id-type="doi">10.1080/00131911.2012.676539</pub-id></citation></ref>
<ref id="B31"><label>31.</label><citation citation-type="book"><collab>International Olympic Committee</collab>. <source>The Fundamentals of Olympic Values Education</source>. <edition>2</edition>nd ed. <publisher-loc>Lausanne, Switzerland</publisher-loc>: <publisher-name>Department of Public Affairs and Social Development Through Sport</publisher-name> (<year>2016</year>).</citation></ref>
<ref id="B32"><label>32.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Steinacker</surname><given-names>JM</given-names></name><name><surname>Van Mechelen</surname><given-names>W</given-names></name><name><surname>Bloch</surname><given-names>W</given-names></name><name><surname>B&#x00F6;rjesson</surname><given-names>M</given-names></name><name><surname>Casasco</surname><given-names>M</given-names></name><name><surname>Wolfarth</surname><given-names>B</given-names></name></person-group>, et al. <article-title>Global alliance for the promotion of physical activity: the Hamburg declaration</article-title>. <source>BMJ Open Sport Exerc Med</source>. (<year>2023</year>) <volume>9</volume>:<fpage>e001626</fpage>. <pub-id pub-id-type="doi">10.1136/bmjsem-2023-001626</pub-id></citation></ref>
<ref id="B33"><label>33.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Suthaharan</surname><given-names>S</given-names></name></person-group>. <article-title>Support vector machine</article-title>. In: <source>Machine Learning Models and Algorithms for Big Data Classification: Thinking with Examples for Effective Learning</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2016</year>).</citation></ref>
<ref id="B34"><label>34.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>Z</given-names></name><name><surname>Liu</surname><given-names>F</given-names></name><name><surname>Yang</surname><given-names>W</given-names></name><name><surname>Peng</surname><given-names>S</given-names></name><name><surname>Zhou</surname><given-names>J</given-names></name></person-group>. <article-title>A survey of convolutional neural networks: analysis, applications, and prospects</article-title>. <source>IEEE Trans Neural Networks Learn Syst</source>. (<year>2021</year>) <volume>33</volume>:<fpage>6999</fpage>&#x2013;<lpage>7019</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2021.3084827</pub-id></citation></ref></ref-list>
</back>
</article>