<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="brief-report" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Sports Act. Living</journal-id>
<journal-title>Frontiers in Sports and Active Living</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Sports Act. Living</abbrev-journal-title>
<issn pub-type="epub">2624-9367</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fspor.2025.1638446</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Sports and Active Living</subject>
<subj-group>
<subject>Brief Research Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Advancing NFL win prediction: from Pythagorean formulas to machine learning algorithms</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Weirich</surname><given-names>Caroline</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Kim</surname><given-names>Jun Woo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3063469/overview"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Yoon</surname><given-names>Youngmin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2298568/overview" /><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/software/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/></contrib>
<contrib contrib-type="author"><name><surname>Jeong</surname><given-names>Seunghoon</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/></contrib>
</contrib-group>
<aff id="aff1"><label><sup>1</sup></label><institution>School of Global Business, Arcadia University</institution>, <addr-line>Glenside, PA</addr-line>, <country>United States</country></aff>
<aff id="aff2"><label><sup>2</sup></label><institution>College of Education, University of North Texas</institution>, <addr-line>Denton, TX</addr-line>, <country>United States</country></aff>
<aff id="aff3"><label><sup>3</sup></label><institution>College of Physical Education, Woosuk University</institution>, <addr-line>Wanju-gun</addr-line>, <country>Republic of Korea</country></aff>
<author-notes>
<fn fn-type="edited-by"><p><bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2899247">Sehwan Kim</ext-link>, Graceland University, United States</p></fn>
<fn fn-type="edited-by"><p><bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3096557/overview">Igor Costa</ext-link>, IFPB, Brazil</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3156238/overview">Farjana Akter Boby</ext-link>, Daffodil International University, Bangladesh</p></fn>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Jun Woo Kim <email>kimjw@arcadia.edu</email></corresp>
</author-notes>
<pub-date pub-type="epub"><day>12</day><month>09</month><year>2025</year></pub-date>
<pub-date pub-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1638446</elocation-id>
<history>
<date date-type="received"><day>30</day><month>05</month><year>2025</year></date>
<date date-type="accepted"><day>28</day><month>08</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 Weirich, Kim, Yoon and Jeong.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>Weirich, Kim, Yoon and Jeong</copyright-holder><license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>This study evaluates the predictive performance of traditional and machine learning-based models in forecasting NFL team winning percentages over a 21-season dataset (2003&#x2013;2023). Specifically, we compare the Pythagorean expectation formula&#x2014;commonly used in sports analytics&#x2014;with Random Forest regression and a feedforward Neural Network model. Using key performance indicators such as points scored, points allowed, turnovers, rushing and passing efficiency, and penalties, the machine learning models demonstrate superior predictive accuracy. The Neural Network model achieved the highest performance (MAE&#x2009;&#x003D;&#x2009;0.052, RMSE&#x2009;&#x003D;&#x2009;0.064, <italic>R</italic><sup>2</sup>&#x2009;&#x003D;&#x2009;0.891), followed by the Random Forest model, both of which significantly outperformed the Pythagorean method. Feature importance analysis using SHAP values identifies points scored and points allowed as the most influential predictors, supplemented by margin of victory, turnovers, and offensive efficiency metrics. These findings underscore the limitations of fixed-formula models and highlight the flexibility and robustness of data-driven approaches. The study offers practical implications for analysts, coaches, and sports management professionals seeking to optimize strategic decisions and competitive performance. Ultimately, the integration of advanced machine learning models provides a powerful tool for enhancing decision-making processes across the NFL landscape.</p>
</abstract>
<kwd-group>
<kwd>NFL</kwd>
<kwd>neural network</kwd>
<kwd>Pythagorean Theorem</kwd>
<kwd>machine learning</kwd>
<kwd>sports analytics</kwd>
<kwd>random forest</kwd>
</kwd-group><counts>
<fig-count count="2"/>
<table-count count="3"/><equation-count count="5"/><ref-count count="24"/><page-count count="8"/><word-count count="0"/></counts><custom-meta-wrap><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Sports Management, Marketing, and Economics</meta-value></custom-meta></custom-meta-wrap>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>American football remains one of the most popular sports in the United States, consistently holding this position since 1972. The National Football League (NFL), at the heart of this popularity, has grown into an exceptionally lucrative industry. In 2024, the combined value of the NFL&#x0027;s 32 teams reached approximately &#x0024;190 billion, reflecting continued financial growth and robust market presence (<xref ref-type="bibr" rid="B1">1</xref>). Additionally, NFL viewership continues to set unprecedented records, with the 2023 playoffs averaging 38.5 million viewers, marking a notable nine-percent increase over the previous year (<xref ref-type="bibr" rid="B2">2</xref>).</p>
<p>In professional sports, success is fundamentally measured by a team&#x0027;s ability to win games, and the NFL explicitly employs winning percentages to determine playoff eligibility and team standings. Winning percentage is traditionally calculated by dividing a team&#x0027;s total wins by the number of games played, with ties factored as half a win and half a loss, a standard that the NFL adopted in 1972 (<xref ref-type="bibr" rid="B3">3</xref>). Historically, one prominent method of predicting team success has been the Pythagorean Theorem Win/Loss formula, initially developed by Bill James for Major League Baseball. James&#x2019; formula calculates expected winning percentage based on runs scored and allowed, demonstrating impressive accuracy with a typical margin of error around 2&#x0025; per team (<xref ref-type="bibr" rid="B4">4</xref>). Adaptations of this formula have been explored in various sports. Former Houston Rockets General Manager Daryl Morey refined the formula specifically for NFL contexts, identifying 2.37 as the optimal exponent for predicting NFL winning percentages (<xref ref-type="bibr" rid="B4">4</xref>). 
The adapted Pythagorean formula for the NFL is mathematically expressed in <xref ref-type="disp-formula" rid="disp-formula1">Equation 1</xref>:<disp-formula id="disp-formula1"><label>(1)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM1"><mml:mrow><mml:mi mathvariant="normal">Winning</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">Percentage</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="normal">points</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">for</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2.37</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="normal">points</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">for</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2.37</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="normal">points</mml:mi><mml:mspace width="0.25em"/><mml:mi mathvariant="normal">against</mml:mi></mml:mrow><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mn>2.37</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula>Beyond its NFL adaptation, Pythagorean expectation formulas have been further modified for other sports, underscoring their versatility and wide applicability. For instance, research by Morey demonstrated that an exponent of 13.91 optimally predicts winning percentages in NBA contexts (<xref ref-type="bibr" rid="B4">4</xref>), while Caro and Machtmes (<xref ref-type="bibr" rid="B5">5</xref>) validated a simpler squared exponent formula to forecast win rates in college football. 
Further customization is evident in Davenport&#x0027;s logarithmic method, which adjusts exponents dynamically based on team-specific scoring data across an entire season (<xref ref-type="bibr" rid="B6">6</xref>). While powerful, the fixed mathematical structure of these traditional formulas inherently restricts their capacity to fully account for the nuanced, complex relationships present in competitive sports outcomes.</p>
<p>Recent trends in sports analytics highlight the growing potential of machine learning techniques as more flexible and robust predictive tools compared to fixed-formula methods (<xref ref-type="bibr" rid="B7">7</xref>). Algorithms such as random forest regression and neural networks&#x2014;two prominent supervised machine learning techniques frequently applied in sports analytics&#x2014;can efficiently model complex, nonlinear relationships among performance metrics (<xref ref-type="bibr" rid="B8">8</xref>). Unlike traditional prediction methods, these algorithms learn from historical data, capturing patterns involving offensive and defensive efficiency, schedule difficulty, margin of victory, and other influential variables. Random forest regression is valued for its interpretability and reliable accuracy in modeling intricate sports outcomes (<xref ref-type="bibr" rid="B9">9</xref>), while neural networks have been highlighted for their flexibility and success in capturing deeper, non-linear interactions between predictors (<xref ref-type="bibr" rid="B10">10</xref>).</p>
<p>Building upon this foundation, the current study leverages comprehensive NFL data spanning two decades (2003&#x2013;2023) to empirically compare the predictive performance of the traditional Pythagorean expectation formula against data-driven machine learning algorithms&#x2014;specifically random forest regression and neural network models. By evaluating these models, this study aims to identify effective methodologies for accurately forecasting NFL team winning percentages, thereby contributing valuable insights to the broader field of sport management. Sports analysts and team management can use insights derived from these predictive methodologies to optimize strategic decisions, effectively evaluate team performance, and enhance their competitive advantage in the NFL landscape.</p>
</sec>
<sec id="s2"><label>2</label><title>Data and empirical methods</title>
<sec id="s2a"><label>2.1</label><title>Data collection</title>
<p>The dataset utilized in this study was obtained from publicly accessible information provided by pro-football-reference.com. It comprises comprehensive NFL team statistics covering the seasons from 2003 through 2023. The collected data encompass details such as total games played, games won and lost, points scored (points for), points conceded (points against), average margin of victory per season, and performance statistics such as total passing yards, passes attempted, rushing yards, turnovers, penalties committed by team, etc. Across the 21-season span, the dataset contains 672 team-season observations, providing a substantial basis for predictive analysis.</p>
<p>Traditionally, the Pythagorean Theorem prediction method leverages only two variables&#x2014;points scored and points allowed&#x2014;to predict a team&#x0027;s winning percentage. This study incorporates this traditional method as a baseline, comparing its predictive accuracy against machine learning approaches. Random forest and neural network models are utilized as powerful analytical frameworks to capture complex patterns in the data. The random forest model (<xref ref-type="bibr" rid="B11">11</xref>), a robust ensemble algorithm, simultaneously analyzes multiple predictive variables, capturing complex nonlinear relationships and interactions among features included in the model. Unlike the static parameter-based Pythagorean approach, random forest regression automatically identifies and assigns appropriate weights to relevant predictors, significantly enhancing predictive flexibility and potentially improving accuracy (<xref ref-type="bibr" rid="B9">9</xref>). Similarly, the neural network model leverages a multilayered structure designed to adapt and learn intricate data patterns during training. Neural network is particularly adept at managing complex nonlinear relationships inherent within NFL team performance metrics such as passing yards, rushing efficiency, turnover rate, scoring consistency, and penalty impact&#x2014;variables that extend beyond the simplistic points-based approach of the Pythagorean formula. The neural network approach continuously adjusts internal parameters (i.e., weights and biases of the neurons) to optimize predictive performance, offering potential superiority in capturing subtle patterns and interactions within large, multidimensional datasets (<xref ref-type="bibr" rid="B10">10</xref>).</p>
<p>Prior to model training, rigorous data preprocessing was performed. Input features underwent standardization via the StandardScaler normalization technique from scikit-learn, which adjusts variables to a consistent scale (mean of zero, standard deviation of one), ensuring optimal convergence and performance of the random forest and neural network models (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B13">13</xref>). Additionally, the year variable was incorporated using one-hot encoding to control for temporal variability and annual differences (<xref ref-type="bibr" rid="B14">14</xref>).</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Model architecture</title>
<p>This study employs three distinct methodologies to predict NFL teams&#x2019; winning percentages: the Pythagorean expectation model, random forest regression, and neural network. Each approach offers unique strengths, enabling comprehensive comparative analyses to ascertain their relative predictive power. Random forest, introduced by Breiman (<xref ref-type="bibr" rid="B15">15</xref>), constructs multiple decision trees during training and outputs the average prediction, effectively mitigating overfitting and improving generalization. Specifically, each tree within the random forest is constructed using bootstrap aggregation and a randomly selected subset of features, enhancing diversity among trees and reducing variance (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B15">15</xref>).</p>
<p>The random forest architecture employed in this study leverages predictive variables including total points scored, total points allowed, average margin of victory, passing yards, rushing yards, first downs, turnovers, and penalties. Hyperparameter tuning was systematically conducted to optimize the number of trees, maximum depth, and minimum sample splits, achieving enhanced predictive accuracy and robustness. Such ensemble models are particularly adept at capturing complex, non-linear relationships among predictors, substantially outperforming simplistic linear models or fixed formulas (<xref ref-type="bibr" rid="B9">9</xref>).</p>
<p>The feedforward neural network model was developed utilizing the TensorFlow and Keras libraries, renowned for their robustness and versatility in building deep learning models (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). The neural network implemented in this study consists of multiple interconnected layers of neurons&#x2014;namely input, hidden, and output layers&#x2014;configured to adjust parameters. The input layer receives standardized predictors, including points scored, points conceded, passing efficiency, rushing effectiveness, turnover rates, margin of victory, penalties, and encoded annual effects. These inputs are processed through two hidden layers that employ activation functions such as Rectified Linear Units (ReLU), enabling the network to learn non-linear patterns efficiently (<xref ref-type="bibr" rid="B17">17</xref>). The final output layer produces predicted winning percentages. Hyperparameters such as learning rate, number of hidden layers, neuron counts, batch size, and epochs were optimized using a random train-test split to ensure superior model performance (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B13">13</xref>).</p>
</sec>
<sec id="s2c"><label>2.3</label><title>Evaluation metrics</title>
<p>Evaluating predictive model performance accurately and rigorously is critical, where forecasting outcomes can significantly inform strategic decisions. This study adopted three standard evaluation metrics: Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and the R-squared value (<italic>R</italic><sup>2</sup>). Each metric provides distinct insights into the predictive accuracy and effectiveness of the models employed [(<xref ref-type="bibr" rid="B18">18</xref>); Namasudra et al, 2023; (<xref ref-type="bibr" rid="B17">17</xref>)]. MAE quantifies the average magnitude of errors between the predicted and actual values, ignoring their direction. The estimating <xref ref-type="disp-formula" rid="disp-formula2">Equation 2</xref> is presented as follows:<disp-formula id="disp-formula2"><label>(2)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM2"><mml:mrow><mml:mi mathvariant="normal">MAE</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac></mml:mrow><mml:munderover><mml:mrow><mml:mo movablelimits="false">&#x2211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:munderover><mml:mspace width="0.2em"/><mml:mo fence="false" stretchy="false">|</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>&#x005E;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mo fence="false" stretchy="false">|</mml:mo></mml:mstyle></mml:math></disp-formula>where <italic>y<sub>i</sub></italic> represents actual values, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM1"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula><sub>i</sub> 
represents predicted values, and <italic>n</italic> is the number of observations. The strength of MAE lies in its simplicity and interpretability, providing an intuitive understanding of how much, on average, predictions deviate from actual outcomes (<xref ref-type="bibr" rid="B19">19</xref>). RMSE measures prediction accuracy by calculating the square root of the mean squared differences between predicted and actual outcomes. The corresponding formula is specified in <xref ref-type="disp-formula" rid="disp-formula3">Equation 3</xref>:<disp-formula id="disp-formula3"><label>(3)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM3"><mml:mrow><mml:mi mathvariant="normal">RMSE</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msqrt><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac></mml:mrow><mml:munderover><mml:mrow><mml:mo movablelimits="false">&#x2211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:munderover><mml:mspace width="0.2em"/><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>&#x005E;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mstyle></mml:msqrt></mml:math></disp-formula>RMSE places greater emphasis on larger errors by squaring the differences, making it sensitive to outliers and particularly useful when large errors significantly impact model utility and decision-making processes (<xref ref-type="bibr" rid="B19">19</xref>). The R-squared value quantifies the proportion of variance in the dependent variable explained by the independent variables. 
The corresponding formula is shown in <xref ref-type="disp-formula" rid="disp-formula4">Equation 4</xref>:<disp-formula id="disp-formula4"><label>(4)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM4"><mml:msup><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mrow><mml:mspace width="0.25em"/></mml:mrow><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mfrac><mml:mrow><mml:msubsup><mml:mrow><mml:mo movablelimits="false">&#x2211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:msubsup><mml:mspace width="0.2em"/><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>&#x005E;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mo movablelimits="false">&#x2211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:msubsup><mml:mspace width="0.2em"/><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM2"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> represents the mean of observed values. An R-squared value closer to 1 indicates superior predictive performance, reflecting a higher explanatory power of the model regarding observed variance. 
These metrics are applied to evaluate the predictive performance of three models employed in this study: the traditional Pythagorean expectation model, random forest regression, and neural network model. Applying these evaluation metrics yields a comprehensive and nuanced understanding of model effectiveness, particularly beneficial in the multifaceted and dynamic context of NFL team performance prediction.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<sec id="s3a"><label>3.1</label><title>Comparing predictive accuracy</title>
<p>To evaluate the predictive accuracy of different models in estimating a team&#x0027;s winning percentage, we compared the performance of the traditional Pythagorean expectation model with those of the random forest and neural network models. The results are summarized in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>.</p>
<table-wrap id="T1" position="float"><label>Table 1</label>
<caption><p>Comparison of predicted winning percentage.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Predicted winning &#x0025;</th>
<th valign="top" align="center">MAE (Rd)</th>
<th valign="top" align="center">MAE (Chron)</th>
<th valign="top" align="center"><italic>&#x0394;</italic> MAE</th>
<th valign="top" align="center">RMSE (Rd)</th>
<th valign="top" align="center">RMSE (Chron)</th>
<th valign="top" align="center"><italic>&#x0394;</italic> RMSE</th>
<th valign="top" align="center"><italic>R</italic><sup>2</sup> (Rd)</th>
<th valign="top" align="center"><italic>R</italic><sup>2</sup> (Chron)</th>
<th valign="top" align="center"><italic>&#x0394; R</italic><sup>2</sup></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Pythagorean</td>
<td valign="top" align="center">0.434</td>
<td valign="top" align="center">0.066</td>
<td valign="top" align="center">0.059</td>
<td valign="top" align="center">&#x2212;10.6&#x0025;</td>
<td valign="top" align="center">0.082</td>
<td valign="top" align="center">0.069</td>
<td valign="top" align="center">&#x2212;15.9&#x0025;</td>
<td valign="top" align="center">0.816</td>
<td valign="top" align="center">0.811</td>
<td valign="top" align="center">&#x2212;0.005</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="center">0.439</td>
<td valign="top" align="center">0.061</td>
<td valign="top" align="center">0.063</td>
<td valign="top" align="center">&#x002B;3.3&#x0025;</td>
<td valign="top" align="center">0.075</td>
<td valign="top" align="center">0.079</td>
<td valign="top" align="center">&#x002B;5.3&#x0025;</td>
<td valign="top" align="center">0.857</td>
<td valign="top" align="center">0.833</td>
<td valign="top" align="center">&#x2212;0.024</td>
</tr>
<tr>
<td valign="top" align="left">NN</td>
<td valign="top" align="center">0.493</td>
<td valign="top" align="center">0.052</td>
<td valign="top" align="center">0.058</td>
<td valign="top" align="center">&#x002B;11.5&#x0025;</td>
<td valign="top" align="center">0.064</td>
<td valign="top" align="center">0.072</td>
<td valign="top" align="center">&#x002B;12.5&#x0025;</td>
<td valign="top" align="center">0.891</td>
<td valign="top" align="center">0.862</td>
<td valign="top" align="center">&#x2212;0.029</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn1"><p>RF, random forest; NN, neural networks; Rd, random split; Chron, chronological split; <italic>&#x0394;</italic> MAE and <italic>&#x0394;</italic> RMSE are percent changes: (Chron&#x2212;Rd)/Rd&#x2009;&#x00D7;&#x2009;100&#x0025;; <italic>&#x0394; R</italic>&#x00B2; is the absolute point change: <italic>R</italic>&#x00B2;Chron&#x2212;R&#x00B2;Rd.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>In our random forest regression analysis, we employed an ensemble of 100 decision trees to balance predictive stability against computational cost (<xref ref-type="bibr" rid="B20">20</xref>). Each tree was trained on a different bootstrap sample of the data and, at every split, considered a random subset of the available features, thereby reducing variance and decorrelating the individual predictors (<xref ref-type="bibr" rid="B15">15</xref>). We standardized all input variables to zero mean and unit variance before training, and fixed the pseudo-random seed to guarantee full reproducibility of our results. During prediction, each of the 100 trees casts an individual estimate of the winning percentage, and the final forest prediction is simply the average of these tree-level outputs. This configuration&#x2014;100 trees with default maximum depth and feature-sampling settings&#x2014;proved sufficient for the error curve to converge, as additional trees yielded negligible reductions in out-of-bag error. The model&#x0027;s predictive accuracy was strong, with a MAE of 0.061, a RMSE of 0.075, and an <italic>R</italic><sup>2</sup> value of 0.857 (see <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>). These results outperformed the traditional Pythagorean expectation method across all metrics, underscoring the value of data-driven ensemble approaches in modeling team performance. Feature importance analysis further revealed the dominant influence of total points scored and points allowed on the prediction of winning percentage. Specifically, &#x201C;points for&#x201D; and &#x201C;points_allowed&#x201D; accounted for 54&#x0025; and 34&#x0025; of the total importance, respectively. Other meaningful, albeit less influential, predictors included rush attempts (3&#x0025;), turnovers (2&#x0025;), penalties (2&#x0025;), passing yards (2&#x0025;), and passing attempts (2&#x0025;). 
These results suggest that while scoring remains the most significant determinant of success, additional team statistics&#x2014;particularly those related to ball control and offensive efficiency&#x2014;play secondary but non-negligible roles in predicting performance.</p>
<p>Our multilayer perceptron (MLP), a type of feedforward neural network, comprises two hidden layers, containing 64 and 32 neurons, respectively. We incorporated dropout layers with a rate of 0.2 after each hidden layer in the neural network architecture. By randomly deactivating 20&#x0025; of neurons during each training iteration, dropout disrupts potential over-reliance on specific features and encourages the model to learn more generalized patterns. This regularization technique is particularly important when working with datasets that are prone to overfitting. Among the configurations tested, the combination of an 80&#x0025; training size, a batch size of 20, and 100 epochs was found to be optimal based on the performance metrics. Hyperparameters were optimized via grid search with 5-fold cross-validation on the training set, and the held-out test set was used only for final evaluation. A learning rate of 0.001 strikes an optimal balance, facilitating rapid convergence while maintaining stability. The neural network model demonstrated the best overall performance, achieving the lowest MAE of 0.052, the lowest RMSE of 0.064, and the highest <italic>R</italic><sup>2</sup> value of 0.891. This suggests that the neural network model captured the variation in actual team winning percentages more effectively than the other models.</p>
<p>Under the forecasting-style chronological split, performance shifts modestly relative to the random split but the ranking remains unchanged. The neural network still leads with the lowest errors and highest fit (<italic>&#x0394;</italic> MAE&#x2009;&#x003D;&#x2009;&#x2009;&#x002B;&#x2009;11.5&#x0025;; <italic>&#x0394;</italic> RMSE&#x2009;&#x003D;&#x2009;&#x002B;&#x2009;12.5&#x0025;; <italic>&#x0394; R</italic><sup>2</sup>&#x2009;&#x003D;&#x2009;&#x2212;0.029), followed by random forest (<italic>&#x0394;</italic> MAE&#x2009;&#x003D;&#x2009;&#x002B;&#x2009;3.3&#x0025;; <italic>&#x0394;</italic> RMSE&#x2009;&#x003D;&#x2009;&#x002B;&#x2009;5.3&#x0025;; <italic>&#x0394; R</italic><sup>2</sup>&#x2009;&#x003D;&#x2009;&#x2212;0.024). The Pythagorean baseline shows slightly lower error under chronology (<italic>&#x0394;</italic> MAE&#x2009;&#x003D;&#x2009;&#x2212;10.6&#x0025;; <italic>&#x0394;</italic> RMSE&#x2009;&#x003D;&#x2009;&#x2212;15.9&#x0025;) with essentially unchanged <italic>&#x0394; R</italic><sup>2</sup> (&#x2212;0.005). Despite these shifts, both machine-learning models continue to outperform the Pythagorean approach overall.</p>
<p>Additionally, the predicted average winning percentages for each model provide insight into potential under- or over-estimation tendencies. The neural network&#x0027;s prediction (0.493) was closest to the actual mean winning percentage (0.500), while the Pythagorean and Random Forest models predicted lower average values (0.434 and 0.439, respectively). An examination of season-by-season predictive performance reveals that the neural network model consistently produced strong results, with MAE values typically ranging between 0.05 and 0.06 and <italic>R</italic><sup>2</sup> values exceeding 0.80 (see <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>). However, two notable exceptions&#x2014;2016 and 2020&#x2014;stand out due to elevated error metrics. In both years, the MAE exceeded 0.07, and the RMSE surpassed 0.09, indicating decreased model accuracy during these periods. <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref> visualizes the season-by-season <italic>R</italic><sup>2</sup> scores of the three models from 2003 to 2023, highlighting relative consistency in neural network performance and the notable dips in 2016, 2020, and 2022.</p>
<table-wrap id="T2" position="float"><label>Table 2</label>
<caption><p>Neural network prediction results by year.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Season</th>
<th valign="top" align="center">Actual winning &#x0025;</th>
<th valign="top" align="center">Predicted winning &#x0025;</th>
<th valign="top" align="center">MAE</th>
<th valign="top" align="center">RMSE</th>
<th valign="top" align="center"><italic>R</italic><sup>2</sup></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">2003</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.502</td>
<td valign="top" align="center">0.065</td>
<td valign="top" align="center">0.078</td>
<td valign="top" align="center">0.827</td>
</tr>
<tr>
<td valign="top" align="left">2004</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.508</td>
<td valign="top" align="center">0.067</td>
<td valign="top" align="center">0.081</td>
<td valign="top" align="center">0.816</td>
</tr>
<tr>
<td valign="top" align="left">2005</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.054</td>
<td valign="top" align="center">0.064</td>
<td valign="top" align="center">0.906</td>
</tr>
<tr>
<td valign="top" align="left">2006</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.493</td>
<td valign="top" align="center">0.064</td>
<td valign="top" align="center">0.078</td>
<td valign="top" align="center">0.809</td>
</tr>
<tr>
<td valign="top" align="left">2007</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.490</td>
<td valign="top" align="center">0.056</td>
<td valign="top" align="center">0.067</td>
<td valign="top" align="center">0.894</td>
</tr>
<tr>
<td valign="top" align="left">2008</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.490</td>
<td valign="top" align="center">0.066</td>
<td valign="top" align="center">0.078</td>
<td valign="top" align="center">0.854</td>
</tr>
<tr>
<td valign="top" align="left">2009</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.495</td>
<td valign="top" align="center">0.055</td>
<td valign="top" align="center">0.070</td>
<td valign="top" align="center">0.875</td>
</tr>
<tr>
<td valign="top" align="left">2010</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.487</td>
<td valign="top" align="center">0.062</td>
<td valign="top" align="center">0.072</td>
<td valign="top" align="center">0.846</td>
</tr>
<tr>
<td valign="top" align="left">2011</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.496</td>
<td valign="top" align="center">0.063</td>
<td valign="top" align="center">0.072</td>
<td valign="top" align="center">0.872</td>
</tr>
<tr>
<td valign="top" align="left">2012</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.488</td>
<td valign="top" align="center">0.058</td>
<td valign="top" align="center">0.073</td>
<td valign="top" align="center">0.855</td>
</tr>
<tr>
<td valign="top" align="left">2013</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.489</td>
<td valign="top" align="center">0.059</td>
<td valign="top" align="center">0.071</td>
<td valign="top" align="center">0.863</td>
</tr>
<tr>
<td valign="top" align="left">2014</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.498</td>
<td valign="top" align="center">0.053</td>
<td valign="top" align="center">0.065</td>
<td valign="top" align="center">0.889</td>
</tr>
<tr>
<td valign="top" align="left">2015</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.492</td>
<td valign="top" align="center">0.055</td>
<td valign="top" align="center">0.069</td>
<td valign="top" align="center">0.865</td>
</tr>
<tr>
<td valign="top" align="left">2016</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.503</td>
<td valign="top" align="center">0.078</td>
<td valign="top" align="center">0.094</td>
<td valign="top" align="center">0.775</td>
</tr>
<tr>
<td valign="top" align="left">2017</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.494</td>
<td valign="top" align="center">0.069</td>
<td valign="top" align="center">0.086</td>
<td valign="top" align="center">0.810</td>
</tr>
<tr>
<td valign="top" align="left">2018</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.486</td>
<td valign="top" align="center">0.051</td>
<td valign="top" align="center">0.062</td>
<td valign="top" align="center">0.877</td>
</tr>
<tr>
<td valign="top" align="left">2019</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.488</td>
<td valign="top" align="center">0.061</td>
<td valign="top" align="center">0.082</td>
<td valign="top" align="center">0.823</td>
</tr>
<tr>
<td valign="top" align="left">2020</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.488</td>
<td valign="top" align="center">0.071</td>
<td valign="top" align="center">0.091</td>
<td valign="top" align="center">0.818</td>
</tr>
<tr>
<td valign="top" align="left">2021</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.483</td>
<td valign="top" align="center">0.066</td>
<td valign="top" align="center">0.083</td>
<td valign="top" align="center">0.752</td>
</tr>
<tr>
<td valign="top" align="left">2022</td>
<td valign="top" align="center">0.501</td>
<td valign="top" align="center">0.488</td>
<td valign="top" align="center">0.070</td>
<td valign="top" align="center">0.091</td>
<td valign="top" align="center">0.748</td>
</tr>
<tr>
<td valign="top" align="left">2023</td>
<td valign="top" align="center">0.500</td>
<td valign="top" align="center">0.490</td>
<td valign="top" align="center">0.055</td>
<td valign="top" align="center">0.064</td>
<td valign="top" align="center">0.840</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn2"><p>All numbers represent averages across 32 teams per season.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="F1" position="float"><label>Figure 1</label>
<caption><p>Model comparison of <italic>R</italic><sup>2</sup> values by season.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1638446-g001.tif"><alt-text content-type="machine-generated">Line graph showing R&#x00B2; scores from 2003 to 2023 for three models: Pythagorean (green), Random Forest (blue), and Neural Network (purple). Scores fluctuate, with Neural Network generally scoring highest, followed by Random Forest and Pythagorean.</alt-text>
</graphic>
</fig>
<p>The 2016 season, in particular, was widely regarded as one of the most unpredictable in NFL history. Numerous teams significantly underperformed relative to expectations, including the Cleveland Browns, San Francisco 49ers, New York Jets, Chicago Bears, and Jacksonville Jaguars. These franchises had been expected to show signs of improvement following roster changes but instead regressed dramatically. The Browns, for instance, finished the season with just one win, down from three the previous year, despite offseason acquisitions. The Jets dropped from ten wins in 2015 to just five in 2016. Additional contributing factors to the volatility of that season include inconsistent officiating and an unusually high number of penalties, especially concerning celebration rules that were later relaxed in 2017. Furthermore, injuries to key players such as Derek Carr, Marcus Mariota, Adrian Peterson, and Rob Gronkowski disrupted team dynamics and may have reduced the predictive reliability of input metrics. The 2020 season may have been similarly impacted by disruptions related to the COVID-19 pandemic, which affected player availability, game schedules, and team performance consistency. The 2022 NFL season presented a unique set of challenges that contributed to decreased predictive accuracy in our models. While the 2016 and 2020 seasons were marked by significant unpredictability due to factors like team underperformance and the COVID-19 pandemic, the 2022 season&#x0027;s complexity stemmed from a confluence of unexpected team performances, significant injuries, and coaching transitions. The Tampa Bay Buccaneers and Green Bay Packers, both considered strong Super Bowl contenders, experienced offensive struggles that deviated sharply from projections. 
The Buccaneers, for instance, suffered unexpected losses to underperforming teams like the Carolina Panthers and Pittsburgh Steelers, highlighting the volatility of team performances during the season (<xref ref-type="bibr" rid="B21">21</xref>). Injuries also played a pivotal role in the season&#x2019;s unpredictability. Key players returning from major injuries, such as J.K. Dobbins of the Baltimore Ravens, faced setbacks that impacted team performance. The league saw a high number of players returning from ACL injuries, introducing variability in player availability (<xref ref-type="bibr" rid="B22">22</xref>). Collectively, these anomalies help explain the comparatively higher prediction errors in these years.</p>
</sec>
<sec id="s3b"><label>3.2</label><title>Paired bootstrap test: model comparison</title>
<p>To rigorously compare the predictive accuracy of the models, we conducted a paired bootstrap analysis with 1,000 iterations, estimating the distribution of differences in MAE and RMSE across model pairs. <xref ref-type="table" rid="T3">Table&#x00A0;3</xref> presents the mean difference and 95&#x0025; confidence intervals for each comparison. The paired bootstrap analysis shows that the neural network model achieves the best predictive performance, significantly outperforming the Pythagorean method in both MAE (mean difference&#x2009;&#x003D;&#x2009;&#x2212;0.029, 95&#x0025; CI [&#x2212;0.033, &#x2212;0.024]) and RMSE (mean difference&#x2009;&#x003D;&#x2009;&#x2212;0.023, 95&#x0025; CI [&#x2212;0.029, &#x2212;0.018]), and significantly outperforming the random forest model in RMSE (mean difference&#x2009;&#x003D;&#x2009;&#x2212;0.014, 95&#x0025; CI [&#x2212;0.022, &#x2212;0.006]). There is no significant difference between the neural network and random forest in MAE. These findings support the neural network as the most effective predictive model for estimating a team&#x0027;s winning percentage.</p>
<table-wrap id="T3" position="float"><label>Table 3</label>
<caption><p>Paired bootstrap test.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Metric</th>
<th valign="top" align="center">Comparison</th>
<th valign="top" align="center">Mean difference</th>
<th valign="top" align="center">95&#x0025; CI lower</th>
<th valign="top" align="center">95&#x0025; CI upper</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MAE</td>
<td valign="top" align="center">NN vs. RF</td>
<td valign="top" align="center">0.001</td>
<td valign="top" align="center">&#x2212;0.005</td>
<td valign="top" align="center">0.005</td>
</tr>
<tr>
<td valign="top" align="left">MAE</td>
<td valign="top" align="center">NN vs. PY</td>
<td valign="top" align="center">&#x2212;0.029</td>
<td valign="top" align="center">&#x2212;0.033</td>
<td valign="top" align="center">&#x2212;0.024</td>
</tr>
<tr>
<td valign="top" align="left">MAE</td>
<td valign="top" align="center">RF vs. PY</td>
<td valign="top" align="center">&#x2212;0.029</td>
<td valign="top" align="center">&#x2212;0.035</td>
<td valign="top" align="center">&#x2212;0.023</td>
</tr>
<tr>
<td valign="top" align="left">RMSE</td>
<td valign="top" align="center">NN vs. RF</td>
<td valign="top" align="center">&#x2212;0.014</td>
<td valign="top" align="center">&#x2212;0.022</td>
<td valign="top" align="center">&#x2212;0.006</td>
</tr>
<tr>
<td valign="top" align="left">RMSE</td>
<td valign="top" align="center">NN vs. PY</td>
<td valign="top" align="center">&#x2212;0.023</td>
<td valign="top" align="center">&#x2212;0.029</td>
<td valign="top" align="center">&#x2212;0.018</td>
</tr>
<tr>
<td valign="top" align="left">RMSE</td>
<td valign="top" align="center">RF vs. PY</td>
<td valign="top" align="center">&#x2212;0.009</td>
<td valign="top" align="center">&#x2212;0.016</td>
<td valign="top" align="center">&#x2212;0.003</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn3"><p>NN, neural network; RF, random forest; PY, Pythagorean expectation. Negative mean differences indicate that the first model in the comparison achieved lower error than the second.</p></fn>
<fn id="table-fn4"><p>Confidence intervals (CI) were calculated using 1,000 paired bootstrap iterations.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3c"><label>3.3</label><title>Understanding feature impact through SHAP</title>
<p>To better understand how various game metrics influence predicted winning percentages in the trained neural network model, we employed SHAP (SHapley Additive exPlanations) analysis. The resulting SHAP beeswarm plot visualizes the contribution of each feature to the model&#x0027;s output&#x2014;NFL team winning percentage&#x2014;across all samples in the test dataset. The features with the most significant impact on predicted winning percentages are points scored and points allowed. These two variables dominate the top of the plot with the broadest SHAP value distributions (see <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>). Specifically, higher point totals (shown in red) strongly increase predicted winning percentages (positive SHAP values), while lower point totals (blue) reduce them. High-scoring seasons (bright red points) almost universally exhibit large positive SHAP values, boosting predicted win rates by as much as 0.30 or more. The average margin of victory (avg_mov) and turnovers also show meaningful influence, albeit less than the core scoring variables. Higher margin values (red) generally increase predicted winning percentages, while lower or negative margins (blue) suppress predictions. Turnovers exhibit a similar trend: higher turnover counts (red) are associated with negative SHAP values, indicating that teams committing more turnovers are predicted to have lower winning percentages.</p>
<fig id="F2" position="float"><label>Figure 2</label>
<caption><p>SHAP summary plot: feature impact on predicted win percentage (neural network model).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fspor-07-1638446-g002.tif"><alt-text content-type="machine-generated">SHAP summary plot showing the impact of various features on a model's output. Features like points, points_op, and avg_mov are plotted against SHAP values. Data points are color-coded by feature value, ranging from low (blue) to high (red). Points and points_op have more widespread impact, while other features like penalties and pass_att show impact closer to zero.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="conclusions"><label>4</label><title>Conclusion</title>
<p>This study empirically evaluated the effectiveness of the traditional Pythagorean expectation formula against advanced machine learning methods, specifically random forest regression and neural network models, in predicting NFL teams&#x2019; winning percentages over a substantial 21-season dataset (2003&#x2013;2023). The findings demonstrate that the machine learning models significantly outperform the traditional Pythagorean expectation approach, achieving greater predictive accuracy as evidenced by lower MAE, RMSE, and higher <italic>R</italic><sup>2</sup> values. Specifically, the neural network model exhibited the strongest predictive performance, with the lowest MAE (0.052), lowest RMSE (0.064), and highest <italic>R</italic><sup>2</sup> value (0.891). The random forest model also consistently outperformed the Pythagorean approach, indicating the advantage of leveraging data-driven ensemble methods for capturing complex nonlinear relationships among NFL performance metrics. Importantly, under the forecasting-style chronological evaluation, the neural network achieved an average MAE of 0.058. Because our outcome variable is winning percentage, it is useful to translate this value into season outcomes. In a 17-game NFL season, one game corresponds to approximately 1&#x2009;&#x00F7;&#x2009;17&#x2009;&#x003D;&#x2009;0.059 (&#x2248;5.9&#x0025;) of winning percentage. Thus, an error of 0.058 equates to about one game difference in the standings. This level of predictive accuracy is practically meaningful, as a single win can determine playoff qualification, alter betting market expectations, and influence front-office or coaching evaluations.</p>
<p>The feature importance analysis using SHAP values further revealed critical insights into key variables influencing winning predictions. Consistent with prior literature (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B9">9</xref>), points scored and points allowed emerged as dominant predictors. However, additional metrics such as average margin of victory, turnovers, rushing yards, passing efficiency, and penalties also significantly contributed to predictive accuracy, suggesting the importance of adopting comprehensive analytical frameworks rather than simplified scoring-based predictions alone.</p>
<p>This study contributes to existing sport management and analytics literature by validating advanced analytical methods within NFL contexts, demonstrating their accuracy and flexibility in predictive tasks compared to traditional formulas. These findings align with previous research highlighting the effectiveness of machine learning techniques in sports prediction (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B23">23</xref>), thereby reinforcing the growing scholarly consensus regarding their value. Specifically, previous machine learning studies in sports prediction&#x2014; particularly in the NFL context (<xref ref-type="bibr" rid="B24">24</xref>), have typically focused on classification problems, where the outcome is categorical (i.e., win or loss). Only a limited number of studies have addressed continuous prediction tasks, such as spread and scoreline (<xref ref-type="bibr" rid="B8">8</xref>). In terms of predictive accuracy, classification models in the NFL context have achieved between 75&#x0025; and 86&#x0025;, while models predicting continuous outcomes have attained accuracy levels between 72&#x0025; and 77&#x0025; (<xref ref-type="bibr" rid="B8">8</xref>). The current study explains 89&#x0025; of the variance in team winning percentage, with an average prediction error of approximately 5&#x0025;, indicating a relatively higher level of predictive accuracy.</p>
<p>From a practical perspective, this research provides valuable implications for sports analysts, coaches, and management in professional football. Given the neural network&#x0027;s minimal error margin, sports analysts can utilize this approach to predict team winning percentages and playoff outcomes. Similarly, sports bettors could leverage these predictive insights to estimate team success and strategically inform betting decisions, including predicting playoff appearances and championship outcomes. NFL teams could adopt neural network-based deep learning models to evaluate and predict their performance, determining whether team performance aligns with, surpasses, or falls short of expectations (<xref ref-type="bibr" rid="B5">5</xref>). Additionally, such analytical tools can assist coaches and management in systematically reviewing critical in-game decisions related to scoring opportunities, fourth down strategies, turnover management, and clock management, ultimately enhancing strategic decision-making and competitive performance (<xref ref-type="bibr" rid="B5">5</xref>). Overall, this study underscores the substantial potential of machine learning methods, notably neural networks and random forest models, as robust decision-support tools in contemporary sport management, enhancing strategic planning and decision-making processes within professional sports organizations.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability"><title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://www.pro-football-reference.com">https://www.pro-football-reference.com</ext-link>.</p>
</sec>
<sec id="s6" sec-type="author-contributions"><title>Author contributions</title>
<p>CW: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. JK: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. YY: Writing &#x2013; review &#x0026; editing, Formal analysis, Data curation, Software, Investigation, Visualization. SJ: Writing &#x2013; review &#x0026; editing, Investigation, Data curation, Formal analysis, Methodology, Visualization.</p>
</sec>
<sec id="s7" sec-type="funding-information"><title>Funding</title>
<p>The author(s) declare that no financial support was received for the research and/or publication of this article.</p>
</sec>
<sec id="s8" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s10" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><citation citation-type="book"><collab>CNBC</collab>. <source>2024 NFL Franchise Values: See Where all 32 Teams Rank</source>. <publisher-loc>Englewood Cliffs, NJ</publisher-loc>: <publisher-name>CNBC</publisher-name> (<year>2024</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.cnbc.com/2024/09/05/official-nfl-team-valuations-2024.html">https://www.cnbc.com/2024/09/05/official-nfl-team-valuations-2024.html</ext-link></citation></ref>
<ref id="B2"><label>2.</label><citation citation-type="book"><collab>NFL</collab>. <source>2023 NFL Playoffs Set All-Time Viewership Records</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>National Football League (NFL)</publisher-name> (<year>2024</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.nfl.com/news/2023-nfl-playoffs-set-all-time-viewership-records">https://www.nfl.com/news/2023-nfl-playoffs-set-all-time-viewership-records</ext-link></citation></ref>
<ref id="B3"><label>3.</label><citation citation-type="book"><collab>NFL</collab>. <source>NFL Record and Fact Book</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>NFL Enterprises LLC</publisher-name> (<year>2022</year>). p. <fpage>28</fpage>&#x2013;<lpage>9</lpage>. <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://static.www.nfl.com/league/apps/league-site/media-guides/2022/2022_NFL_Record_and_Fact_Book.pdf">https://static.www.nfl.com/league/apps/league-site/media-guides/2022/2022_NFL_Record_and_Fact_Book.pdf</ext-link></citation></ref>
<ref id="B4"><label>4.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Winston</surname><given-names>WL</given-names></name></person-group>. <source>Mathletics: How Gamblers, Managers, and Sports Enthusiasts Use Mathematics in Baseball, Basketball, and Football</source>. <publisher-loc>Princeton, NJ</publisher-loc>: <publisher-name>Princeton University Press</publisher-name> (<year>2012</year>).</citation></ref>
<ref id="B5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Caro</surname><given-names>CA</given-names></name><name><surname>Machtmes</surname><given-names>R</given-names></name></person-group>. <article-title>Testing the utility of the pythagorean expectation formula on division one college football: an examination and comparison to the morey model</article-title>. <source>J Bus Econ Res</source>. (<year>2013</year>) <volume>11</volume>(<issue>12</issue>):<fpage>537</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.19030/jber.v11i12.8261</pub-id></citation></ref>
<ref id="B6"><label>6.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Davenport</surname><given-names>C</given-names></name><name><surname>Woolner</surname><given-names>K</given-names></name></person-group>. <source>Revisiting the Pythagorean Theorem: Putting Bill James&#x2019; Pythagorean Theorem to the Test</source>. <publisher-loc>Cleveland, OH</publisher-loc>: <publisher-name>Baseball Prospectus</publisher-name> (<year>1999</year>).</citation></ref>
<ref id="B7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname><given-names>JW</given-names></name></person-group>. <article-title>Perspectives on the sports analytics revolution: an introduction to the special issue</article-title>. <source>J Appl Sport Manag</source>. (<year>2022</year>) <volume>14</volume>(<issue>4</issue>):<fpage>1</fpage>&#x2013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.7290/jasm14eslv</pub-id></citation></ref>
<ref id="B8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Horvat</surname><given-names>T</given-names></name><name><surname>Job</surname><given-names>J</given-names></name></person-group>. <article-title>The use of machine learning in sport outcome prediction: a review</article-title>. <source>Adv Rev</source>. (<year>2020</year>) <volume>10</volume>(<issue>5</issue>):<fpage>1</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1002/widm.1380</pub-id></citation></ref>
<ref id="B9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lock</surname><given-names>D</given-names></name><name><surname>Nettleton</surname><given-names>D</given-names></name></person-group>. <article-title>Using random forests to estimate win probability before each play of an NFL game</article-title>. <source>J Quant Anal Sports</source>. (<year>2014</year>) <volume>10</volume>(<issue>2</issue>):<fpage>197</fpage>&#x2013;<lpage>205</lpage>. <pub-id pub-id-type="doi">10.1515/jqas-2013-0100</pub-id></citation></ref>
<ref id="B10"><label>10.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anyama</surname><given-names>OU</given-names></name><name><surname>Igiri</surname><given-names>CP</given-names></name></person-group>. <article-title>An application of linear regression &#x0026; artificial neural network model in the NFL result prediction</article-title>. <source>Int J Eng Res Technol</source>. (<year>2015</year>) <volume>4</volume>(<issue>1</issue>):<fpage>457</fpage>&#x2013;<lpage>61</lpage>. <pub-id pub-id-type="doi">10.17577/IJERTV4IS010426</pub-id></citation></ref>
<ref id="B11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gr&#x00F6;mping</surname><given-names>U</given-names></name></person-group>. <article-title>Variable importance assessment in regression: linear regression versus random forest</article-title>. <source>Am Stat</source>. (<year>2009</year>) <volume>63</volume>(<issue>4</issue>):<fpage>308</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1198/tast.2009.08199</pub-id></citation></ref>
<ref id="B12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aldi</surname><given-names>F</given-names></name><name><surname>Hadi</surname><given-names>F</given-names></name><name><surname>Rahmi</surname><given-names>NA</given-names></name><name><surname>Defit</surname><given-names>S</given-names></name></person-group>. <article-title>Standardscaler&#x2019;s potential in enhancing breast cancer accuracy using machine learning</article-title>. <source>J Appl Eng Technol Sci</source>. (<year>2023</year>) <volume>5</volume>(<issue>1</issue>):<fpage>401</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.37385/jaets.v5i1.3080</pub-id></citation></ref>
<ref id="B13"><label>13.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Mankodi</surname><given-names>A</given-names></name><name><surname>Bhatt</surname><given-names>A</given-names></name><name><surname>Chaudhury</surname><given-names>B</given-names></name></person-group>. <article-title>Evaluation of neural network models for performance prediction of scientific applications</article-title>. <conf-name>2020 IEEE Region 10 Conference (TENCON)</conf-name> (<year>2020</year>). <publisher-name>IEEE</publisher-name>. p. <fpage>426</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1109/TENCON50793.2020.9293788</pub-id></citation></ref>
<ref id="B14"><label>14.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Potdar</surname><given-names>K</given-names></name><name><surname>Pardawala</surname><given-names>T</given-names></name><name><surname>Pai</surname><given-names>CD</given-names></name></person-group>. <article-title>A comparative study of categorical variable encoding technique for neural network classifiers</article-title>. <source>Int J Comput Appl</source>. (<year>2017</year>) <volume>175</volume>(<issue>4</issue>):<fpage>7</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.5120/ijca2017915495</pub-id></citation></ref>
<ref id="B15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Breiman</surname><given-names>L</given-names></name></person-group>. <article-title>Random forests</article-title>. <source>Mach Learn</source>. (<year>2001</year>) <volume>45</volume>(<issue>1</issue>):<fpage>5</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></citation></ref>
<ref id="B16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>LeCun</surname><given-names>Y</given-names></name><name><surname>Bengio</surname><given-names>Y</given-names></name><name><surname>Hinton</surname><given-names>G</given-names></name></person-group>. <article-title>Deep learning</article-title>. <source>Nature</source>. (<year>2015</year>) <volume>521</volume>:<fpage>436</fpage>&#x2013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id><pub-id pub-id-type="pmid">26017442</pub-id></citation></ref>
<ref id="B17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pang</surname><given-names>B</given-names></name><name><surname>Nijkamp</surname><given-names>E</given-names></name><name><surname>Wu</surname><given-names>YN</given-names></name></person-group>. <article-title>Deep learning with TensorFlow: a review</article-title>. <source>J Educ Behav Stat</source>. (<year>2020</year>) <volume>45</volume>(<issue>2</issue>):<fpage>227</fpage>&#x2013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.3102/1076998619872761</pub-id></citation></ref>
<ref id="B18"><label>18.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Al-Maqaleh</surname><given-names>BM</given-names></name><name><surname>Al-Mansoub</surname><given-names>AA</given-names></name><name><surname>Al-Badani</surname><given-names>FN</given-names></name></person-group>. <article-title>Forecasting using artificial neural network and statistics models</article-title>. <source>Int J Educ Manag Eng</source>. (<year>2016</year>) <volume>3</volume>:<fpage>20</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.5815/ijeme.2016.03.03</pub-id></citation></ref>
<ref id="B19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oytun</surname><given-names>M</given-names></name><name><surname>Tinazci</surname><given-names>C</given-names></name><name><surname>Sekeroglu</surname><given-names>B</given-names></name><name><surname>Acikada</surname><given-names>C</given-names></name><name><surname>Yavuz</surname><given-names>HU</given-names></name></person-group>. <article-title>Performance prediction and evaluation in female handball players using machine learning models</article-title>. <source>IEEE Access</source>. (<year>2020</year>) <volume>8</volume>:<fpage>116321</fpage>&#x2013;<lpage>35</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2020.3004182</pub-id></citation></ref>
<ref id="B20"><label>20.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Oshiro</surname><given-names>TM</given-names></name><name><surname>Perez</surname><given-names>PS</given-names></name><name><surname>Baranauskas</surname><given-names>JA</given-names></name></person-group>. <article-title>How many trees in a random forest?</article-title> In: <person-group person-group-type="editor"><name><surname>Perner</surname><given-names>P</given-names></name></person-group>, editor. <source>Machine Learning and Data Mining in Pattern Recognition: MLDM 2012</source>. Vol. 7376. <publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2012</year>). p. <fpage>154</fpage>&#x2013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-31537-4_13</pub-id></citation></ref>
<ref id="B21"><label>21.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Battista</surname><given-names>J</given-names></name></person-group>. <source>Five Biggest Surprises of 2022 NFL Season Thus Far: Struggles for Buccaneers, Packers Stand Out</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>National Football League (NFL)</publisher-name> (<year>2022</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://NFL.com">NFL.com</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://www.nfl.com/news/five-biggest-surprises-of-2022-nfl-season-thus-far-struggles-for-buccaneers-pack">https://www.nfl.com/news/five-biggest-surprises-of-2022-nfl-season-thus-far-struggles-for-buccaneers-pack</ext-link></citation></ref>
<ref id="B22"><label>22.</label><citation citation-type="book"><collab>NFL Nation</collab>. <source>NFL 2022 Midseason Report: AFC, NFC Questions and Key Injuries</source>. <publisher-loc>Bristol, CT</publisher-loc>: <publisher-name>ESPN</publisher-name> (<year>2022</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.espn.com/nfl/story/_/id/34973666/nfl-2022-midseason-report-afc-nfc-questions-key-injuries">https://www.espn.com/nfl/story/_/id/34973666/nfl-2022-midseason-report-afc-nfc-questions-key-injuries</ext-link></citation></ref>
<ref id="B23"><label>23.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname><given-names>JW</given-names></name><name><surname>Magnusen</surname><given-names>M</given-names></name><name><surname>Jeong</surname><given-names>S</given-names></name></person-group>. <article-title>March madness prediction: different machine learning approaches with non-box score statistics</article-title>. <source>MDE Manage Decis Econ</source>. (<year>2023</year>) <volume>44</volume>(<issue>4</issue>):<fpage>2223</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1002/mde.3814</pub-id></citation></ref>
<ref id="B24"><label>24.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bunker</surname><given-names>R</given-names></name><name><surname>Susnjak</surname><given-names>T</given-names></name></person-group>. <article-title>The application of machine learning techniques for predicting match results in team sport: a review</article-title>. <source>J Artif Intell Res</source>. (<year>2022</year>) <volume>73</volume>:<fpage>1285</fpage>&#x2013;<lpage>322</lpage>. <pub-id pub-id-type="doi">10.1613/jair.1.13509</pub-id></citation></ref></ref-list>
</back>
</article>