<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id><journal-title-group>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2026.1774436</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>An explainable ensemble machine learning model using baseline blood transcriptomics to predict Parkinson&#x0027;s disease motor progression</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>F&#x0131;rat</surname><given-names>Yelda</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3289646/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" 
vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
</contrib-group>
<aff id="aff1"><institution>Department of Computer Engineering, Mudanya University</institution>, <city>Bursa</city>, <country country="TR">T&#x00FC;rkiye</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Yelda F&#x0131;rat <email xlink:href="mailto:yelda.firat@mudanya.edu.tr">yelda.firat@mudanya.edu.tr</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18"><day>18</day><month>02</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2026</year></pub-date>
<volume>8</volume><elocation-id>1774436</elocation-id>
<history>
<date date-type="received"><day>23</day><month>12</month><year>2025</year></date>
<date date-type="rev-recd"><day>26</day><month>01</month><year>2026</year></date>
<date date-type="accepted"><day>27</day><month>01</month><year>2026</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 F&#x0131;rat.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>F&#x0131;rat</copyright-holder><license><ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Introduction</title>
<p>Predicting Parkinson&#x0027;s disease (PD) motor progression remains challenging despite advances in neuroimaging. Blood-based transcriptomic profiling offers a more accessible and cost-effective alternative. This study aimed to develop and validate a machine learning approach using blood-based transcriptomic data to predict 12-month motor severity in PD and to identify the transcriptomic features and biological pathways most strongly associated with progression.</p>
</sec><sec><title>Methods</title>
<p>A Stacking Regressor ensemble model combining three gradient boosting algorithms (XGBoost, LightGBM, CatBoost) was developed using baseline Parkinson&#x0027;s Progression Markers Initiative (PPMI) data (<italic>n</italic>&#x2009;&#x003D;&#x2009;390), integrating blood RNA sequencing (RNA-seq) and clinical features to predict 12-month UPDRS Part III scores. SHapley Additive exPlanations (SHAP) analysis was applied to identify key prognostic features, evaluating seven PD risk genes (SNCA, LRRK2, GBA, PRKN, PINK1, PARK7, VPS35) and pathway scores for mitochondrial dysfunction, neuroinflammation, and autophagy.</p>
</sec><sec><title>Results</title>
<p>On an independent test set (<italic>n</italic>&#x2009;&#x003D;&#x2009;78), the model achieved a Coefficient of Determination (R&#x00B2;) of 0.551 and Mean Absolute Error (MAE) of 6.01. SHAP analysis identified the baseline UPDRS&#x2009;&#x00D7;&#x2009;PINK1 interaction (UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1) as the most influential feature (mean |SHAP|&#x2009;&#x003D;&#x2009;0.283). Among transcriptomic features, VPS35 (mean |SHAP|&#x2009;&#x003D;&#x2009;0.010), GBA, and LRRK2 were most prominent. Mitochondrial dysfunction showed the highest pathway contribution (mean |SHAP|&#x2009;&#x003D;&#x2009;0.008).</p>
</sec><sec><title>Discussion</title>
<p>The study establishes that machine learning integrating blood transcriptomics and clinical data effectively predicts motor progression in PD. Crucially, the interplay between initial clinical state and specific genetic backgrounds&#x2014;particularly PINK1&#x2014;is a more powerful prognostic indicator than any factor alone. This study provides systematic evidence that mitochondrial dysfunction is a dominant prognostic signal for disease progression, nominating key genes and pathways for future mechanistic and therapeutic investigation.</p>
</sec>
</abstract>
<kwd-group>
<kwd>mitochondrial dysfunction</kwd>
<kwd>Parkinson&#x0027;s disease</kwd>
<kwd>PPMI</kwd>
<kwd>RNA-seq</kwd>
<kwd>SHAP</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement></funding-group><counts>
<fig-count count="3"/>
<table-count count="1"/><equation-count count="0"/><ref-count count="31"/><page-count count="9"/><word-count count="0"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Personalized Medicine</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>PD is a neurodegenerative disease characterized by progressive dopaminergic neuron loss, affecting approximately 1&#x0025; of individuals over 60 worldwide. Clinical course varies greatly; some patients remain stable for years, while others experience rapid motor and cognitive decline. This heterogeneity underscores the need for reliable biomarkers predicting early progression. Accurate progression prediction is critical for personalized treatment strategies and optimizing patient selection in clinical trials.</p>
<p>In recent years, artificial intelligence (AI) methods have emerged as powerful tools for predicting PD progression and understanding underlying biological mechanisms. PD&#x0027;s clinical and genetic heterogeneity makes prognosis difficult with traditional methods, whereas AI models effectively process complex data structures. AI models increasingly succeed at diagnosing and monitoring PD (<xref ref-type="bibr" rid="B1">1</xref>). Various data types predict PD progression: motor activity from wearable sensors (<xref ref-type="bibr" rid="B2">2</xref>), surface electromyography signals (<xref ref-type="bibr" rid="B3">3</xref>), and force platform balance data (<xref ref-type="bibr" rid="B4">4</xref>) successfully model motor symptom changes. Transcriptomic data, particularly blood-based RNA-seq, offer rich prognostic markers by revealing immune cell and gene expression changes in early disease stages (<xref ref-type="bibr" rid="B5">5</xref>). Deep recurrent neural networks trained on RNA-seq data accurately predict PD progression (<xref ref-type="bibr" rid="B6">6</xref>). Deep sequencing of small non-coding RNAs (sncRNAs) reveals regulatory modules and discriminative transcriptomic features during progression (<xref ref-type="bibr" rid="B7">7</xref>). These approaches critically identify patient subtypes and develop personalized progression models (<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>However, the black-box mechanisms of complex AI models remain a major obstacle to clinical integration. Explainable Artificial Intelligence (XAI) methods, such as SHapley Additive exPlanations (SHAP), address this by revealing which features influence predictions and how. SHAP analysis enhances model interpretability and reliability across applications, from drug development (<xref ref-type="bibr" rid="B9">9</xref>) to hospital data (<xref ref-type="bibr" rid="B10">10</xref>) and chronic disease management (<xref ref-type="bibr" rid="B11">11</xref>). XAI&#x0027;s potential and challenges in healthcare remain active areas of research (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B13">13</xref>).</p>
<p>The machine learning model used in this study employs ensemble learning, combining multiple learners for robust, accurate predictions. Ensemble learning effectively predicts Parkinson&#x0027;s disease progression (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B14">14</xref>). Ensemble methods outperform individual models, particularly in observational, noisy healthcare datasets (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>). XGBoost is widely used in medical prediction models due to its high performance and natural compatibility with XAI techniques like SHAP (<xref ref-type="bibr" rid="B17">17</xref>).</p>
<p>In this study, an integrated approach is presented combining machine learning with molecular genetic analysis. Using baseline blood RNA-seq and clinical data from the Parkinson&#x0027;s Progression Markers Initiative (PPMI) dataset, a Stacking Regressor model was developed to predict 12-month motor status (UPDRS Part III score). The model generates robust predictions by combining three gradient boosting variants (XGBoost, LightGBM, CatBoost). Interpretability was ensured through SHAP analysis, identifying features most important for predicting progression. SHAP analysis evaluated seven PD risk genes (SNCA, LRRK2, GBA, PRKN, PINK1, PARK7, VPS35). VPS35 showed the highest individual prognostic importance (mean |SHAP|&#x2009;&#x003D;&#x2009;0.010), followed by GBA and LRRK2. However, the baseline UPDRS&#x2009;&#x00D7;&#x2009;PINK1 interaction (UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1) exhibited the highest prognostic value among all features (mean |SHAP|&#x2009;&#x003D;&#x2009;0.283). PINK1 and PARK7 have been shown to regulate mitophagy, clearing damaged mitochondria via autophagy (<xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B19">19</xref>). In the PINK1/PARKIN pathway, PINK1 marks damaged mitochondria and activates PARKIN (<xref ref-type="bibr" rid="B20">20</xref>). PARK7 (DJ-1) provides cellular protection against oxidative stress and regulates mitochondrial homeostasis (<xref ref-type="bibr" rid="B21">21</xref>). Although SNCA (&#x03B1;-synuclein) is the best-known genetic and pathological marker of Parkinson&#x0027;s disease, its blood-based expression showed little prognostic significance. Among biological pathways, mitochondrial dysfunction exhibited the highest mean |SHAP| value (0.008), revealing that baseline motor severity, gene-clinical interactions (UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1), PD risk genes (VPS35, GBA, LRRK2), and mitochondrial dysfunction contributed most to predictions. 
By evaluating individual and interactive contributions of these seven PD risk genes, this study demonstrates the strong association of mitochondrial dysfunction with disease progression. This integrated approach provides predictive power while illuminating underlying molecular mechanisms, advancing personalized medicine and targeted therapy development.</p>
</sec>
<sec id="s2"><label>2</label><title>Method</title>
<sec id="s2a"><label>2.1</label><title>PPMI dataset and data preprocessing</title>
<p>Data were obtained from the Parkinson&#x0027;s Progression Markers Initiative (PPMI) cohort, supported by the Michael J. Fox Foundation (<xref ref-type="bibr" rid="B22">22</xref>). PPMI is a multicenter, observational study launched in 2010 to monitor PD progression and discover biomarkers. All data were downloaded in accordance with ethical approval and data use agreements.</p>
<p>The PPMI dataset comprises genomic data (RNA-seq) and clinical data. Genomic data include RNA sequencing from patients&#x0027; blood samples, with gene expression normalized to Transcripts Per Million (TPM) units to ensure fair comparison between samples and genes. Clinical data include patients&#x0027; demographic information, diagnoses, and motor assessment scores. UPDRS Part III scores, which measure motor function, serve as the model&#x0027;s target variable.</p>
<p>Data preprocessing was performed in three main steps. First, UPDRS Part III scores from Baseline (BL) and Visit 04 (V04, 12-month) were processed. The 12-month UPDRS Part III score (UPDRS_V04) was used as the continuous target variable. A classification variable (Progressor_Type) was created based on 12-month UPDRS change (<italic>&#x0394;</italic>UPDRS&#x2009;&#x003D;&#x2009;UPDRS_V04&#x2212;UPDRS_BL), with patients classified as fast progressors if <italic>&#x0394;</italic>UPDRS&#x2009;&#x2265;&#x2009;5. This 5-point threshold was selected based on established literature defining the minimal clinically important difference (MCID) for UPDRS Part III motor scores. Shulman et al. (<xref ref-type="bibr" rid="B23">23</xref>) identified 5.2 points as the moderate clinically important difference, and subsequent studies have consistently used 5 points as the threshold for clinically meaningful motor progression (<xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>). This variable was used for stratified sampling to ensure balanced representation of fast and slow progressors in training and test sets.</p>
<p>Second, the PPMI Project 133 RNA-sequencing data were processed. Baseline RNA-seq data were filtered to exclude low-quality samples and genes with low expression (TPM &#x003C; 1 in 90&#x0025; of samples), reducing noise and improving model performance. A single-time-point (baseline-only) approach was adopted to predict 12-month progression from a single blood sample at diagnosis, rather than a longitudinal design (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B16">16</xref>). This approach enables clinical applicability by determining progression risk from a single baseline blood sample without requiring repeated patient visits.</p>
<p>To ensure methodological rigor and prevent label leakage, all predictor variables were measured at baseline (Visit 0, t&#x2009;&#x003D;&#x2009;0), while the target variable was measured at a future time point (Visit 04, t&#x2009;&#x003D;&#x2009;12 months). Specifically, baseline UPDRS Part III score (UPDRS_BL), age, gender, baseline blood RNA-seq gene expression profiles, PD risk gene expression levels, biological pathway scores, and interaction terms (e.g., UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1) were all measured at t&#x2009;&#x003D;&#x2009;0. The target variable, 12-month UPDRS Part III score (UPDRS_V04), was measured at t&#x2009;&#x003D;&#x2009;12 months. This temporal separation ensures that all predictors are measured before the outcome, consistent with standard prognostic modeling practices in Parkinson&#x0027;s disease research (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B16">16</xref>). Using baseline disease severity (UPDRS_BL) to predict future disease severity (UPDRS_V04) is clinically valid and does not constitute label leakage, as these represent measurements of the same clinical construct at different time points. The variable DELTA_UPDRS (UPDRS_V04&#x2009;&#x2212;&#x2009;UPDRS_BL), which quantifies 12-month motor progression, was used only for stratified sampling to ensure balanced representation of fast and slow progressors in training and test sets, but was not used as a predictor variable in the model.</p>
<p>Third, clinical and RNA-seq data were merged using patient identifiers (PATNO), and patients with missing data were excluded. Feature engineering was then applied. The top 100 genes most correlated with 12-month UPDRS change (<italic>&#x0394;</italic>UPDRS) were selected from approximately 20,000 genes remaining after low-expression filtering. Pearson correlation coefficients were calculated between each gene&#x0027;s baseline expression and <italic>&#x0394;</italic>UPDRS, and genes were ranked by absolute correlation magnitude. The top 100 genes (correlation range: &#x007C;r&#x007C;&#x2009;&#x003D;&#x2009;0.45 to 0.15) were selected to form the most important transcriptomic features. This correlation-based selection reduces dimensionality while retaining genes with the strongest linear associations with motor progression. Additionally, baseline expression levels of seven PD risk genes (SNCA, LRRK2, GBA, PRKN, PINK1, PARK7, VPS35) were added as separate features (<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B27">27</xref>). Pathway scores were calculated as the mean expression of gene sets associated with neuroinflammation, mitochondrial dysfunction, and autophagy to assess the contribution of biological mechanisms to progression. Three interaction features were established based on pre-specified biological hypotheses regarding the PINK1/PARKIN mitophagy pathway (<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B21">21</xref>), rather than data-driven discovery: PINK1&#x2009;&#x00D7;&#x2009;PARK7 (mitochondrial quality control interaction), AGE&#x2009;&#x00D7;&#x2009;PINK1 (age-dependent mitochondrial effects), and UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1 (baseline severity modulation of PINK1 prognostic value).</p>
<p>Outliers in UPDRS_V04 scores were identified using the Interquartile Range (IQR) method, and 2 patients were excluded to reduce the impact of extreme values on model performance. The final dataset contains 390 patients and 116 features: 3 clinical covariates (UPDRS_BL, AGE, GENDER), 100 most correlated genes, 7 PD risk genes, 3 pathway scores, and 3 interaction features. UPDRS_V04 (continuous variable) was used as the target variable.</p>
<p>Before model training, all continuous features (gene expression and clinical covariates) were standardized using StandardScaler (mean&#x2009;&#x003D;&#x2009;0, standard deviation&#x2009;&#x003D;&#x2009;1) to prevent features on different scales from adversely affecting performance. A Power Transformation (Yeo-Johnson method) was applied to the target variable (UPDRS_V04) to normalize its distribution, enhancing the model&#x0027;s learning capacity and predictive performance (<xref ref-type="bibr" rid="B28">28</xref>). Model predictions were inverse-transformed to the original scale.</p>
<p>The dataset was divided into a training/validation set (80&#x0025;, <italic>n</italic>&#x2009;&#x003D;&#x2009;312) and an independent clinical test set (20&#x0025;, <italic>n</italic>&#x2009;&#x003D;&#x2009;78) using stratified sampling to preserve the balance between progressor and non-progressor classes (Fast vs. Slow). Hyperparameter optimization was performed using Bayesian optimization (Optuna) with 7-fold cross-validation on the training/validation set (<italic>n</italic>&#x2009;&#x003D;&#x2009;312). Optuna&#x0027;s Tree-structured Parzen Estimator (TPE) algorithm systematically explored the hyperparameter space across 30 trials to maximize mean cross-validation R<sup>2</sup>.</p>
<p>For each base model, the following hyperparameter ranges were explored: XGBoost (n_estimators: 100&#x2013;250, max_depth: 3&#x2013;7, learning_rate: 0.01&#x2013;0.1, subsample: 0.6&#x2013;0.9, colsample_bytree: 0.6&#x2013;0.9, min_child_weight: 1&#x2013;8, L1 regularization: 0.01&#x2013;1.0, L2 regularization: 0.1&#x2013;5.0); LightGBM (n_estimators: 100&#x2013;250, max_depth: 3&#x2013;7, learning_rate: 0.01&#x2013;0.1, subsample: 0.6&#x2013;0.9, colsample_bytree: 0.6&#x2013;0.9, min_data_in_leaf: 5&#x2013;25, L1 regularization: 0.01&#x2013;1.0, L2 regularization: 0.1&#x2013;5.0); CatBoost (iterations: 100&#x2013;250, depth: 3&#x2013;7, learning_rate: 0.01&#x2013;0.1, subsample: 0.6&#x2013;0.9, L2 regularization: 0.1&#x2013;5.0). For the meta-learner (Huber Regressor), alpha (regularization strength: 0.01&#x2013;1.0) and epsilon (robustness parameter: 1.0&#x2013;2.0) were optimized. Learning rate and regularization parameters were sampled on a logarithmic scale to efficiently explore multiple orders of magnitude. After optimization, the final model with optimal hyperparameters was retrained on the entire training/validation set (<italic>n</italic>&#x2009;&#x003D;&#x2009;312) and evaluated using 7-fold cross-validation and the independent clinical test set (<italic>n</italic>&#x2009;&#x003D;&#x2009;78). The independent test set was never used for hyperparameter optimization or training and was reserved for assessing real-world performance.</p>
<p>All model development, evaluation, and visualization were carried out using Python 3.11, Scikit-learn, and Matplotlib. The code, trained model, curated analysis dataset, and aggregate results (feature importance and SHAP values) that support the findings of this study are openly available on GitHub at <ext-link ext-link-type="uri" xlink:href="https://github.com/yeldafrt/PD-Blood-Transcriptomics-SHAP">https://github.com/yeldafrt/PD-Blood-Transcriptomics-SHAP</ext-link>. The repository includes frozen environment files (requirements.txt with exact package versions), Docker container configuration (Dockerfile), and Conda environment specification (environment.yml) to ensure computational reproducibility.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Modeling and performance evaluation</title>
<p>In this study, a Stacking Regressor model was used to predict 12-month motor state (UPDRS_V04) from baseline data. Stacking is an ensemble learning technique that combines multiple machine learning models to achieve robust predictions (<xref ref-type="bibr" rid="B29">29</xref>). Three gradient boosting variants (XGBoost, LightGBM, CatBoost) served as base models, with their predictions combined by a Huber Regressor meta-model. Gradient boosting models capture complex linear and nonlinear relationships across heterogeneous data, including clinical features (e.g., UPDRS scores, age, gender) and genomic features (e.g., gene expression profiles). Each variant&#x0027;s distinct optimization strategies and regularization techniques increase ensemble diversity and reduce the risk of overfitting.</p>
<p>The model was evaluated using a two-stage strategy. First, 7-fold cross-validation was applied to the training/validation set (<italic>n</italic>&#x2009;&#x003D;&#x2009;312), dividing the dataset into 7 equal parts, with each serving as the validation set in turn to assess learning capacity and internal consistency. Cross-validation reduces the risk of overfitting and provides reliable performance estimates. Second, the model was evaluated on an independent clinical holdout set (<italic>n</italic>&#x2009;&#x003D;&#x2009;78) that was never used for hyperparameter optimization or training, to measure real-world performance and generalization to new patients. This two-stage strategy comprehensively tests both learning capacity and generalization.</p>
<p>Model performance was measured using three commonly used metrics for regression problems: R<sup>2</sup> (the proportion of variance explained by the model), MAE, and Root Mean Squared Error (RMSE). The R<sup>2</sup> value indicates how much of the variance in the target variable the model explains, typically ranging from 0 to 1 (it can be negative on held-out data when predictions are worse than the mean of the target); values closer to 1 indicate better performance. MAE expresses the average deviation of predictions from the actual values in UPDRS score units, with lower values indicating better performance. RMSE is a metric that penalizes larger errors more heavily, and lower values are also preferred. <xref ref-type="table" rid="T1">Table&#x00A0;1</xref> summarizes the model&#x0027;s performance metrics on 7-fold cross-validation and an independent clinical test set.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Model performance metrics.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Evaluation set</th>
<th valign="top" align="center">R<sup>2</sup></th>
<th valign="top" align="center">MAE</th>
<th valign="top" align="center">RMSE</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="4">7-Fold CV</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Fold 1</td>
<td valign="top" align="center">0.574</td>
<td valign="top" align="center">6.04</td>
<td valign="top" align="center">7.40</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Fold 2</td>
<td valign="top" align="center">0.480</td>
<td valign="top" align="center">6.56</td>
<td valign="top" align="center">8.45</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Fold 3</td>
<td valign="top" align="center">0.498</td>
<td valign="top" align="center">5.91</td>
<td valign="top" align="center">7.54</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Fold 4</td>
<td valign="top" align="center">0.571</td>
<td valign="top" align="center">6.21</td>
<td valign="top" align="center">7.38</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Fold 5</td>
<td valign="top" align="center">0.506</td>
<td valign="top" align="center">6.30</td>
<td valign="top" align="center">8.55</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Fold 6</td>
<td valign="top" align="center">0.428</td>
<td valign="top" align="center">6.22</td>
<td valign="top" align="center">8.38</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Fold 7</td>
<td valign="top" align="center">0.537</td>
<td valign="top" align="center">5.81</td>
<td valign="top" align="center">7.30</td>
</tr>
<tr>
<td valign="top" align="left">Mean&#x2009;&#x00B1;&#x2009;SD (<italic>n</italic>&#x2009;&#x003D;&#x2009;312)</td>
<td valign="top" align="center">0.513&#x2009;&#x00B1;&#x2009;0.052</td>
<td valign="top" align="center">6.15&#x2009;&#x00B1;&#x2009;0.25</td>
<td valign="top" align="center">7.86&#x2009;&#x00B1;&#x2009;0.57</td>
</tr>
<tr>
<td valign="top" align="left">Clinical holdout (<italic>n</italic>&#x2009;&#x003D;&#x2009;78)</td>
<td valign="top" align="center"><bold>0.551</bold></td>
<td valign="top" align="center"><bold>6.01</bold></td>
<td valign="top" align="center"><bold>7.21</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate performance on the independent clinical holdout set.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>, the 7-fold cross-validation results (<italic>n</italic>&#x2009;&#x003D;&#x2009;312) demonstrate consistent performance, with R<sup>2</sup> ranging from 0.428 to 0.574 (mean: 0.513&#x2009;&#x00B1;&#x2009;0.052). This low standard deviation indicates stable performance across patient subgroups and acceptable fold variation. MAE ranges from 5.81 to 6.56 points (mean: 6.15&#x2009;&#x00B1;&#x2009;0.25), showing that predictions deviate from actual values by approximately 6 points on average, which is clinically acceptable relative to the UPDRS Part III significance threshold (5 points).</p>
<p>Results from an independent clinical holdout set (<italic>n</italic>&#x2009;&#x003D;&#x2009;78) demonstrate strong generalization ability. This set, never used for hyperparameter optimization or training, was reserved for evaluating real-world performance. The R<sup>2</sup> (0.551) exceeded the cross-validation average (0.513), indicating better performance on new patients without overfitting. MAE (6.01 points) was lower than the cross-validation score (6.15 points), confirming the reliability of clinical predictions. The RMSE (7.21), only modestly higher than the MAE, suggests that large prediction errors were rare. The consistency between cross-validation and holdout results (R<sup>2</sup>: 0.513 vs. 0.551) demonstrates robust model structure and reliable performance across patient populations.</p>
</sec>
<sec id="s2c"><label>2.3</label><title>SHAP analysis and model interpretability</title>
<p>For clinical acceptance, machine learning models must achieve high predictive performance and have explainable decision-making mechanisms. SHAP analysis was applied to improve model interpretability and quantitatively assess each feature&#x0027;s contribution to progression predictions. SHAP, based on game theory Shapley values, makes black-box model decisions explainable (<xref ref-type="bibr" rid="B30">30</xref>).</p>
<p>SHAP analysis was performed on an independent clinical test set (<italic>n</italic>&#x2009;&#x003D;&#x2009;78) for three feature categories: (1) Clinical Features &#x2013; baseline UPDRS Part III, age, sex, and their interactions with PD risk genes (e.g., UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1); (2) PD Risk Genes &#x2013; individual expression levels of seven literature-supported risk genes (SNCA, LRRK2, GBA, PRKN, PINK1, PARK7, VPS35) (<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B27">27</xref>); and (3) Pathway Scores &#x2013; scores representing biological processes (mitochondrial dysfunction, neuroinflammation, autophagy). SHAP values were calculated separately for each base model (XGBoost, LightGBM, CatBoost) using TreeExplainer and then averaged across all three estimators to produce ensemble-averaged SHAP values. This approach ensures that feature importance reflects the collective contribution across all models in the stacking ensemble.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<p>A Stacking Regressor ensemble model integrating baseline clinical and blood-based transcriptomic data was developed to predict 12-month motor progression in Parkinson&#x0027;s disease. The model achieved R<sup>2</sup>&#x2009;&#x003D;&#x2009;0.551 and MAE&#x2009;&#x003D;&#x2009;6.01 on an independent clinical test set (<italic>n</italic>&#x2009;&#x003D;&#x2009;78), demonstrating strong predictive performance using only baseline blood RNA-seq data. The stacking ensemble model architecture and comprehensive performance evaluation results across multiple visualization approaches are presented in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Stacking regressor model architecture and comprehensive performance evaluation. <bold>(a)</bold> Model architecture; <bold>(b)</bold> Predicted vs. actual (<italic>n</italic>&#x2009;&#x003D;&#x2009;78); <bold>(c)</bold> Residuals; <bold>(d)</bold> Distribution comparison; <bold>(e)</bold> 7-fold CV bar chart (<italic>n</italic>&#x2009;&#x003D;&#x2009;312).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1774436-g001.tif"><alt-text content-type="machine-generated">Five-part figure showing a machine learning workflow and model performance for predicting UPDRS Part III at twelve months. Top left: flowchart diagram of a stacking regressor using XGBoost, LightGBM, and CatBoost as base models, with a Huber regressor meta-model. Top center: scatter plot comparing actual versus predicted UPDRS scores, with R squared equal to zero point five five one and mean absolute error six point zero one (n equals seventy-eight). Top right: residual plot showing spread of residuals for predictions, with mean negative zero point four three and standard deviation seven point two zero. Bottom left: violin plot comparing distributions of actual and predicted scores, with means and standard deviations indicated. Bottom right: bar chart of R squared scores across seven cross-validation folds, mean R squared of zero point five one three, and summary statistics displayed.</alt-text>
</graphic>
</fig>
<p><xref ref-type="fig" rid="F1">Figure&#x00A0;1a</xref> shows the two-level stacking ensemble architecture. Level 0 comprises three gradient-boosting base models (XGBoost, LightGBM, CatBoost) trained on 390 patients with 116 features (100 genes&#x2009;&#x002B;&#x2009;16 clinical/pathway features). Level 1 uses a Huber Regressor meta-model to combine base model predictions through optimal linear weighting, producing the final UPDRS_V04 prediction while maintaining robustness against outliers.</p>
<p><xref ref-type="fig" rid="F1">Figure&#x00A0;1b</xref> visualizes model performance on the independent clinical validation set. The scatter plot shows actual vs. predicted UPDRS Part III scores at 12 months, with the dashed line representing perfect prediction (y&#x2009;&#x003D;&#x2009;x). The model achieved R<sup>2</sup>&#x2009;&#x003D;&#x2009;0.551 and MAE&#x2009;&#x003D;&#x2009;6.01 on patients not used during training, demonstrating strong generalization. The balanced distribution of points around the perfect-prediction line indicates no systematic bias.</p>
<p><xref ref-type="fig" rid="F1">Figure&#x00A0;1c</xref> presents a residual plot to evaluate the error structure and prediction consistency. The plot shows residuals (actual&#x2009;&#x2212;&#x2009;predicted) vs. predicted UPDRS Part III scores, with the dashed line representing zero error and the dotted line indicating the mean residual (Mean&#x2009;&#x003D;&#x2009;&#x2212;0.43). The random distribution of residuals around zero suggests no systematic bias. The residual standard deviation (SD&#x2009;&#x003D;&#x2009;7.20) is consistent with RMSE (7.21). The absence of heteroscedasticity indicates consistent performance across the prediction range.</p>
<p><xref ref-type="fig" rid="F1">Figure&#x00A0;1d</xref> compares actual and predicted UPDRS Part III score distributions using violin plots. The actual scores (Mean&#x2009;&#x003D;&#x2009;18.6, SD&#x2009;&#x003D;&#x2009;10.8) and predicted scores (Mean&#x2009;&#x003D;&#x2009;19.0, SD&#x2009;&#x003D;&#x2009;9.5) exhibit similar distributions, indicating that the model successfully captured the true data distribution. The slightly narrower predicted distribution (SD: 9.5 vs. 10.8) suggests regression to the mean. The minimal difference between the mean values (18.6 vs. 19.0) confirms the absence of systematic bias and balanced predictions.</p>
<p><xref ref-type="fig" rid="F1">Figure&#x00A0;1e</xref> shows 7-fold cross-validation results on the training/validation set (<italic>n</italic>&#x2009;&#x003D;&#x2009;312) using a bar chart to display fold-by-fold R<sup>2</sup> scores. The model achieved R<sup>2</sup>&#x2009;&#x003D;&#x2009;0.513&#x2009;&#x00B1;&#x2009;0.052 and MAE&#x2009;&#x003D;&#x2009;6.15&#x2009;&#x00B1;&#x2009;0.25, demonstrating consistent performance across patient subgroups with R<sup>2</sup> values ranging from 0.428 to 0.574 across the seven folds.</p>
<p><xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref> presents the 20 features that most contribute to the model&#x0027;s predictive performance, grouped into three main categories.</p>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Top 20 features contributing to stacking regressor model predictions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1774436-g002.tif"><alt-text content-type="machine-generated">Horizontal bar chart showing feature importance for clinical, interaction, and RNA-seq gene categories. UPDRS_BL&#x00D7;PINK1 (interaction, 0.082) and UPDRS_BL (clinical, 0.060) are most important, followed by various RNA-seq gene features with importance values ranging from 0.025 to 0.012. A legend in the lower right details category colors.</alt-text>
</graphic>
</fig>
<p>The feature importance values in <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref> were calculated by averaging across three algorithms (XGBoost, LightGBM, and CatBoost). The most important feature was the UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1 interaction (0.082), followed by baseline UPDRS score (0.060). Notably, 90&#x0025; (18/20) of top features are RNA-seq gene expression data, indicating that transcriptomic data play a critical role in predicting Parkinson&#x0027;s disease progression. Clinical features (blue), gene-clinical interactions (red), and RNA-seq genes (green) are color-coded, with normalized importance scores shown on the right.</p>
<p><xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref> visualizes SHAP results in three panels.</p>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>SHAP analysis for model interpretability. <bold>(a)</bold> Prediction contributions of clinical features; <bold>(b)</bold> PD risk genes; <bold>(c)</bold> Pathway scores.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1774436-g003.tif"><alt-text content-type="machine-generated">Three horizontal bar charts display mean absolute SHAP values for different variable groups: the left chart shows clinical variables, with UPDRS_BL_x_PINK1 and UPDRS_BL as most influential; the center chart shows genetic variables, with PD_VPS35 highest; the right chart shows pathway variables, with Mitochondrial having the greatest impact.</alt-text>
</graphic>
</fig>
<p>In <xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>, Panel (a) shows that the UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1 interaction has the highest SHAP value (0.283), followed by the baseline UPDRS score (0.258). Demographic features (age, sex) show minimal contribution (&#x003C;0.002). Panel (b) shows VPS35 has the highest SHAP value among seven PD risk genes (0.010), followed by GBA (0.005) and LRRK2 (0.005). The low individual SHAP values for PINK1 and SNCA suggest they contribute to predictions primarily through interactions. Panel (c) shows that the mitochondrial dysfunction pathway score has the highest SHAP value (0.008), followed by neuroinflammation (0.005) and autophagy (0.003), supporting mitochondrial dysfunction as a dominant prognostic signal in PD progression.</p>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>Model performance is competitive with similar cross-sectional approaches. While Nguyen et al. (<xref ref-type="bibr" rid="B31">31</xref>) achieved R<sup>2</sup>&#x2009;&#x003D;&#x2009;0.558 using neuroimaging, the proposed ensemble model achieved R<sup>2</sup>&#x2009;&#x003D;&#x2009;0.551 using baseline blood RNA-seq data alone, offering key advantages: (1) explainability through SHAP analysis revealed gene-clinical interactions (e.g., UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1) as critical predictive features of progression, addressing not only predictive accuracy but also the features underlying the predictions; (2) blood-based transcriptomic profiling provides a minimally invasive, accessible alternative to expensive brain imaging; and (3) the proposed single-timepoint baseline approach enables immediate prognostic assessment from a single blood sample at diagnosis, without requiring longitudinal data or repeated visits. Although longitudinal designs achieve higher performance using prior-visit data (R<sup>2</sup>&#x2009;&#x2248;&#x2009;0.69) (<xref ref-type="bibr" rid="B6">6</xref>), the proposed cross-sectional baseline design offers superior clinical applicability: newly diagnosed patients can receive immediate predictions of progression risk, facilitating early intervention planning and patient stratification in clinical trials.</p>
<p>Recent studies demonstrate the effectiveness of ensemble learning for predicting Parkinson&#x0027;s progression (<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B14">14</xref>). This study extends this paradigm in two ways. First, unlike the categorical classification of Dadu et al. (<xref ref-type="bibr" rid="B8">8</xref>), the proposed Stacking Regressor predicts 12-month UPDRS scores as continuous values, enabling more precise individualized forecasts. Second, SHAP analysis (<xref ref-type="bibr" rid="B30">30</xref>) quantitatively explains which features (UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1, VPS35, mitochondrial pathway) predict progression, adding interpretability to the ensemble approach.</p>
<p>The model&#x0027;s success derives from the synergistic integration of clinical features and gene expression profiles. SHAP analysis revealed biologically meaningful, interpretable features, with clinical and interaction features dominating predictions. The two highest contributors were the interaction between baseline UPDRS and PINK1 expression (UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1, SHAP&#x2009;&#x003D;&#x2009;0.283) and baseline UPDRS itself (SHAP&#x2009;&#x003D;&#x2009;0.258), indicating that initial disease severity is strongly associated with progression rate and this association varies with genetic factors like PINK1. Among seven Parkinson&#x0027;s risk genes (<xref ref-type="bibr" rid="B27">27</xref>), VPS35, which is involved in endosomal trafficking and lysosomal function, had the highest SHAP value (0.010), followed by the classical risk genes GBA (0.005) and LRRK2 (0.005). Notably, PINK1 and SNCA showed relatively low individual SHAP values, suggesting their effects emerge primarily through interactions (e.g., UPDRS_BL&#x2009;&#x00D7;&#x2009;PINK1) or that blood-based RNA-seq incompletely captures their brain-specific roles. Among biological pathways, mitochondrial dysfunction exhibited the highest SHAP value (0.008), followed by neuroinflammation (0.005) and autophagy (0.003). This is consistent with mitochondrial dysfunction&#x0027;s strong association with Parkinson&#x0027;s progression, given dopaminergic neurons&#x0027; high energy demands and their susceptibility to oxidative stress (<xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B21">21</xref>).</p>
<p>Demographic features (age, sex) contributed minimally to the model&#x0027;s predictions (mean &#x007C;SHAP&#x007C;&#x2009;&#x003C;&#x2009;0.002). Specifically, the negligible contribution of sex is consistent with the lack of a significant difference in 12-month motor progression between males and females in this cohort (<italic>p</italic>&#x2009;&#x003D;&#x2009;0.54). The dataset shows a male-predominant sex distribution (1.42:1 male-to-female ratio), which reflects the known epidemiology of Parkinson&#x0027;s disease. Similarly, age showed minimal predictive contribution despite being a known risk factor for Parkinson&#x0027;s disease; this may be because baseline motor severity (UPDRS_BL) already captures age-related disease burden. The model&#x0027;s predictive power is primarily driven by baseline clinical severity and transcriptomic features rather than demographic variables.</p>
<p>This study offers notable strengths. Integrating machine learning with SHAP analysis provides moderate prediction accuracy while ensuring model transparency. Clinically, the baseline blood RNA-seq approach offers a minimally invasive, cost-effective alternative to neuroimaging for progression risk assessment. Additionally, sharing the ensemble model and analysis code as open source ensures reproducibility.</p>
<p>This study has limitations. First, this study is limited to the PPMI cohort. While the model was rigorously validated using an independent holdout test set (<italic>n</italic>&#x2009;&#x003D;&#x2009;78, 20&#x0025;) that was completely withheld from training and hyperparameter optimization, this represents internal validation within the same cohort rather than external validation in an entirely independent dataset. Although this approach demonstrates robust generalization to unseen patients within the PPMI population, external validation is essential to confirm generalizability across diverse populations that may differ in demographic characteristics (age, ethnicity, geographic distribution), disease phenotypes (severity, subtypes, progression patterns), RNA-seq protocols (sequencing platforms, batch effects), and clinical assessment methods (rater variability, protocol differences). Validation in independent cohorts from different centers and populations is required to establish clinical utility and real-world applicability. Second, the model predicts 12-month progression; validation for longer-term forecasts is needed. Third, blood-based RNA-seq may not fully reflect brain-specific pathology, potentially explaining low SHAP values for genes such as SNCA. Fourth, the model focuses exclusively on motor symptoms (UPDRS Part III) rather than non-motor features. Fifth, treatment changes during the 12-month follow-up period were not modeled. Changes in medication (e.g., levodopa dosage adjustments) can significantly influence UPDRS scores, acting as a major confounding variable. This limitation means the model predicts progression based on the combined effects of natural disease progression and treatment, rather than isolating the biological progression alone. Future studies should incorporate time-varying medication data as a covariate to disentangle these effects. 
Finally, this study does not include environmental exposure variables (e.g., pesticide exposure, air pollution, occupational exposures) or site/center identifiers that could account for geographic variation in environmental risk factors. Environmental factors may influence disease progression and interact with genetic risk factors. Future studies integrating blood transcriptomics with environmental exposure data could provide a more comprehensive understanding of gene-environment interactions in Parkinson&#x0027;s disease progression.</p>
<p>Future studies should integrate blood RNA-seq with brain imaging, cerebrospinal fluid biomarkers, and genetic data to develop more comprehensive prognostic models. Investigating non-coding regulatory elements (promoters, enhancers, miRNAs) and epigenetic modifications could reveal deeper mechanisms of progression. Therapeutically, mitochondrial biomarkers could predict treatment response, warranting randomized trials of mitochondria-targeted therapies (coenzyme Q10, creatine). Clinically, SHAP-based patient-specific progression profiles could inform personalized treatment strategies. Finally, validating the ensemble model in independent multi-center cohorts will be critical for clinical integration.</p>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusion</title>
<p>This study establishes that machine learning integrating blood transcriptomics and clinical data effectively predicts motor progression in Parkinson&#x0027;s disease. Crucially, the interplay between initial clinical state and specific genetic backgrounds&#x2014;particularly PINK1&#x2014;is a more powerful prognostic indicator than any factor alone. This study provides systematic evidence that mitochondrial dysfunction is a dominant prognostic signal for disease progression, highlighting key genes and biological pathways as promising targets for future mechanistic and therapeutic investigation.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s12">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="ethics-statement"><title>Ethics statement</title>
<p>Ethical approval was not required for this study because it is a secondary analysis of de-identified, publicly available data obtained from the Parkinson's Progression Markers Initiative (PPMI). Ethical approval and written informed consent were obtained by the original PPMI study investigators from all participants. Therefore, no additional ethical approval was required for this secondary data analysis. The study was conducted in accordance with the local legislation and institutional requirements.</p>
</sec>
<sec id="s8" sec-type="author-contributions"><title>Author contributions</title>
<p>YF: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<ack><title>Acknowledgments</title>
<p>Thanks to Meral Sefero&#x011F;lu from the Department of Neurology, University of Health Sciences, Bursa Y&#x00FC;ksek Ihtisas Training and Research Hospital, Bursa, Turkey.</p>
</ack>
<sec id="s10" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. Generative artificial intelligence tools were used solely to assist with language editing and improvement of clarity in the writing of this manuscript. The scientific content, study design, data analysis, interpretation of results, and conclusions were entirely conceived, performed, and validated by the author. The author takes full responsibility for the integrity and originality of the work.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s13" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fdgth.2026.1774436/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fdgth.2026.1774436/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material xlink:href="Datasheet1.csv" id="SM1" mimetype="text/csv"/>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tabashum</surname> <given-names>T</given-names></name> <name><surname>Snyder</surname> <given-names>RC</given-names></name> <name><surname>O&#x2019;Brien</surname> <given-names>MK</given-names></name> <name><surname>Albert</surname> <given-names>MV</given-names></name></person-group>. <article-title>Machine learning models for Parkinson disease: systematic review</article-title>. <source>JMIR Med Inform</source>. (<year>2024</year>) <volume>12</volume>:<fpage>e50117</fpage>. <pub-id pub-id-type="doi">10.2196/50117</pub-id><pub-id pub-id-type="pmid">38771237</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sotirakis</surname> <given-names>C</given-names></name> <name><surname>Su</surname> <given-names>Z</given-names></name> <name><surname>Brzezicki</surname> <given-names>MA</given-names></name> <name><surname>Conway</surname> <given-names>N</given-names></name> <name><surname>Tarassenko</surname> <given-names>L</given-names></name> <name><surname>FitzGerald</surname> <given-names>JJ</given-names></name><etal/></person-group> <article-title>Identification of motor progression in Parkinson&#x2019;s disease using wearable sensors and machine learning</article-title>. <source>NPJ Parkinsons Dis</source>. (<year>2023</year>) <volume>9</volume>:<fpage>142</fpage>. <pub-id pub-id-type="doi">10.1038/s41531-023-00581-2</pub-id><pub-id pub-id-type="pmid">37805655</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kleinholdermann</surname> <given-names>U</given-names></name> <name><surname>Wullstein</surname> <given-names>M</given-names></name> <name><surname>Pedrosa</surname> <given-names>D</given-names></name></person-group>. <article-title>Prediction of motor unified Parkinson&#x2019;s disease rating scale scores in patients with Parkinson&#x2019;s disease using surface electromyography</article-title>. <source>Clin Neurophysiol</source>. (<year>2021</year>) <volume>132</volume>:<fpage>1708</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.clinph.2021.01.031</pub-id><pub-id pub-id-type="pmid">33958263</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Exley</surname> <given-names>T</given-names></name> <name><surname>Moudy</surname> <given-names>S</given-names></name> <name><surname>Patterson</surname> <given-names>RM</given-names></name> <name><surname>Kim</surname> <given-names>J</given-names></name> <name><surname>Albert</surname> <given-names>MV</given-names></name></person-group>. <article-title>Predicting UPDRS motor symptoms in individuals with Parkinson&#x2019;s disease from force plates using machine learning</article-title>. <source>IEEE J Biomed Health Inform</source>. (<year>2022</year>) <volume>26</volume>:<fpage>3486</fpage>&#x2013;<lpage>94</lpage>. <pub-id pub-id-type="doi">10.1109/JBHI.2022.3157518</pub-id><pub-id pub-id-type="pmid">35259121</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Craig</surname> <given-names>DW</given-names></name> <name><surname>Hutchins</surname> <given-names>E</given-names></name> <name><surname>Violich</surname> <given-names>I</given-names></name> <name><surname>Alsop</surname> <given-names>E</given-names></name> <name><surname>Gibbs</surname> <given-names>JR</given-names></name> <name><surname>Levy</surname> <given-names>S</given-names></name><etal/></person-group> <article-title>RNA sequencing of whole blood reveals early alterations in immune cells and gene expression in Parkinson&#x2019;s disease</article-title>. <source>Nat Aging</source>. (<year>2021</year>) <volume>1</volume>:<fpage>734</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1038/s43587-021-00088-6</pub-id><pub-id pub-id-type="pmid">37117765</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ahmed</surname> <given-names>S</given-names></name> <name><surname>Komeili</surname> <given-names>M</given-names></name> <name><surname>Park</surname> <given-names>J</given-names></name></person-group>. <article-title>Predictive modelling of Parkinson&#x2019;s disease progression based on RNA sequencing with densely connected deep recurrent neural networks</article-title>. <source>Sci Rep</source>. (<year>2022</year>) <volume>12</volume>:<fpage>21469</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-25454-1</pub-id><pub-id pub-id-type="pmid">36509776</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kern</surname> <given-names>F</given-names></name> <name><surname>Fehlmann</surname> <given-names>T</given-names></name> <name><surname>Violich</surname> <given-names>I</given-names></name> <name><surname>Alsop</surname> <given-names>E</given-names></name> <name><surname>Hutchins</surname> <given-names>E</given-names></name> <name><surname>Kahraman</surname> <given-names>M</given-names></name><etal/></person-group> <article-title>Deep sequencing of sncRNAs reveals hallmarks and regulatory modules of the transcriptome during Parkinson&#x2019;s disease progression</article-title>. <source>Nat Aging</source>. (<year>2021</year>) <volume>1</volume>:<fpage>309</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1038/s43587-021-00042-6</pub-id><pub-id pub-id-type="pmid">37118411</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dadu</surname> <given-names>A</given-names></name> <name><surname>Satone</surname> <given-names>V</given-names></name> <name><surname>Kaur</surname> <given-names>R</given-names></name> <name><surname>Hashemi</surname> <given-names>SH</given-names></name> <name><surname>Leonard</surname> <given-names>H</given-names></name> <name><surname>Iwaki</surname> <given-names>H</given-names></name><etal/></person-group> <article-title>Identification and prediction of Parkinson&#x2019;s disease subtypes and progression using machine learning in two cohorts</article-title>. <source>NPJ Parkinsons Dis</source>. (<year>2022</year>) <volume>8</volume>:<fpage>172</fpage>. <pub-id pub-id-type="doi">10.1038/s41531-022-00439-z</pub-id><pub-id pub-id-type="pmid">36526647</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ponce-Bobadilla</surname> <given-names>AV</given-names></name> <name><surname>Schmitt</surname> <given-names>V</given-names></name> <name><surname>Maier</surname> <given-names>CS</given-names></name> <name><surname>Mensing</surname> <given-names>S</given-names></name> <name><surname>Stodtmann</surname> <given-names>S</given-names></name></person-group>. <article-title>Practical guide to SHAP analysis: explaining supervised machine learning model predictions in drug development</article-title>. <source>Clin Transl Sci</source>. (<year>2024</year>) <volume>17</volume>:<fpage>e70056</fpage>. <pub-id pub-id-type="doi">10.1111/cts.70056</pub-id><pub-id pub-id-type="pmid">39463176</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nohara</surname> <given-names>Y</given-names></name> <name><surname>Matsumoto</surname> <given-names>K</given-names></name> <name><surname>Soejima</surname> <given-names>H</given-names></name> <name><surname>Nakashima</surname> <given-names>N</given-names></name></person-group>. <article-title>Explanation of machine learning models using shapley additive explanation and application for real data in hospital</article-title>. <source>Comput Methods Programs Biomed</source>. (<year>2022</year>) <volume>214</volume>:<fpage>106584</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2021.106584</pub-id><pub-id pub-id-type="pmid">34942412</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Prendin</surname> <given-names>F</given-names></name> <name><surname>Pavan</surname> <given-names>J</given-names></name> <name><surname>Cappon</surname> <given-names>G</given-names></name> <name><surname>Ruggeri</surname> <given-names>MF</given-names></name> <name><surname>Baroni</surname> <given-names>S</given-names></name> <name><surname>Ceseracciu</surname> <given-names>L</given-names></name><etal/></person-group> <article-title>The importance of interpreting machine learning models for blood glucose prediction in diabetes: an analysis using SHAP</article-title>. <source>Sci Rep</source>. (<year>2023</year>) <volume>13</volume>:<fpage>16865</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-023-44155-x</pub-id><pub-id pub-id-type="pmid">37803177</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Saraswat</surname> <given-names>D</given-names></name> <name><surname>Bhattacharya</surname> <given-names>P</given-names></name> <name><surname>Verma</surname> <given-names>A</given-names></name> <name><surname>Prasad</surname> <given-names>VK</given-names></name> <name><surname>Tanwar</surname> <given-names>S</given-names></name> <name><surname>Sharma</surname> <given-names>G</given-names></name><etal/></person-group> <article-title>Explainable AI for healthcare 5.0: opportunities and challenges</article-title>. <source>IEEE Access</source>. (<year>2022</year>) <volume>10</volume>:<fpage>84486</fpage>&#x2013;<lpage>517</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2022.3197671</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stenwig</surname> <given-names>E</given-names></name> <name><surname>Salvi</surname> <given-names>G</given-names></name> <name><surname>Rossi</surname> <given-names>PS</given-names></name> <name><surname>Legrand</surname> <given-names>M</given-names></name> <name><surname>Dechartres</surname> <given-names>A</given-names></name> <name><surname>Gayat</surname> <given-names>E</given-names></name><etal/></person-group> <article-title>Comparative analysis of explainable machine learning prediction models for hospital mortality</article-title>. <source>BMC Med Res Methodol</source>. (<year>2022</year>) <volume>22</volume>:<fpage>53</fpage>. <pub-id pub-id-type="doi">10.1186/s12874-022-01540-w</pub-id><pub-id pub-id-type="pmid">35220950</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nilashi</surname> <given-names>M</given-names></name> <name><surname>Abumalloh</surname> <given-names>RA</given-names></name> <name><surname>Minaei-Bidgoli</surname> <given-names>B</given-names></name> <name><surname>Ahmadi</surname> <given-names>SM</given-names></name> <name><surname>Asadi</surname> <given-names>M</given-names></name> <name><surname>Ahmadi</surname> <given-names>A</given-names></name><etal/></person-group> <article-title>Predicting Parkinson&#x2019;s disease progression: evaluation of ensemble methods in machine learning</article-title>. <source>J Healthc Eng</source>. (<year>2022</year>) <volume>2022</volume>:<fpage>2793361</fpage>. <pub-id pub-id-type="doi">10.1155/2022/2793361</pub-id><pub-id pub-id-type="pmid">35154618</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Naderalvojoud</surname> <given-names>B</given-names></name> <name><surname>Hernandez-Boussard</surname> <given-names>T</given-names></name></person-group>. <article-title>Improving machine learning with ensemble learning on observational healthcare data</article-title>. <source>AMIA Annu Symp Proc</source>. (<year>2024</year>) <volume>2023</volume>:<fpage>521</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="pmid">38222353</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>DK</given-names></name> <name><surname>Lan</surname> <given-names>CH</given-names></name> <name><surname>Chan</surname> <given-names>CL</given-names></name></person-group>. <article-title>Deep ensemble learning approaches in healthcare to enhance the prediction and diagnosing performance: the workflows, deployments, and surveys on the statistical, image-based, and sequential datasets</article-title>. <source>Int J Environ Res Public Health</source>. (<year>2021</year>) <volume>18</volume>:<fpage>10811</fpage>. <pub-id pub-id-type="doi">10.3390/ijerph182010811</pub-id><pub-id pub-id-type="pmid">34682554</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moore</surname> <given-names>A</given-names></name> <name><surname>Bell</surname> <given-names>M</given-names></name></person-group>. <article-title>XGBoost, a novel explainable AI technique, in the prediction of myocardial infarction: a UK biobank cohort study</article-title>. <source>Clin Med Insights Cardiol</source>. (<year>2022</year>) <volume>16</volume>:<fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1177/11795468221133611</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Clark</surname> <given-names>EH</given-names></name> <name><surname>V&#x00E1;zquez de la Torre</surname> <given-names>A</given-names></name> <name><surname>Hoshikawa</surname> <given-names>T</given-names></name> <name><surname>Briston</surname> <given-names>T</given-names></name></person-group>. <article-title>Targeting mitophagy in Parkinson&#x2019;s disease</article-title>. <source>J Biol Chem</source>. (<year>2021</year>) <volume>296</volume>:<fpage>100209</fpage>. <pub-id pub-id-type="doi">10.1074/jbc.REV120.014294</pub-id><pub-id pub-id-type="pmid">33372898</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pickrell</surname> <given-names>AM</given-names></name> <name><surname>Youle</surname> <given-names>RJ</given-names></name></person-group>. <article-title>The roles of PINK1, parkin, and mitochondrial fidelity in Parkinson&#x2019;s disease</article-title>. <source>Neuron</source>. (<year>2015</year>) <volume>85</volume>:<fpage>257</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuron.2014.12.007</pub-id><pub-id pub-id-type="pmid">25611507</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Quinn</surname> <given-names>PMJ</given-names></name> <name><surname>Moreira</surname> <given-names>PI</given-names></name> <name><surname>Ambr&#x00F3;sio</surname> <given-names>AF</given-names></name> <name><surname>Alves</surname> <given-names>CH</given-names></name></person-group>. <article-title>PINK1/PARKIN signalling in neurodegeneration and neuroinflammation</article-title>. <source>Acta Neuropathol Commun</source>. (<year>2020</year>) <volume>8</volume>:<fpage>189</fpage>. <pub-id pub-id-type="doi">10.1186/s40478-020-01062-w</pub-id><pub-id pub-id-type="pmid">33168089</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Imberechts</surname> <given-names>D</given-names></name> <name><surname>Kinnart</surname> <given-names>I</given-names></name> <name><surname>Wauters</surname> <given-names>F</given-names></name> <name><surname>Terbeek</surname> <given-names>J</given-names></name> <name><surname>Manders</surname> <given-names>L</given-names></name> <name><surname>Wierda</surname> <given-names>K</given-names></name><etal/></person-group> <article-title>DJ-1 is an essential downstream mediator in PINK1/parkin-dependent mitophagy</article-title>. <source>Brain</source>. (<year>2022</year>) <volume>145</volume>:<fpage>4368</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1093/brain/awac313</pub-id><pub-id pub-id-type="pmid">36039535</pub-id></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="other"><collab>Parkinson&#x0027;s Progression Markers Initiative (PPMI)</collab>. <article-title>PPMI Database</article-title>. <source>MJFF</source>. (<year>2024</year>). <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://ida.loni.usc.edu/">https://ida.loni.usc.edu/</ext-link> (Accessed December 30, 2025).</comment></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shulman</surname> <given-names>LM</given-names></name> <name><surname>Gruber-Baldini</surname> <given-names>AL</given-names></name> <name><surname>Anderson</surname> <given-names>KE</given-names></name> <name><surname>Fishman</surname> <given-names>PS</given-names></name> <name><surname>Reich</surname> <given-names>SG</given-names></name> <name><surname>Weiner</surname> <given-names>WJ</given-names></name></person-group>. <article-title>The clinically important difference on the unified Parkinson&#x2019;s disease rating scale</article-title>. <source>Arch Neurol</source>. (<year>2010</year>) <volume>67</volume>:<fpage>64</fpage>&#x2013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1001/archneurol.2009.295</pub-id><pub-id pub-id-type="pmid">20065131</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>S&#x00E1;nchez-Ferro</surname> <given-names>&#x00C1;</given-names></name> <name><surname>Matarazzo</surname> <given-names>M</given-names></name> <name><surname>Mart&#x00ED;nez-Mart&#x00ED;n</surname> <given-names>P</given-names></name> <name><surname>Mart&#x00ED;nez-&#x00C1;vila</surname> <given-names>JC</given-names></name> <name><surname>G&#x00F3;mez de la C&#x00E1;mara</surname> <given-names>A</given-names></name> <name><surname>Giancardo</surname> <given-names>L</given-names></name><etal/></person-group> <article-title>Minimal clinically important difference for UPDRS-III in daily practice</article-title>. <source>Mov Disord Clin Pract</source>. (<year>2018</year>) <volume>5</volume>:<fpage>448</fpage>&#x2013;<lpage>50</lpage>. <pub-id pub-id-type="doi">10.1002/mdc3.12632</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zanigni</surname> <given-names>S</given-names></name> <name><surname>Trundell</surname> <given-names>D</given-names></name> <name><surname>Monnet</surname> <given-names>A</given-names></name> <name><surname>Shariati</surname> <given-names>N</given-names></name> <name><surname>Moore</surname> <given-names>E</given-names></name> <name><surname>Davies</surname> <given-names>E</given-names></name><etal/></person-group> <article-title>Estimating the meaningful within-patient change threshold for the MDS-UPDRS part III</article-title>. <source>Neurology</source>. (<year>2022</year>) <volume>98</volume>(<issue>18 Suppl</issue>):<fpage>2585</fpage>. <pub-id pub-id-type="doi">10.1212/WNL.98.18_supplement.2585</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Domingo</surname> <given-names>A</given-names></name> <name><surname>Klein</surname> <given-names>C</given-names></name></person-group>. <article-title>Genetics of Parkinson disease</article-title>. <source>Handb Clin Neurol</source>. (<year>2018</year>) <volume>147</volume>:<fpage>211</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1016/B978-0-444-63233-3.00014-2</pub-id><pub-id pub-id-type="pmid">29325612</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nuytemans</surname> <given-names>K</given-names></name> <name><surname>Theuns</surname> <given-names>J</given-names></name> <name><surname>Cruts</surname> <given-names>M</given-names></name> <name><surname>Van Broeckhoven</surname> <given-names>C</given-names></name></person-group>. <article-title>Genetic etiology of Parkinson disease associated with mutations in the SNCA, PARK2, PINK1, PARK7, and LRRK2 genes: a mutation update</article-title>. <source>Hum Mutat</source>. (<year>2010</year>) <volume>31</volume>:<fpage>763</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1002/humu.21277</pub-id><pub-id pub-id-type="pmid">20506312</pub-id></mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yeo</surname> <given-names>IK</given-names></name> <name><surname>Johnson</surname> <given-names>RA</given-names></name></person-group>. <article-title>A new family of power transformations to improve normality or symmetry</article-title>. <source>Biometrika</source>. (<year>2000</year>) <volume>87</volume>(<issue>4</issue>):<fpage>954</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1093/biomet/87.4.954</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>D&#x017E;eroski</surname> <given-names>S</given-names></name> <name><surname>&#x017D;enko</surname> <given-names>B</given-names></name></person-group>. <article-title>Is combining classifiers with stacking better than selecting the best one?</article-title> <source>Mach Learn</source>. (<year>2004</year>) <volume>54</volume>:<fpage>255</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1023/B:MACH.0000015881.36452.6e</pub-id></mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>S</given-names></name> <name><surname>Lee</surname> <given-names>S-I</given-names></name></person-group>. <article-title>A unified approach to interpreting model predictions</article-title>. <comment><italic>arXiv</italic> [Preprint]. <italic>arXiv:1705.07874</italic></comment> (<year>2017</year>). <pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id></mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>KP</given-names></name> <name><surname>Raval</surname> <given-names>V</given-names></name> <name><surname>Treacher</surname> <given-names>A</given-names></name> <name><surname>Mellema</surname> <given-names>C</given-names></name> <name><surname>Yu</surname> <given-names>FF</given-names></name> <name><surname>Pinho</surname> <given-names>MC</given-names></name><etal/></person-group> <article-title>Predicting Parkinson&#x2019;s disease trajectory using clinical and neuroimaging baseline measures</article-title>. <source>Parkinsonism Relat Disord</source>. (<year>2021</year>) <volume>85</volume>:<fpage>44</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1016/j.parkreldis.2021.02.026</pub-id><pub-id pub-id-type="pmid">33730626</pub-id></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/576528/overview">Paraskevi Papadopoulou</ext-link>, American College of Greece, Greece</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2002768/overview">Aya Galal</ext-link>, American University in Cairo, Egypt</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3169586/overview">Mehul Kaliya</ext-link>, All India Institute of Medical Sciences, India</p></fn>
</fn-group>
</back>
</article>