<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2026.1748799</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Explainable AI-driven customer churn prediction: a multi-model ensemble approach with SHAP-based feature analysis</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>El Attar</surname> <given-names>Ali</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>El-Hajj</surname> <given-names>Mohammed</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3045171"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Faculty of Computer Studies (FCS), Arab Open University (AOU)</institution>, <city>Beirut</city>, <country country="LB">Lebanon</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Mohammed El-Hajj, <email xlink:href="mailto:mhajj@aou.edu.lb">mhajj@aou.edu.lb</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-10">
<day>10</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1748799</elocation-id>
<history>
<date date-type="received">
<day>18</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>26</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 El Attar and El-Hajj.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>El Attar and El-Hajj</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-10">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Customer churn prediction is critical for telecommunications companies to maintain profitability and inform retention strategies. This study builds upon existing work by implementing a comprehensive machine learning framework using the Telco Customer Churn dataset (<italic>n</italic> = 7,043). Our methodology integrated comprehensive feature engineering, SMOTE oversampling, and training of seven machine learning models including XGBoost, Random Forest, and a Multi-layer Perceptron. Model interpretation was conducted via SHAP analysis and customer segmentation. Key results demonstrated that gradient boosting algorithms (XGBoost, LightGBM, Gradient Boosting) achieved the highest balanced performance with accuracy, precision, recall, and F1-scores of 0.84, with XGBoost attaining the best discriminative ability (AUC-ROC: 0.932). A soft-voting ensemble of the top models matched this performance (F1-score: 0.84, AUC-ROC: 0.918). SHAP analysis revealed that contract type, tenure, and technical support were the features contributing most to the model&#x00027;s churn predictions. Threshold optimization at 0.528 balanced precision (0.90) and recall (0.91) while reducing false negatives by 15%. The findings provide actionable insights for prioritizing high-risk customers and designing targeted retention strategies in the telecom sector.</p></abstract>
<kwd-group>
<kwd>customer churn prediction</kwd>
<kwd>customer retention</kwd>
<kwd>customer segmentation</kwd>
<kwd>explainable AI</kwd>
<kwd>machine learning</kwd>
<kwd>SHAP analysis</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="11"/>
<table-count count="9"/>
<equation-count count="22"/>
<ref-count count="31"/>
<page-count count="23"/>
<word-count count="12797"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>AI in Business</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>The telecommunications industry operates in a hyper-competitive landscape where customer retention has become a critical determinant of profitability. With customer acquisition costs estimated to be five to ten times higher than retention costs (<xref ref-type="bibr" rid="B11">Huang and Kechadi, 2013</xref>; <xref ref-type="bibr" rid="B1">Asif et al., 2025</xref>), even modest reductions in churn rates can safeguard substantial revenues. This economic reality elevates churn prediction from a technical challenge to a strategic imperative for telecom operators seeking sustainable growth in saturated markets.</p>
<p>Machine learning has transformed churn prediction capabilities, with ensemble methods and deep learning achieving state-of-the-art accuracy (<xref ref-type="bibr" rid="B4">Chen and Guestrin, 2016</xref>; <xref ref-type="bibr" rid="B27">Ullah et al., 2019</xref>). While prior studies have demonstrated the effectiveness of individual techniques like XGBoost, SHAP, and ensemble methods for telecom churn prediction, there remains limited research on their systematic integration within a unified framework. This study builds upon existing work by: (1) conducting a comprehensive comparison of seven machine learning models including gradient boosting variants, (2) implementing a soft-voting ensemble that consolidates top-performing models, and (3) integrating SHAP-based explainability with autoencoder-driven segmentation to provide actionable business insights.</p>
<p>Despite these advancements, a significant disconnect persists between technical performance and practical deployment. High-performing models often operate as opaque black boxes, providing predictions without actionable explanations for business stakeholders (<xref ref-type="bibr" rid="B15">Lundberg and Lee, 2017</xref>). Furthermore, while predictive models excel at estimating individual churn probabilities, they typically lack integration with customer segmentation approaches that could inform differentiated retention strategies (<xref ref-type="bibr" rid="B30">Wu et al., 2021</xref>). This separation limits the strategic value of churn prediction systems, as effective retention requires not only identifying who might leave but also understanding why and what interventions would be most effective for different customer archetypes.</p>
<sec>
<label>1.1</label>
<title>Research opportunities and objectives</title>
<p>Building upon existing methodologies, this study identifies several opportunities for enhanced integration in churn prediction research. First, there exists an opportunity to improve the interpretability of ensemble models to enhance trust and adoption in business contexts. Second, better integration of supervised prediction with unsupervised clustering approaches could more effectively align risk assessment with customer profiling. Third, incorporating cost-sensitive evaluation alongside statistical metrics could better align model optimization with business objectives. Finally, systematic comparison of multiple modeling approaches could provide more robust insights for model selection in different contexts.</p>
<sec>
<label>1.1.1</label>
<title>Dataset considerations</title>
<p>It is important to note that many churn prediction studies, including this one, rely on benchmark datasets such as the publicly available Telco Customer Churn dataset from Kaggle. While this facilitates reproducibility and direct comparison with prior work, it also highlights the need for future research to validate findings across multiple datasets from different contexts, geographies, and service portfolios. Churn behavior may vary based on cultural, regulatory, and market-specific factors (<xref ref-type="bibr" rid="B29">Verbraken et al., 2012</xref>), and our study acknowledges this limitation while using the IBM dataset as a well-established benchmark for methodological development.</p>
<p>To address these opportunities, this study pursues four interconnected objectives. First, we design and validate a soft-voting ensemble framework that combines diverse machine learning algorithms for robust churn prediction. Second, we integrate SHAP-based explainability to provide both global and local interpretability of model decisions. Third, we apply autoencoder-based representation learning to discover latent customer segments with distinct churn risk profiles. Fourth, we evaluate model performance using both statistical metrics and cost-sensitive business measures to ensure practical utility.</p>
</sec>
</sec>
<sec>
<label>1.2</label>
<title>Contributions</title>
<p>This work extends the existing literature by: (1) providing a comprehensive comparison of seven machine learning models on the Telco dataset with consistent evaluation protocols, (2) implementing and evaluating a soft-voting ensemble approach, (3) integrating autoencoder-based segmentation with SHAP explanations for enhanced interpretability, and (4) demonstrating how business-aligned evaluation metrics can bridge the gap between predictive performance and financial outcomes.</p>
</sec>
<sec>
<label>1.3</label>
<title>Paper organization</title>
<p>The remainder of this paper is structured as follows. Section 2 reviews relevant literature on ensemble methods, explainable AI, and customer segmentation in churn prediction. Section 3 details the dataset, preprocessing pipeline, model architectures, and evaluation framework. Section 4 presents experimental findings including predictive performance, interpretability insights, and segment analysis. Section 5 interprets the results, discusses business implications, and acknowledges limitations. Section 6 summarizes key findings and suggests directions for future research.</p>
</sec>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<p>This section surveys the extensive research on customer churn prediction in telecom, arguing that prior studies have frequently prioritized narrow technical advancements and accuracy metrics over interpretability and tangible business value. We organize the review around three pivotal methodological families &#x2014; ensemble learning, explainable AI (XAI), and unsupervised segmentation &#x2014; to evaluate their contribution to business-utility-driven evaluation. The identified limitations within and across these families form the primary motivation for our integrated framework.</p>
<sec>
<label>2.1</label>
<title>Triad alignment: ensembles, XAI, and segmentation</title>
<p>Our literature review reveals that studies simultaneously addressing all three components &#x2014; ensemble methods, explainable AI (XAI), and unsupervised segmentation &#x2014; remain scarce. To date, <bold>RetenNet</bold> represents one of the most comprehensive frameworks, integrating classification models (Random Forest, XGBoost, LightGBM), SHAP-based explanations, fuzzy rule-based clustering, and prescriptive optimization (<xref ref-type="bibr" rid="B23">Prashanthan et al., 2025</xref>). While demonstrating the potential of integrated approaches, RetenNet relies solely on the IBM Telco dataset and does not address certain methodological considerations such as time-aware validation, model calibration, or lift-based evaluation.</p>
<p>More commonly, existing work combines only two of the three triad components. For example, several studies integrate ensemble methods with XAI techniques (e.g., SHAP or LIME) but do not incorporate customer segmentation (<xref ref-type="bibr" rid="B3">Chang et al., 2024</xref>; <xref ref-type="bibr" rid="B19">Noviandy et al., 2024</xref>). Another stream of research combines ensembles with clustering-based segmentation but provides limited or no model explainability (<xref ref-type="bibr" rid="B5">Christopher and Anand, 2024</xref>; <xref ref-type="bibr" rid="B26">Thankam and El Gayar, 2023</xref>; <xref ref-type="bibr" rid="B30">Wu et al., 2021</xref>; <xref ref-type="bibr" rid="B27">Ullah et al., 2019</xref>). A third group links segmentation and optimization with classification but excludes XAI components (<xref ref-type="bibr" rid="B22">Prashanthan, 2025</xref>).</p>
<p><bold>This systematic gap</bold>, namely the absence of frameworks that holistically integrate high-performance prediction (ensembles), transparent explanation (XAI), and strategic customer profiling (segmentation), motivates the present study. Our approach aims to bridge these components while addressing methodological limitations observed in prior work.</p>
</sec>
<sec>
<label>2.2</label>
<title>Ensemble learning in telecom churn</title>
<p>Ensemble methods consistently outperform single classifiers in churn prediction. For example, XGBoost achieved top F1 and AUC scores on a U.S. telecom dataset (<xref ref-type="bibr" rid="B26">Thankam and El Gayar, 2023</xref>), while Random Forest produced the best accuracy (91.66%) in another large dataset (<xref ref-type="bibr" rid="B3">Chang et al., 2024</xref>). LightGBM also demonstrated strong performance (accuracy 80.70%, F1 87.34%) (<xref ref-type="bibr" rid="B19">Noviandy et al., 2024</xref>). AdaBoost has occasionally been reported as superior in specific settings (<xref ref-type="bibr" rid="B30">Wu et al., 2021</xref>), while SVM-RBF surpassed tree-based methods in RetenNet experiments, underscoring dataset sensitivity (<xref ref-type="bibr" rid="B23">Prashanthan et al., 2025</xref>). Despite these advances, soft-voting ensemble is rarely implemented rigorously, with few works employing out-of-fold meta-features (<xref ref-type="bibr" rid="B5">Christopher and Anand, 2024</xref>; <xref ref-type="bibr" rid="B26">Thankam and El Gayar, 2023</xref>; <xref ref-type="bibr" rid="B30">Wu et al., 2021</xref>; <xref ref-type="bibr" rid="B3">Chang et al., 2024</xref>; <xref ref-type="bibr" rid="B19">Noviandy et al., 2024</xref>).</p>
</sec>
<sec>
<label>2.3</label>
<title>Explainable AI approaches</title>
<p>Explainability has become an emerging focus. SHAP and LIME are the most widely adopted tools for attributing churn risk to specific features such as tenure, contract type, and charges (<xref ref-type="bibr" rid="B3">Chang et al., 2024</xref>; <xref ref-type="bibr" rid="B19">Noviandy et al., 2024</xref>). RetenNet extends this with SHAP waterfall plots (<xref ref-type="bibr" rid="B23">Prashanthan et al., 2025</xref>). However, methodological rigor is limited: few works specify background dataset selection, address correlated features (via conditional SHAP or ALE), or report stability of explanations (<xref ref-type="bibr" rid="B3">Chang et al., 2024</xref>; <xref ref-type="bibr" rid="B19">Noviandy et al., 2024</xref>). Moreover, translation from explanations to prescriptive interventions is seldom demonstrated beyond illustrative cases.</p>
</sec>
<sec>
<label>2.4</label>
<title>Unsupervised segmentation</title>
<p>Segmentation remains a key theme but is dominated by classical clustering methods. K-means is the most common, though some studies also test hierarchical clustering, Gaussian Mixture Models, or DBSCAN (<xref ref-type="bibr" rid="B23">Prashanthan et al., 2025</xref>; <xref ref-type="bibr" rid="B22">Prashanthan, 2025</xref>; <xref ref-type="bibr" rid="B5">Christopher and Anand, 2024</xref>; <xref ref-type="bibr" rid="B26">Thankam and El Gayar, 2023</xref>; <xref ref-type="bibr" rid="B30">Wu et al., 2021</xref>; <xref ref-type="bibr" rid="B27">Ullah et al., 2019</xref>). RetenNet incorporates fuzzy rule-based clustering (<xref ref-type="bibr" rid="B23">Prashanthan et al., 2025</xref>). Integration patterns vary: clusters are sometimes used as features in classifiers (<xref ref-type="bibr" rid="B22">Prashanthan, 2025</xref>), as <italic>post-hoc</italic> groupings for intervention design (<xref ref-type="bibr" rid="B27">Ullah et al., 2019</xref>), or as modules within broader pipelines (<xref ref-type="bibr" rid="B30">Wu et al., 2021</xref>; <xref ref-type="bibr" rid="B6">Eswarapu et al., 2023</xref>). Yet validation is typically limited to internal indices such as silhouette scores, with little assessment of external business utility (e.g., churn separation or response heterogeneity). Notably, autoencoder-based segmentation is almost absent in the telecom churn literature, and no benchmarking of representation learning vs. traditional clustering has been reported.</p>
</sec>
<sec>
<label>2.5</label>
<title>Business alignment and evaluation</title>
<p>While predictive accuracy remains the dominant metric, a few works attempt to integrate decision optimization. RetenNet and an integrated clustering-classification-optimization framework explicitly allocate retention budgets under constraints (<xref ref-type="bibr" rid="B23">Prashanthan et al., 2025</xref>; <xref ref-type="bibr" rid="B22">Prashanthan, 2025</xref>). Similarly, profit-driven evaluation has been advocated by <xref ref-type="bibr" rid="B28">Verbeke et al. (2012)</xref>, yet most recent works continue to rely on accuracy or F1 as primary measures (<xref ref-type="bibr" rid="B5">Christopher and Anand, 2024</xref>; <xref ref-type="bibr" rid="B26">Thankam and El Gayar, 2023</xref>; <xref ref-type="bibr" rid="B30">Wu et al., 2021</xref>; <xref ref-type="bibr" rid="B3">Chang et al., 2024</xref>; <xref ref-type="bibr" rid="B19">Noviandy et al., 2024</xref>). Moreover, none of the reviewed studies describe rolling-origin validation, calibration analysis, or drift monitoring, limiting their external validity for real-world deployment.</p>
</sec>
<sec>
<label>2.6</label>
<title>Research gaps</title>
<p>From this review, four critical gaps emerge in the current literature on telecom churn prediction:</p>
<list list-type="bullet">
<list-item><p><bold>Lack of comprehensive comparative analysis</bold>: There is a need for detailed, telecom-specific comparative studies that span multiple model families (ensembles, neural networks, and linear models) with consistent, leakage-safe evaluation protocols. Most studies focus on limited model comparisons rather than systematic benchmarking across algorithmic families.</p></list-item>
<list-item><p><bold>Under-exploration of ensemble strategies</bold>: While individual algorithms are well-studied, there is insufficient exploration of different ensemble strategies (voting, stacking, and blending) and their comparative effectiveness for telecom churn prediction. The optimal combination of diverse models for this specific domain remains underexplored.</p></list-item>
<list-item><p><bold>Minimal integration of business-aligned metrics</bold>: Many studies focus primarily on statistical metrics without sufficient integration of business-oriented evaluation frameworks. There is limited use of cost-sensitive analysis, probability calibration, expected retention value, and uplift modeling to align predictive performance with actual business outcomes.</p></list-item>
<list-item><p><bold>Need for multi-dataset validation</bold>: While the IBM Telco dataset serves as a valuable benchmark for comparative analysis, there is a need for studies that validate approaches across multiple datasets to assess generalizability across different market contexts and customer populations.</p></list-item>
</list>
</sec>
<sec>
<label>2.7</label>
<title>Benchmark of prior studies</title>
<p><xref ref-type="table" rid="T1">Table 1</xref> provides a comparative benchmark of representative studies in telecom churn modeling, summarized by methodological components and evaluation focus.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Comparative benchmark of prior telecom churn studies.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Study</bold></th>
<th valign="top" align="left"><bold>Ensembles</bold></th>
<th valign="top" align="left"><bold>XAI</bold></th>
<th valign="top" align="left"><bold>Segmentation</bold></th>
<th valign="top" align="left"><bold>Business metrics</bold></th>
<th valign="top" align="left"><bold>Dataset and Notes</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">RetenNet (<xref ref-type="bibr" rid="B23">Prashanthan et al., 2025</xref>)</td>
<td valign="top" align="left">RF, XGB, SVM</td>
<td valign="top" align="left">SHAP</td>
<td valign="top" align="left">Fuzzy clustering</td>
<td valign="top" align="left">Budget optimization</td>
<td valign="top" align="left">IBM Telco, no time-aware validation</td>
</tr>
<tr>
<td valign="top" align="left">Integrated budget optimization (<xref ref-type="bibr" rid="B22">Prashanthan, 2025</xref>)</td>
<td valign="top" align="left">RF, XGB</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">K-means</td>
<td valign="top" align="left">Budget optimization</td>
<td valign="top" align="left">IBM Telco</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B5">Christopher and Anand (2024</xref>)</td>
<td valign="top" align="left">RF, XGB, LR, MLP</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">K-means</td>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left">Telecom dataset</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B6">Eswarapu et al. (2023</xref>)</td>
<td valign="top" align="left">AutoML ensemble</td>
<td valign="top" align="left">Local XAI</td>
<td valign="top" align="left">Basic</td>
<td valign="top" align="left">Fairness</td>
<td valign="top" align="left">Telecom dataset</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B26">Thankam and El Gayar (2023</xref>)</td>
<td valign="top" align="left">RF, XGB, others</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">K-means</td>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left">Telecom dataset</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B30">Wu et al. (2021</xref>)</td>
<td valign="top" align="left">RF, AdaBoost</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">K-means</td>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left">Telecom dataset</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B3">Chang et al. (2024</xref>)</td>
<td valign="top" align="left">RF, GBM</td>
<td valign="top" align="left">SHAP, LIME</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left">Telecom dataset</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B27">Ullah et al. (2019</xref>)</td>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left"><italic>Post hoc</italic> clusters</td>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left">Large telecom dataset</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B19">Noviandy et al. (2024</xref>)</td>
<td valign="top" align="left">LightGBM, CatBoost</td>
<td valign="top" align="left">SHAP</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left">Telecom dataset</td>
</tr>
<tr>
<td valign="top" align="left"><xref ref-type="bibr" rid="B28">Verbeke et al. (2012</xref>)</td>
<td valign="top" align="left">Rule induction</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Profit driven</td>
<td valign="top" align="left">Telecom dataset</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Methodology</title>
<p>This research employs a comprehensive machine learning framework designed to address the multifaceted challenge of customer churn prediction in the telecommunications industry. Our methodology integrates predictive modeling, explainable AI techniques, and customer segmentation to develop both accurate and interpretable churn prediction models. The systematic approach ensures robust model performance while providing actionable insights for business decision-making. The overall architecture of our proposed framework, illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, demonstrates the interconnected components and workflow from raw data processing to final business insights.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Comprehensive methodology architecture with detailed phase descriptions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0001.tif">
<alt-text content-type="machine-generated">Flowchart outlining a machine learning workflow for customer segmentation and analysis. It consists of four phases: Data Preparation, Model Development, Interpretation, and Evaluation. Data Preparation involves preprocessing a raw dataset of 7,043 customers with 21 features, handling missing values, encoding, and standardization. Model Development includes feature engineering, class imbalance handling with SMOTE, and training seven algorithms using five-fold cross-validation. Interpretation employs SHAP analysis for model explanation. Evaluation involves statistical metrics, cost analysis, and threshold optimization, leading to business insights like risk profiles and ROI analysis, with customer segmentation using K-means and autoencoder methods.</alt-text>
</graphic>
</fig>
<p>The architectural framework comprises four sequential phases: <bold>(1) Data preparation</bold> involving comprehensive preprocessing and feature engineering; <bold>(2) Model development</bold> employing individual classifiers and a soft voting ensemble with threshold optimization; <bold>(3) Interpretation</bold> utilizing explainable AI and customer segmentation for actionable insights; and <bold>(4) Evaluation</bold> conducting rigorous performance assessment and statistical validation. This structured approach ensures methodological rigor while maintaining practical applicability for telecommunications companies.</p>
<sec>
<label>3.1</label>
<title>Dataset description and preprocessing</title>
<p>The foundation of our predictive modeling framework is built upon the Telco Customer Churn dataset, obtained from the IBM Analytics community and publicly available on Kaggle (<xref ref-type="bibr" rid="B12">Kaggle and blastchar, 2021</xref>). This dataset contains comprehensive customer information from a telecommunications company, specifically designed for churn prediction tasks.</p>
<sec>
<label>3.1.1</label>
<title>Selection of IBM telco dataset</title>
<p>This study employs the IBM Telco Customer Churn dataset as it represents a well-established benchmark in churn prediction literature, enabling direct comparison with prior work. The dataset&#x00027;s comprehensive feature set and public availability support reproducibility and methodological evaluation. While acknowledging that findings from a single dataset may not generalize to all telecom contexts, this dataset provides a standardized foundation for developing and comparing analytical approaches that can be subsequently validated on additional datasets.</p></sec>
<sec>
<label>3.1.2</label>
<title>Dataset characteristics</title>
<p>The original dataset comprises 7,043 customer records with 21 features that capture demographic information, account details, service subscriptions, and customer behavior patterns. The feature set can be mathematically represented as:</p>
<disp-formula id="EQ1"><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>7043</mml:mn></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>where <inline-formula><mml:math id="M2"><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>21</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> represents the feature vector for customer <italic>i</italic>, and <italic>y</italic><sub><italic>i</italic></sub>&#x02208;{0, 1} denotes the binary churn status (0: No Churn, 1: Churn).</p>
<p>The feature space <inline-formula><mml:math id="M3"><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula> encompasses three distinct types of variables:</p>
<list list-type="bullet">
<list-item><p><bold>Demographic features:</bold> Customer demographic information including gender, age (SeniorCitizen), and partnership status.</p></list-item>
<list-item><p><bold>Account information:</bold> Contract details, payment methods, paperless billing, and monthly/total charges.</p></list-item>
<list-item><p><bold>Service subscriptions:</bold> Comprehensive service enrollment including phone lines, internet services, online security, streaming services, and technical support.</p></list-item>
</list>
<p>The target variable distribution exhibits significant class imbalance, with approximately 73.5% of customers retaining services and 26.5% churning, which necessitates specialized handling strategies discussed in Section 3.3.</p></sec>
<sec>
<label>3.1.3</label>
<title>Data quality assessment</title>
<p>A rigorous data quality assessment was conducted to identify and address potential issues that could compromise model performance:</p>
<p><bold>Missing values analysis:</bold> Systematic examination revealed missing values predominantly in the <monospace>TotalCharges</monospace> feature, which were identified as blank entries rather than explicit null values. The missing pattern was determined to be Missing Completely at Random (MCAR) through statistical testing.</p>
<p><bold>Data type validation:</bold> Initial exploration identified data type inconsistencies, particularly with the <monospace>TotalCharges</monospace> feature being stored as string type due to the presence of whitespace characters in missing values. This required type conversion to numerical format for analytical processing. For example, <italic>ContractDuration</italic><sub><italic>i</italic></sub> is encoded numerically (Month-to-month = 1, 1 year = 12, 2 year = 24).</p></sec>
<sec>
<label>3.1.4</label>
<title>Preprocessing pipeline with domain considerations</title>
<p><bold>Missing value imputation:</bold> For the TotalCharges feature with missing values (11 instances, 0.16%), median imputation was selected:</p>
<disp-formula id="E2"><mml:math id="M4"><mml:mrow><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TotalCharges</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">missing</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">median</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TotalCharges</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">non-missing</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></disp-formula>
<p><bold>Domain justification:</bold> In telecommunications billing data, charge distributions often exhibit right skewness due to high-value outliers. Median imputation preserves the central tendency while minimizing distortion from extreme values, which is critical for maintaining the economic interpretability of spending patterns. This approach aligns with industry practices where billing anomalies are handled conservatively to avoid artificial inflation of customer value metrics.</p>
<p><bold>Contract duration encoding:</bold> Contract types were encoded numerically (Month-to-month = 1, 1 year = 12, 2 year = 24) rather than using one-hot encoding alone. <bold>Domain justification:</bold> This ordinal encoding captures the inherent hierarchy in contract commitment, which directly correlates with churn risk in telecommunications. The numerical representation preserves the business intuition that longer contracts indicate higher commitment, enabling models to learn this progressive relationship more effectively than treating contract types as purely categorical.</p>
<p><bold>Feature scaling:</bold> Numerical features were standardized using StandardScaler. <bold>Domain justification:</bold> In telecom datasets, features like MonthlyCharges (range: $18&#x02013;118) and tenure (range: 0&#x02013;72 months) operate on vastly different scales. Standardization prevents algorithm bias toward features with larger numerical ranges, particularly important for distance-based algorithms and neural networks. This ensures that all customer attributes contribute proportionally to the model&#x00027;s learning process.</p>
</sec>
</sec>
<sec>
<label>3.2</label>
<title>Feature engineering</title>
<p>Feature engineering constitutes a critical phase in our methodology, transforming raw variables into meaningful predictors that enhance model performance and interpretability. This process leverages telecommunications domain knowledge to create features that better capture customer behavior patterns and churn signals.</p>
<sec>
<label>3.2.1</label>
<title>AvgMonthlyCharge: normalized spending indicator</title>
<disp-formula id="E3"><mml:math id="M5"><mml:mrow><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">AvgMonthlyCharge</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TotalCharges</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">tenure</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<p>The addition of 1 in the denominator prevents division by zero for new customers with zero tenure.</p>
<p><bold>Domain rationale:</bold> Traditional metrics like TotalCharges and MonthlyCharges provide incomplete pictures of customer value. In telecommunications, a customer with high total charges over long tenure represents stable loyalty, whereas similar spending over a short period may indicate premium but potentially unstable service usage. This normalized metric captures spending intensity relative to relationship duration, addressing a key business insight: customers who spend more per month relative to their tenure may have higher perceived value or, conversely, may experience &#x0201C;bill shock&#x0201D; leading to churn. This feature incorporates principles from behavioral economics, particularly the <italic>sunk cost effect</italic>, where customers perceive greater investment in services used intensively over time.</p></sec>
<sec>
<label>3.2.2</label>
<title>HasMultipleServices: service bundle complexity</title>
<disp-formula id="E4"><mml:math id="M6"><mml:mrow><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">HasMultipleServices</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>I</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">service</mml:mtext></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">active</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>where <italic>k</italic> &#x0003D; 9 represents the total service categories: phone lines, internet services, online security, device protection, technical support, streaming TV, streaming movies, and associated add-ons.</p>
<p><bold>Domain rationale:</bold> Service bundling is a fundamental strategy in telecommunications, yet its impact on churn is complex. While theory suggests multiple services create higher switching costs and loyalty, industry experience shows complex bundles can lead to confusion and &#x0201C;bill shock.&#x0201D; This feature quantifies service engagement depth, capturing the dual nature of bundling: customers with 3&#x0002B; premium services showed 67% lower churn risk in our analysis, validating the <italic>switching costs</italic> principle, yet those with only basic services but high monthly charges exhibited elevated risk. The feature directly measures a customer&#x00027;s integration into the service ecosystem, a key indicator of retention potential in telecom CRM strategies.</p>
</sec>
</sec>
<sec>
<label>3.3</label>
<title>Class imbalance handling</title>
<p>Class imbalance represents a fundamental challenge in churn prediction modeling, as the natural distribution of customer churn typically skews heavily toward retention. This section outlines the systematic approach employed to address this imbalance and ensure robust model performance across both majority and minority classes.</p>
<sec>
<label>3.3.1</label>
<title>Problem identification: distribution analysis of target variable</title>
<p>The binary nature of churn prediction requires careful examination of class balance, as standard classification algorithms often underperform on imbalanced datasets. Analysis of the Telco Customer Churn dataset revealed a significant skew: churned customers constitute the minority class at approximately 26.5% of the dataset, while retained customers comprise the majority 73.5%. This corresponds to a class distribution ratio of approximately 1:2.77, meaning for every churned customer, there are nearly three retained ones.</p>
<p>Such imbalance presents three critical challenges:</p>
<p><bold>Predictive bias</bold>: Algorithms tend to optimize overall accuracy by favoring the majority class, potentially achieving high accuracy while failing to identify churners&#x02014;the customers of greatest business interest.</p>
<p><bold>Evaluation metric distortion</bold>: Traditional accuracy metrics become misleading, necessitating alternative measures like precision, recall, F1-score, and AUC-ROC that better capture minority-class performance.</p>
<p><bold>Cost-sensitive considerations</bold>: From a business perspective, the cost of false negatives (missed churners) typically exceeds that of false positives, further emphasizing the need for specialized imbalance handling techniques.</p></sec>
<sec>
<label>3.3.2</label>
<title>SMOTE implementation</title>
<sec>
<label>3.3.2.1</label>
<title>Synthetic minority oversampling technique</title>
<p>The Synthetic Minority Over-sampling Technique (SMOTE) was adopted to address class imbalance, as it generates informative synthetic minority samples rather than replicating existing observations. This property reduces overfitting and improves the representation of the minority class in the feature space.</p>
<p>SMOTE generates synthetic samples through linear interpolation between a minority instance and one of its nearest neighbors:</p>
<disp-formula id="EQ5"><mml:math id="M7"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">new</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003BB;</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<p>Here, <italic>x</italic><sub>new</sub> denotes the synthetic instance, <italic>x</italic><sub><italic>i</italic></sub> represents a minority class observation, <italic>x</italic><sub><italic>zi</italic></sub> is one of the <italic>k</italic> nearest minority neighbors of <italic>x</italic><sub><italic>i</italic></sub>, and &#x003BB;&#x02208;[0, 1] is a random interpolation coefficient. This process expands the minority decision region by populating sparse areas of the feature space and supports the learning of more representative decision boundaries.</p>
<p>Compared to random oversampling, SMOTE offers several advantages. It reduces variance by avoiding direct duplication of minority instances, improves class separation by reinforcing boundary regions, and enriches the minority feature space with diverse synthetic examples.</p></sec>
<sec>
<label>3.3.2.2</label>
<title>Parameter selection</title>
<p>SMOTE parameters were selected through a systematic validation procedure. The neighborhood size <italic>k</italic> was evaluated over the set {3, 5, 7, 9} using cross-validation on the training data. A value of <italic>k</italic> &#x0003D; 5 was selected as it provided a balance between capturing local structure and avoiding the introduction of noisy synthetic samples. The implementation used <monospace>imbalanced-learn</monospace> (v0.10.1) with parameters: <monospace>k_neighbors</monospace> = <monospace>5</monospace>, <monospace>sampling_strategy</monospace> = <monospace>&#x0201C;auto&#x0201D;</monospace>, and <monospace>random_state</monospace> = <monospace>42</monospace>. The sampling ratio was defined to achieve approximate class balance while preserving the majority class distribution:</p>
<disp-formula id="EQ6"><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Sampling Ratio</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">majority</mml:mtext></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">minority</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">minority</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
<p>Synthetic minority samples were generated until the desired balance was achieved, while the original majority class observations were retained unchanged.</p></sec>
<sec>
<label>3.3.2.3</label>
<title>Application protocol and data leakage prevention</title>
<p>To ensure a valid evaluation and prevent data leakage, SMOTE was applied according to a strict protocol. First, the dataset was divided into training (80%, 5,634 instances) and test (20%, 1,409 instances) sets using stratified sampling with <monospace>random_state</monospace> = <monospace>42</monospace> to preserve the original class distribution and ensure reproducibility. SMOTE was then applied exclusively to the training data, while the test set remained untouched to reflect real-world class imbalance.</p>
<p>During hyperparameter tuning, stratified five-fold cross-validation was conducted on the training set. For each fold, SMOTE was applied only to the training portion of the fold, ensuring that synthetic samples did not contaminate the validation data. Final models were trained using the fully oversampled training set and evaluated on the original test set.</p>
<p>This procedure avoids the common methodological error of applying SMOTE prior to data splitting, which can lead to optimistic performance estimates due to information leakage (<xref ref-type="bibr" rid="B2">Blagus and Lusa, 2017</xref>; <xref ref-type="bibr" rid="B13">Lema&#x000EE;tre et al., 2020</xref>).</p>
<sec>
<label>3.3.2.4</label>
<title>Impact on model learning</title>
<p>Balancing the training data with SMOTE influenced multiple aspects of model development. First, learning dynamics improved as classifiers were exposed to more representative minority class patterns, preventing dominance by majority class observations. Second, probability calibration and decision threshold selection became more stable, as predicted probabilities were less skewed toward the majority class.</p>
<p>Overall, the use of SMOTE constitutes a critical methodological choice that supports reliable churn prediction in cost-sensitive settings, where accurately identifying potential churners carries significant financial consequences. Empirical performance comparisons with and without SMOTE are reported in the results section.</p>
</sec>
</sec>
</sec>
<sec>
<label>3.4</label>
<title>Machine learning models</title>
<p>This section details the comprehensive machine learning framework developed for customer churn prediction, encompassing both individual model architectures and advanced ensemble strategies. The methodological approach ensures robust performance through systematic hyperparameter optimization, cross-validation, and probability calibration.</p>
<sec>
<label>3.4.1</label>
<title>Individual models</title>
<p>A diverse set of machine learning algorithms was implemented to leverage their complementary strengths and provide comprehensive coverage of different modeling paradigms.</p>
<sec>
<label>3.4.1.1</label>
<title>XGBoost: hyperparameter tuning and cross-validation strategy</title>
<p>The Extreme Gradient Boosting (XGBoost) algorithm was selected for its proven effectiveness in tabular data classification tasks. Hyperparameter tuning was performed using Optuna, employing a 5-fold stratified cross-validation scheme to select the parameter combination that maximized cross-validated AUC:</p>
<disp-formula id="EQ7"><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">XGB</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>l</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>&#x003A9;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>
<p>where the regularization term &#x003A9;(<italic>f</italic>) is defined as:</p>
<disp-formula id="EQ8"><mml:math id="M10"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003A9;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x003B3;</mml:mi><mml:mi>T</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mi>&#x003BB;</mml:mi><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mi>w</mml:mi><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(5)</label></disp-formula>
<p>The hyperparameter search space and optimal values are summarized in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>XGBoost hyperparameter optimization results.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Parameter</bold></th>
<th valign="top" align="left"><bold>Search space</bold></th>
<th valign="top" align="center"><bold>Optimal value</bold></th>
<th valign="top" align="left"><bold>Interpretation</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Learning rate (&#x003B7;)</td>
<td valign="top" align="center">[0.01, 0.1]</td>
<td valign="top" align="left">Optuna-determined</td>
<td valign="top" align="left">Step size shrinkage</td>
</tr>
<tr>
<td valign="top" align="left">Max depth</td>
<td valign="top" align="center">[3, 8]</td>
<td valign="top" align="left">Optuna-determined</td>
<td valign="top" align="left">Maximum tree depth</td>
</tr>
<tr>
<td valign="top" align="left">n_estimators</td>
<td valign="top" align="center">[300, 700]</td>
<td valign="top" align="left">Optuna-determined</td>
<td valign="top" align="left">Number of boosting rounds</td>
</tr>
<tr>
<td valign="top" align="left">Sub-sample</td>
<td valign="top" align="center">[0.6, 1.0]</td>
<td valign="top" align="left">Optuna-determined</td>
<td valign="top" align="left">Training data sampling ratio</td>
</tr>
<tr>
<td valign="top" align="left">Colsample by Tree</td>
<td valign="top" align="center">[0.6, 1.0]</td>
<td valign="top" align="left">Optuna-determined</td>
<td valign="top" align="left">Feature sampling ratio</td>
</tr></tbody>
</table>
</table-wrap>
<p>The cross-validation strategy employed stratified k-fold partitioning to maintain class distribution across folds, ensuring reliable performance estimation.</p></sec>
<sec>
<label>3.4.1.2</label>
<title>Random forest: ensemble size optimization and feature sub-sampling</title>
<p>The Random Forest implementation focused on ensemble diversity and feature space partitioning:</p>
<disp-formula id="EQ9"><mml:math id="M11"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">RF</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">mode</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(6)</label></disp-formula>
<p>where <italic>h</italic><sub><italic>t</italic></sub> represents individual decision trees and <italic>T</italic> denotes the ensemble size. The feature sub-sampling strategy follows:</p>
<disp-formula id="EQ10"><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>&#x0230A;</mml:mo><mml:mrow><mml:msqrt><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msqrt></mml:mrow><mml:mo>&#x0230B;</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(7)</label></disp-formula>
<p>where <italic>p</italic> is the total number of features and <italic>m</italic> represents features considered for each split.</p>
<p>Key optimization results include:</p>
<list list-type="bullet">
<list-item><p><bold>Ensemble size</bold>: 100 estimators (beyond which diminishing returns observed)</p></list-item>
<list-item><p><bold>Feature sub-sampling</bold>: <inline-formula><mml:math id="M13"><mml:msqrt><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">features</mml:mtext></mml:mstyle></mml:mrow></mml:msqrt></mml:math></inline-formula> for classification tasks</p></list-item>
<list-item><p><bold>Maximum depth</bold>: Unlimited for individual trees to capture complex interactions</p></list-item>
<list-item><p><bold>Minimum samples split</bold>: 2 to allow fine-grained partitioning</p></list-item>
</list></sec>
<sec>
<label>3.4.1.3</label>
<title>Deep neural network: architecture design and regularization techniques</title>
<p>A multilayer perceptron (MLP) architecture was designed with systematic regularization to prevent overfitting based on the data presented in <xref ref-type="table" rid="T3">Table 3</xref>:</p>
<disp-formula id="EQ11"><mml:math id="M14"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>W</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>b</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(8)</label></disp-formula>
<p>The network architecture employed dropout regularization:</p>
<disp-formula id="EQ12"><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">drop</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x02299;</mml:mo><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>m</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>m</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x0007E;</mml:mo><mml:mtext class="textrm" mathvariant="normal">Bernoulli</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mi>p</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(9)</label></disp-formula>
<p>Additional regularization techniques included:</p>
<list list-type="bullet">
<list-item><p><bold>L2 weight regularization</bold>: Tuned via Optuna to penalize large weights (parameter &#x003B1; in MLPClassifier).</p></list-item>
<list-item><p><bold>Batch normalization</bold>: Not applied in the current implementation.</p></list-item>
<list-item><p><bold>Early stopping</bold>: Not applied in the current implementation.</p></list-item>
</list>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Optimized deep neural network architecture for churn prediction.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Layer</bold></th>
<th valign="top" align="center"><bold>Units</bold></th>
<th valign="top" align="center"><bold>Activation/regularization</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Input</td>
<td valign="top" align="center">46</td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">Hidden 1</td>
<td valign="top" align="center">64</td>
<td valign="top" align="center">ReLU</td>
</tr>
<tr>
<td valign="top" align="left">Hidden 2</td>
<td valign="top" align="center">32</td>
<td valign="top" align="center">ReLU</td>
</tr>
<tr>
<td valign="top" align="left">Output</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">Sigmoid</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Hidden layer sizes were selected via Optuna hyperparameter tuning using a 5-fold stratified cross-validation scheme.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<label>3.4.1.4</label>
<title>LightGBM: gradient boosting with histogram-based optimization</title>
<p>The Light Gradient Boosting Machine (LightGBM) was implemented for its efficiency with large datasets and categorical features. LightGBM uses histogram-based algorithms to bucket continuous feature values into discrete bins, accelerating the training process:</p>
<disp-formula id="EQ13"><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">LGBM</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>l</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>&#x003A9;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(10)</label></disp-formula>
<p>with leaf-wise tree growth strategy that minimizes loss more directly than level-wise approaches. Key optimization parameters included:</p>
<list list-type="bullet">
<list-item><p><bold>Number of Leaves (<italic>num</italic>_<italic>leaves</italic>)</bold>: Tuned via Optuna in the range 20&#x02013;100 (controls model complexity).</p></list-item>
<list-item><p><bold>Learning Rate (&#x003B7;)</bold>: Tuned via Optuna in the range 0.01&#x02013;0.08 (step size shrinkage).</p></list-item>
<list-item><p><bold>Feature Fraction (<italic>feature</italic>_<italic>fraction</italic>)</bold>: Tuned via Optuna in the range 0.7&#x02013;0.95 (random subspace method).</p></list-item>
<list-item><p><bold>Bagging Fraction (<italic>bagging</italic>_<italic>fraction</italic>)</bold>: Tuned via Optuna in the range 0.7&#x02013;0.95 (data sampling for each iteration).</p></list-item>
<list-item><p><bold>Minimum Data in Leaf (<italic>min</italic>_<italic>data</italic>_<italic>in</italic>_<italic>leaf</italic>)</bold>: Tuned via Optuna in the range 20&#x02013;120 (prevents overfitting).</p></list-item>
</list></sec>
<sec>
<label>3.4.1.5</label>
<title>Traditional models: logistic regression, AdaBoost, gradient boosting</title>
<p>Complementary traditional algorithms provided baseline performance and ensemble diversity:</p>
<p><bold>Logistic regression</bold> with L2 regularization:</p>
<disp-formula id="EQ14"><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>|</mml:mo><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>w</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(11)</label></disp-formula>
<p><bold>AdaBoost</bold> with decision stumps as weak learners:</p>
<disp-formula id="EQ15"><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mo class="qopname">ln</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003F5;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003F5;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(12)</label></disp-formula>
<p><bold>Gradient boosting</bold> with exponential loss minimization:</p>
<disp-formula id="EQ16"><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(13)</label></disp-formula>
</sec>
</sec>
<sec>
<label>3.4.2</label>
<title>Comprehensive hyperparameter specifications</title>
<p>To ensure full reproducibility, <xref ref-type="table" rid="T4">Table 4</xref> provides the complete hyperparameter configurations for all models used in this study. These parameters were determined through systematic Optuna-based hyperparameter optimization with 5-fold cross-validation, with final values selected to optimize F1-score.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Complete hyperparameter configurations for all machine learning models based on Optuna tuning.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="left"><bold>Final hyperparameters</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="left"><monospace>learning_rate=best, n_estimators=best, max_depth=best, subsample=best, colsample_bytree=best, random_state=42, eval_metric=&#x00027;logloss&#x00027;</monospace> </td>
</tr>
<tr>
<td valign="top" align="left">Random forest</td>
<td valign="top" align="left"><monospace>n_estimators=best, max_depth=best, min_samples_leaf=best, class_weight=balanced, random_state=42</monospace> </td>
</tr>
<tr>
<td valign="top" align="left">LightGBM</td>
<td valign="top" align="left"><monospace>num_leaves=best, learning_rate=best, n_estimators=best, feature_fraction=best, bagging_fraction=best, min_data_in_leaf=best, random_state=42, verbosity=-1</monospace> </td>
</tr>
<tr>
<td valign="top" align="left">Gradient boosting</td>
<td valign="top" align="left"><monospace>n_estimators=best, learning_rate=best, max_depth=best, random_state=42</monospace> </td>
</tr>
<tr>
<td valign="top" align="left">AdaBoost</td>
<td valign="top" align="left"><monospace>n_estimators=best, learning_rate=best, random_state=42</monospace> </td>
</tr>
<tr>
<td valign="top" align="left">Logistic regression</td>
<td valign="top" align="left"><monospace>C=best, class_weight=balanced, solver=lbfgs, max_iter=2000</monospace> </td>
</tr>
<tr>
<td valign="top" align="left">Multi-layer perceptron (MLP)</td>
<td valign="top" align="left"><monospace>hidden_layer_sizes=best, alpha=best, learning_rate_init=best, max_iter=500, random_state=42</monospace> </td>
</tr></tbody>
</table>
</table-wrap>
<p>All models were implemented using scikit-learn (v1.3.0), except for XGBoost (v1.7.0) and LightGBM (v4.1.0). To ensure full reproducibility, the random seed was consistently set to 42 across all components of the pipeline, including data splitting (<monospace>train_test_split</monospace>), cross-validation (<monospace>StratifiedKFold</monospace>), model initialization (<monospace>random_state</monospace> in all classifiers), and oversampling with SMOTE. This setup guarantees that results can be reliably reproduced under the same software environment and data preprocessing steps.</p></sec>
<sec>
<label>3.4.3</label>
<title>Ensemble strategy: soft voting</title>
<sec>
<label>3.4.3.1</label>
<title>Technical implementation</title>
<p>The soft voting ensemble was implemented by averaging predicted probabilities from the three top-performing models: XGBoost, LightGBM, and Gradient Boosting. Each base model was trained using the optimized hyperparameters specified in <xref ref-type="table" rid="T4">Table 4</xref>. Equal weighting was applied to the probability outputs of the models, and computations were parallelized where applicable.</p></sec>
<sec>
<label>3.4.3.2</label>
<title>Training strategy</title>
<p>The ensemble was trained on the SMOTE-balanced training set obtained after the initial train-test split. No out-of-fold or stacking procedure was used; the ensemble prediction consists of a simple mean of the individual model probabilities. This approach ensures that all base models contribute equally to the final prediction while maintaining the integrity of the evaluation process on the untouched test set.</p></sec>
<sec>
<label>3.4.3.3</label>
<title>Probability calibration</title>
<p>All base models in the ensemble were calibrated using <monospace>CalibratedClassifierCV</monospace> with isotonic regression (5-fold) before ensemble combination. This ensured that predicted probabilities from each model were properly calibrated, reducing overconfidence and improving the reliability of the weighted average.</p></sec>
<sec>
<label>3.4.3.4</label>
<title>Variance reduction</title>
<p>The ensemble provides variance reduction through model averaging. With three diverse boosting algorithms, the ensemble reduces overfitting by combining models with different error patterns. The stability of ensemble predictions was validated through bootstrap analysis (1,000 iterations), showing a 28% reduction in prediction variance compared to individual models.</p>
</sec>
</sec>
<sec>
<label>3.4.4</label>
<title>Individual model comparison framework</title>
<p>Seven machine learning models were implemented to ensure comprehensive evaluation across distinct algorithmic paradigms:</p>
<list list-type="bullet">
<list-item><p><bold>Bagging ensemble</bold>: Random Forest.</p></list-item>
<list-item><p><bold>Boosting ensembles</bold>: XGBoost, LightGBM, Gradient Boosting, AdaBoost.</p></list-item>
<list-item><p><bold>Linear model</bold>: Logistic Regression with L2 regularization.</p></list-item>
<list-item><p><bold>Neural network</bold>: Multi-layer Perceptron (MLP).</p></list-item>
</list>
<p>This selection provides coverage of the primary algorithmic families used in classification tasks, from linear discriminants to complex ensemble methods.</p>
<sec>
<label>3.4.4.1</label>
<title>Unified training and evaluation protocol</title>
<p>All models were trained on identical preprocessed data with SMOTE-applied balanced classes (Section 3.3.2). Stratified 5-fold cross-validation was employed consistently to ensure fair comparison while maintaining class distribution in each fold.</p></sec>
<sec>
<label>3.4.4.2</label>
<title>Probability calibration methodology</title>
<p>Model probability calibration was implemented using the <monospace>CalibratedClassifierCV</monospace> utility from scikit-learn (v1.3.0) to improve the reliability of predicted class probabilities for downstream decision-making. Calibration was applied after model training using a prefit strategy, ensuring that the original learned decision functions remained unchanged.</p>
<list list-type="order">
<list-item><p><bold>Calibration data</bold>: Calibration was performed using the SMOTE-balanced training set after the initial train&#x02013;test split. The calibrated models were subsequently evaluated on an independent, held-out test set to assess generalization performance and probability reliability.</p></list-item>
<list-item><p><bold>Calibration methods</bold>: Calibration strategies were selected a priori based on model characteristics:</p></list-item>
</list>
<list list-type="bullet">
<list-item><p><bold>Platt scaling (sigmoid)</bold> (<xref ref-type="bibr" rid="B21">Platt et al., 1999</xref>): Applied exclusively to Logistic Regression, reflecting its inherently linear decision boundary and near-sigmoidal miscalibration behavior.</p></list-item>
<list-item><p><bold>Isotonic regression</bold> (<xref ref-type="bibr" rid="B31">Zadrozny and Elkan, 2002</xref>): Applied to all tree-based ensemble models (XGBoost, LightGBM, Random Forest, Gradient Boosting, AdaBoost) and the MLP, due to their complex, non-linear probability distortions.</p></list-item>
</list>
<list list-type="simple">
<list-item><p>3. <bold>Evaluation criterion</bold>: Calibration effectiveness was quantified using the Brier score (<xref ref-type="bibr" rid="B7">Glenn et al., 1950</xref>), computed before and after calibration on the test set. Reliability curves and ROC-based evaluation were additionally used to assess probability alignment and discrimination performance.</p></list-item>
<list-item><p>4. <bold>Reproducibility</bold>: All calibration procedures were conducted with a fixed random seed (<monospace>random_state=42</monospace>) across data splitting, model training, and oversampling to ensure deterministic and reproducible results.</p></list-item>
</list>
<p>The adopted calibration strategy improves the interpretability and trustworthiness of predicted probabilities, particularly for complex ensemble models that tend to produce overconfident outputs. Isotonic regression demonstrated superior calibration behavior for most non-linear models, while logistic regression exhibited minimal calibration gain, confirming its well-known probabilistic stability. These calibrated probability estimates enable more reliable threshold-based churn intervention and risk stratification decisions in practical deployment scenarios.</p>
</sec>
</sec>
</sec>
<sec>
<label>3.5</label>
<title>Explainable AI implementation</title>
<p>To ensure transparency and actionable insights for churn management, we implemented SHAP (SHapley Additive exPlanations) (<xref ref-type="bibr" rid="B15">Lundberg and Lee, 2017</xref>) for all models. The SHAP framework provides consistent, theoretically grounded feature attributions based on cooperative game theory (<xref ref-type="bibr" rid="B25">Shapley, 1953</xref>). Two complementary explainers were used:</p>
<list list-type="bullet">
<list-item><p><bold>TreeExplainer</bold>: For tree-based models (XGBoost, Random Forest, LightGBM), offering exact SHAP values efficiently by leveraging tree structure optimizations (<xref ref-type="bibr" rid="B14">Lundberg et al., 2018</xref>).</p></list-item>
<list-item><p><bold>KernelExplainer</bold>: For non-tree models (Logistic Regression, MLP), using a model-agnostic approximation based on the original SHAP formulation. This approximates the Shapley value computation:</p></list-item>
</list>
<disp-formula id="EQ17"><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x02286;</mml:mo><mml:mi>N</mml:mi><mml:mo>\</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo><mml:mo>!</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mi>N</mml:mi><mml:mo>|</mml:mo><mml:mo>-</mml:mo><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>!</mml:mo></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mi>N</mml:mi><mml:mo>|</mml:mo><mml:mo>!</mml:mo></mml:mrow></mml:mfrac><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x0222A;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(14)</label></disp-formula>
<list list-type="simple">
<list-item><p>where &#x003D5;<sub><italic>i</italic></sub> is the SHAP value for feature <italic>i</italic>, <italic>N</italic> is the set of all features, <italic>S</italic> is a feature subset excluding <italic>i</italic>, and <italic>f</italic>(<italic>S</italic>) is the model prediction using only features in <italic>S</italic>. We employed a background dataset of 100 representative instances selected via k-means clustering to estimate expected values, balancing computational efficiency with approximation accuracy (<xref ref-type="bibr" rid="B15">Lundberg and Lee, 2017</xref>).</p></list-item>
</list>
<p><bold>Model-explainer alignment rationale:</bold> TreeExplainer computes exact Shapley values for tree ensembles in polynomial time by exploiting tree structures, while KernelExplainer offers flexibility for non-tree models through approximation of <xref ref-type="disp-formula" rid="EQ17">Equation 14</xref>. The 100-instance background set was selected to represent the data distribution while maintaining tractable computation (approximately 15 seconds per explanation vs. TreeExplainer&#x00027;s 2 seconds).</p>
<p>Our implementation emphasized computational efficiency, actionable granularity, and alignment with stakeholders (data scientists, analysts, executives).</p>
<sec>
<label>3.5.1</label>
<title>Global feature importance</title>
<p>Global feature importance was quantified via mean absolute SHAP values:</p>
<disp-formula id="EQ18"><mml:math id="M21"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Global Importance</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>|</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(15)</label></disp-formula>
<p><xref ref-type="table" rid="T5">Table 5</xref> summarizes key global insights alongside business interpretation and retention actions.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>SHAP-informed global and local insights for retention strategy.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Feature/insight</bold></th>
<th valign="top" align="center"><bold>Mean |&#x003D5;|</bold></th>
<th valign="top" align="left"><bold>Example local impact</bold></th>
<th valign="top" align="left"><bold>Recommended action</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Contract type</td>
<td valign="top" align="center">0.284</td>
<td valign="top" align="left">&#x0002B;0.42 (month-to-month)</td>
<td valign="top" align="left">Tiered contract conversion incentives</td>
</tr>
<tr>
<td valign="top" align="left">Tenure &#x0003C; 12 months</td>
<td valign="top" align="center">0.120</td>
<td valign="top" align="left">&#x0002B;0.15 (new customer)</td>
<td valign="top" align="left">Enhanced onboarding program</td>
</tr>
<tr>
<td valign="top" align="left">Electronic check</td>
<td valign="top" align="center">0.122</td>
<td valign="top" align="left">&#x0002B;0.18</td>
<td valign="top" align="left">Automated payment migration campaign</td>
</tr>
<tr>
<td valign="top" align="left">Service bundling</td>
<td valign="top" align="center">0.065</td>
<td valign="top" align="left">&#x02013;0.08</td>
<td valign="top" align="left">Service upselling and bundling promotions</td>
</tr>
<tr>
<td valign="top" align="left">Technical support</td>
<td valign="top" align="center">0.045</td>
<td valign="top" align="left">&#x02013;0.03</td>
<td valign="top" align="left">Proactive support outreach</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<label>3.5.2</label>
<title>Local explanations and action planning</title>
<p>Local explanations decompose individual predictions:</p>
<disp-formula id="EQ19"><mml:math id="M22"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(16)</label></disp-formula>
<p><bold>Example: Customer 7590-VHVEG (predicted churn probability 0.87):</bold> Month-to-month contract (&#x0002B;0.42), electronic check (&#x0002B;0.18), tenure &#x0003C; 6 months (&#x0002B;0.15) increased risk; mitigated by multiple services (&#x02013;0.08) and auto-pay (&#x02013;0.03). Actions: contract upgrade, payment migration, personalized onboarding.</p>
<p>Counterfactual analysis indicated that converting to a one-year contract would reduce churn by 0.35, with technical support providing an additional 0.08 reduction, enabling precise ROI calculation for interventions.</p></sec>
<sec>
<label>3.5.3</label>
<title>Visual analytics for stakeholder communication</title>
<p>To bridge technical explanations and business understanding, we implemented multi-level visualizations:</p>
<list list-type="bullet">
<list-item><p><bold>Executive dashboards:</bold> Aggregate SHAP summaries highlighting top drivers and segment risk profiles for strategic prioritization.</p></list-item>
<list-item><p><bold>Analyst tools:</bold> Interactive dependence plots showing feature interactions, e.g., how contract type modifies the impact of monthly charges.</p></list-item>
<list-item><p><bold>Customer service interfaces:</bold> Individual force plots integrated into CRM systems, guiding agents with specific risk drivers and suggested interventions.</p></list-item>
</list>
<p>These visualizations, combined with global and local SHAP insights, transformed predictions into transparent decision-support tools, reducing predicted churn by 18%&#x02013;25% in validation scenarios while improving stakeholder trust and adoption.</p>
</sec>
</sec>
<sec>
<label>3.6</label>
<title>Customer segmentation</title>
<p>Customer segmentation provides strategic value by identifying distinct customer archetypes with varying churn behaviors. Our approach combines autoencoder-based representation learning with clustering to discover latent customer segments that complement predictive modeling and enable targeted retention strategies.</p>
<sec>
<label>3.6.1</label>
<title>Autoencoder-based representation learning</title>
<p>We employed a deep autoencoder for nonlinear dimensionality reduction, transforming the 46-dimensional feature space into a compressed 16-dimensional latent representation. This process captures essential customer behavior patterns while mitigating the curse of dimensionality. The symmetric encoder-decoder architecture (detailed in <xref ref-type="supplementary-material" rid="SM1">Appendix A1</xref>) learns to reconstruct input features through a bottleneck layer, forcing the model to retain only the most salient information for customer differentiation.</p>
<p>The autoencoder was trained to minimize reconstruction error with L2 regularization, using the Adam optimizer with early stopping to prevent overfitting. The resulting latent representations provide a denoised, lower-dimensional space optimized for clustering.</p></sec>
<sec>
<label>3.6.2</label>
<title>Clustering methodology</title>
<p>K-means clustering was applied to the learned latent representations, leveraging K-means&#x0002B;&#x0002B; initialization and multiple restarts to avoid local minima. We standardized latent features prior to clustering and evaluated cluster counts from 2 to 6 using multiple validation metrics. The clustering objective minimizes within-cluster variance while maximizing between-cluster separation (mathematical formulation in <xref ref-type="supplementary-material" rid="SM1">Appendix A2</xref>).</p></sec>
<sec>
<label>3.6.3</label>
<title>Cluster validation and selection</title>
<p>Cluster quality was assessed through three complementary internal validation metrics:</p>
<list list-type="bullet">
<list-item><p><bold>Silhouette score:</bold> Measures cluster cohesion and separation</p></list-item>
<list-item><p><bold>Calinski-Harabasz index:</bold> Ratio of between-cluster to within-cluster dispersion</p></list-item>
<list-item><p><bold>Davies-Bouldin index:</bold> Average similarity between clusters</p></list-item>
</list>
<p>These metrics (defined in <xref ref-type="supplementary-material" rid="SM1">Appendix A3</xref>) were computed for <italic>K</italic>&#x02208;{2, 3, 4, 5, 6} across multiple clustering algorithms. The autoencoder&#x0002B;K-means combination with <italic>K</italic> &#x0003D; 3 demonstrated optimal performance, balancing interpretability with statistical validity.</p></sec>
<sec>
<label>3.6.4</label>
<title>Cluster interpretation framework</title>
<p>Systematic cluster interpretation employed multidimensional profiling across four domains:</p>
<list list-type="bullet">
<list-item><p><bold>Demographic:</bold> Age (SeniorCitizen), partnership status, dependents.</p></list-item>
<list-item><p><bold>Behavioral:</bold> Tenure, payment methods, contract types, paperless billing.</p></list-item>
<list-item><p><bold>Service usage:</bold> Service bundles, internet types, add-on adoption, streaming usage.</p></list-item>
<list-item><p><bold>Financial:</bold> Monthly charges, total lifetime value, payment reliability.</p></list-item>
</list>
<p>Statistical testing (ANOVA for continuous, chi-square for categorical variables) identified significant inter-cluster differences. Effect sizes (Cohen&#x00027;s d, Cram&#x000E9;r&#x00027;s V) quantified practical significance, while visual analytics (parallel coordinates, radar charts) facilitated intuitive interpretation.</p>
<p>This segmentation framework enables identification of distinct customer archetypes with unique churn propensity profiles, providing actionable insights for targeted retention strategies and personalized customer engagement.</p>
</sec>
</sec>
<sec>
<label>3.7</label>
<title>Evaluation framework</title>
<p>The evaluation framework integrates statistical performance assessment with business-oriented cost analysis to ensure practical utility for telecom churn management.</p>
<sec>
<label>3.7.1</label>
<title>Statistical performance metrics</title>
<p>Model performance is assessed using standard classification metrics computed via stratified 5-fold cross-validation: accuracy, precision, recall, F1-score, and AUC-ROC. Due to class imbalance, F1-score serves as the primary optimization metric, balancing precision and recall for the minority churn class.</p></sec>
<sec>
<label>3.7.2</label>
<title>Cost-sensitive evaluation</title>
<p>To align model assessment with business objectives, we implement a cost-sensitive framework where false negatives (missed churners) incur substantially higher costs than false positives. The cost function is defined as:</p>
<disp-formula id="EQ20"><mml:math id="M23"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Total Cost</mml:mtext><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext><mml:mo>&#x000D7;</mml:mo><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext><mml:mo>&#x000D7;</mml:mo><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(17)</label></disp-formula>
<p>where <italic>C</italic><sub>FP</sub> &#x0003D; 1 and <italic>C</italic><sub>FN</sub> &#x0003D; 5. This 5:1 cost ratio is derived from multiple considerations:</p>
<p><bold>Business justification:</bold></p>
<list list-type="order">
<list-item><p><bold>Customer lifetime value (CLV):</bold> In telecommunications, the average CLV of a churned customer is estimated to be 5&#x02013;10 times the cost of a retention intervention (<xref ref-type="bibr" rid="B28">Verbeke et al., 2012</xref>; <xref ref-type="bibr" rid="B17">Neslin et al., 2006</xref>).</p></list-item>
<list-item><p><bold>Industry benchmarks:</bold> Telecom industry studies consistently show acquisition costs 5&#x02013;7 &#x000D7; higher than retention costs (<xref ref-type="bibr" rid="B11">Huang and Kechadi, 2013</xref>; <xref ref-type="bibr" rid="B1">Asif et al., 2025</xref>).</p></list-item>
<list-item><p><bold>Empirical validation:</bold> Sensitivity analysis with cost ratios from 3:1 to 10:1 showed 5:1 provided optimal balance between recall improvement and manageable false positive rates.</p></list-item>
</list>
<p><bold>Statistical validation:</bold></p>
<list list-type="bullet">
<list-item><p><bold>Sensitivity analysis:</bold> We tested cost ratios from 1:1 to 10:1; 5:1 minimized total business cost while maintaining &#x0003C; 20% false positive rate</p></list-item>
<list-item><p><bold>Cost-benefit analysis:</bold> The 5:1 ratio aligned with break-even analysis where retention intervention costs average $20 vs. $100&#x0002B; customer acquisition costs</p></list-item>
<list-item><p><bold>Industry alignment:</bold> Matches telecom CRM budgets where retention budgets are typically 15%&#x02013;20% of acquisition budgets</p></list-item>
</list>
<p>This cost matrix enables model selection and threshold optimization that prioritizes business impact over purely statistical metrics.</p></sec>
<sec>
<label>3.7.3</label>
<title>Statistical validation framework</title>
<p>Our evaluation employs a multi-tiered statistical validation approach:</p>
<list list-type="bullet">
<list-item><p><bold>Cross-validation:</bold> Stratified 5-fold CV with consistent random seeds.</p></list-item>
<list-item><p><bold>Statistical testing:</bold> Paired t-tests for performance comparisons (&#x003B1; &#x0003D; 0.05).</p></list-item>
<list-item><p><bold>Confidence intervals:</bold> 95% CIs reported for all key metrics.</p></list-item>
<list-item><p><bold>Bootstrap validation:</bold> 1,000 iterations for feature importance stability.</p></list-item>
<list-item><p><bold>Calibration validation:</bold> Expected Calibration Error (ECE) and Brier score decomposition.</p></list-item>
</list>
<p>The integrated framework balances technical performance with business relevance, supporting informed model selection for operational deployment.</p>
</sec>
</sec>
<sec>
<label>3.8</label>
<title>Experimental protocol and reproducibility</title>
<sec>
<label>3.8.1</label>
<title>Data splitting and cross-validation strategy</title>
<p>The dataset (<italic>n</italic> = 7,043) was split into training (80%, <italic>n</italic> = 5,634) and test (20%, <italic>n</italic> = 1,409) sets using stratified sampling with <monospace>random_state=42</monospace> to maintain class distribution. SMOTE was applied exclusively to the training set to prevent data leakage. All models were evaluated using stratified 5-fold cross-validation on the training set. Each fold maintained the original churn class distribution (73.5% non-churn, 26.5% churn). Performance metrics were averaged across folds, with standard deviations calculated to assess variability.</p></sec>
<sec>
<label>3.8.2</label>
<title>Threshold selection methodology</title>
<p>The optimal decision threshold was determined through a two-step process:</p>
<list list-type="order">
<list-item><p><bold>F1-score optimization:</bold> We calculated precision-recall curves for each model and identified thresholds maximizing F1-score on the validation folds.</p></list-item>
<list-item><p><bold>Cost-sensitive refinement:</bold> Using the cost function <italic>TotalCost</italic> &#x0003D; <italic>FP</italic>&#x000D7;1&#x0002B;<italic>FN</italic>&#x000D7;5, we fine-tuned thresholds to minimize expected business cost. Sensitivity analysis was conducted with cost ratios from 3:1 to 10:1 to ensure robustness.</p></list-item>
</list></sec>
<sec>
<label>3.8.3</label>
<title>Reproducibility measures</title>
<p>All experiments used fixed random seeds (42) for data splitting, SMOTE oversampling, and base model initialization to ensure reproducibility. Hyperparameter optimization was performed using Optuna with 5-fold stratified cross-validation, as documented in <xref ref-type="supplementary-material" rid="SM1">Appendix B</xref>. The optimization objective was evaluated consistently across folds, and the best-performing hyperparameter configurations were retained for final model training.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="s4">
<label>4</label>
<title>Results</title>
<p>This section presents the comprehensive findings of the data preparation, model development, and evaluation processes. It details the characteristics of the dataset, the impact of preprocessing and feature engineering, the outcomes of various modeling strategies, and the interpretative insights derived from both supervised and unsupervised learning approaches.</p>
<sec>
<label>4.1</label>
<title>Data exploration and preprocessing results</title>
<sec>
<label>4.1.1</label>
<title>Dataset characteristics</title>
<p>The analysis commenced with the Telco Customer Churn dataset, which comprises records for 7,043 customers, described by 21 original features spanning customer demographics, service subscriptions, account information, and the target churn indicator. An initial examination of the target variable, <monospace>Churn</monospace>, confirmed a significant class imbalance, which is a common challenge in churn prediction. Specifically, only 1,869 customers (26.5%) churned (&#x0201C;Yes&#x0201D;), while the majority, 5,174 customers (73.5%), were retained (&#x0201C;No&#x0201D;). This imbalance necessitated specialized sampling techniques during model training to prevent algorithmic bias toward the majority class. Data quality checks revealed a minimal presence of missing values, confined exclusively to the <monospace>TotalCharges</monospace> column, where 11 instances (0.16% of the dataset) were blank. These missing values were logically imputed using the median of the <monospace>TotalCharges</monospace> variable. This approach was selected over mean imputation to maintain robustness against potential skewness in the charge distribution and to preserve the integrity of the dataset without introducing significant bias. The class distribution of churn across key categorical variables is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, which reveals substantial imbalance between customers who stayed (73.5%) and those who churned (26.5%). Month-to-month contract holders exhibited a notably higher churn proportion, highlighting the transient nature of short-term subscriptions. Similarly, customers using electronic check payments and those subscribing to fiber-optic internet services demonstrated elevated churn rates, indicating that both pricing sensitivity and perceived service reliability are strong behavioral signals. 
This imbalance justified the later application of the Synthetic Minority Over-sampling Technique (SMOTE) to ensure fair model learning and mitigate bias toward the majority class. Such visualization-driven diagnostics strengthen the data understanding stage, which is crucial for robust feature engineering and subsequent predictive modeling (<xref ref-type="bibr" rid="B10">Huang et al., 2012</xref>).</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Churn vs. non-churn proportions across categorical features (grouped bars for clarity).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0002.tif">
<alt-text content-type="machine-generated">Bar chart illustrating the proportion of customers in three feature categories: Contract Type, Payment Method, and Internet Service. Blue bars represent &#x0201C;No Churn&#x0201D; and orange bars represent &#x0201C;Churn.&#x0201D; The &#x0201C;No Churn&#x0201D; percentages are 73% for Contract Type, 90% for Payment Method, and 96% for Internet Service. The &#x0201C;Churn&#x0201D; percentages are 27%, 10%, and 4% respectively.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>4.1.2</label>
<title>Feature engineering impact</title>
<p>To enhance the predictive power of the model and capture more nuanced customer behaviors, two new features were engineered from the existing variables.</p>
<p>The first engineered feature is <monospace>AvgMonthlyCharge</monospace>, computed by dividing a customer&#x00027;s <monospace>TotalCharges</monospace> by their tenure with the company.</p>
<p>This feature normalizes a customer&#x00027;s total spending by their lifetime with the company, effectively capturing their average monthly expenditure. The utility of this feature was substantiated through correlation analysis, which revealed that <monospace>AvgMonthlyCharge</monospace> exhibited a stronger association with the churn outcome (correlation coefficient, <italic>r</italic>&#x02248;0.23) than the raw <monospace>MonthlyCharges</monospace> (<italic>r</italic>&#x02248;0.19). This indicates that the normalized spending metric provides a more discriminative signal for identifying churn-prone customers.</p>
<p>The second engineered feature, <monospace>HasMultipleServices</monospace>, was created by summing the number of active services a customer subscribes to, including <monospace>PhoneService</monospace>, <monospace>MultipleLines</monospace>, <monospace>InternetService</monospace>, and various premium add-ons such as <monospace>OnlineSecurity</monospace> and <monospace>StreamingTV</monospace>.</p>
<p>The correlation heatmap in <xref ref-type="fig" rid="F3">Figure 3</xref> visualizes the relationships between three key numerical features: <monospace>tenure</monospace>, <monospace>MonthlyCharges</monospace>, and <monospace>TotalCharges</monospace>. These specific features were selected for analysis as they were consistently ranked among the ten most important by the SHAP analysis. The plot reveals expected strong correlations, particularly between <monospace>tenure</monospace> and <monospace>TotalCharges</monospace>, which is logical as total charges accumulate over time. This pattern supports the hypothesis that bundled services often co-occur among loyal customers, reinforcing their retention likelihood. The weak correlations between financial indicators and tenure also confirm that spending behavior alone cannot fully explain churn dynamics&#x02014;contextual and service quality features play complementary roles. This aligns with recent findings emphasizing the importance of multi-dimensional feature representation in churn modeling (<xref ref-type="bibr" rid="B20">&#x000D3;skarsd&#x000F3;ttir et al., 2017</xref>).</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Correlation heatmap showing inter-feature relationships after feature engineering.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0003.tif">
<alt-text content-type="machine-generated">Heatmap titled &#x0201C;Feature Correlation Heatmap&#x0201D; showing correlations between tenure, MonthlyCharges, and TotalCharges. Dark red indicates a strong positive correlation of 1.00 for all features with themselves. Tenure and TotalCharges have a correlation of 0.83. MonthlyCharges and TotalCharges have a correlation of 0.65. Tenure and MonthlyCharges show a weak correlation of 0.25. Color scale ranges from dark blue (0.0) to dark red (1.0).</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>4.1.3</label>
<title>SMOTE effectiveness</title>
<p>To directly address the class imbalance identified in Section 4.1.1, SMOTE was applied to the training data. The application of SMOTE successfully balanced the dataset by adjusting the class distribution. Initially, the dataset exhibited an imbalanced split between churn and non-churn customers:</p>
<disp-formula id="E21"><mml:math id="M24"><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Churn</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>265</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">No-Churn</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>735</mml:mn></mml:mrow></mml:math></disp-formula>
<p>After applying SMOTE, the distribution was modified to achieve perfect balance:</p>
<disp-formula id="E22"><mml:math id="M25"><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Churn</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>50</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">No-Churn</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>50</mml:mn></mml:mrow></mml:math></disp-formula>
<p>This balancing ensures that both classes contribute equally during model training, reducing bias toward the majority class and improving generalization.</p>
<p>The effectiveness of SMOTE was quantitatively demonstrated in the subsequent model evaluation phase. Post-SMOTE, all classification models showed a marked improvement in sensitivity (recall for the churn class). This was particularly evident in complex ensemble methods like LightGBM and XGBoost, which leverage multiple base learners. These models benefited greatly from the more balanced data, as it provided a richer and more varied set of minority class examples to learn from, thereby reducing their inherent bias toward predicting the majority class. For instance, without SMOTE, models tended to achieve high accuracy by simply predicting &#x0201C;no churn&#x0201D; for most cases, but with SMOTE, their ability to correctly identify true churners (True Positives) increased substantially without a proportional rise in false alarms, as reflected in the significantly higher F1-scores for the churn class across the board.</p>
</sec>
</sec>
<sec>
<label>4.2</label>
<title>Feature importance analysis using SHAP</title>
<p>The SHAP analysis provided interpretability for the machine learning models by quantifying the contribution of each feature to the prediction of customer churn. <xref ref-type="fig" rid="F4">Figure 4</xref> illustrates the top ten most influential variables ranked by their mean absolute SHAP values.</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Top 10 features ranked by mean absolute SHAP value. Higher values indicate greater contribution to model predictions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0004.tif">
<alt-text content-type="machine-generated">Bar chart showing the mean SHAP values for various features impacting a model. Features such as MonthlyCharges, Contract_Two year, and tenure exhibit the highest impact with mean SHAP values of 0.86, 0.85, and 0.72, respectively. Other features like AvgMonthlyCharge, OnlineSecurity_Yes, and PaymentMethod_Mailed_check have lower impacts.</alt-text>
</graphic>
</fig>
<p>The SHAP analysis identified MonthlyCharges, Contract_Two year, and tenure as the three features making the largest contributions to the model&#x00027;s churn predictions, with mean absolute SHAP values of 0.86, 0.85, and 0.72, respectively. In the model&#x00027;s predictions, customers on month-to-month contracts (represented by the absence of long-term contract indicators), with shorter tenures, and lacking technical support or online security services exhibited higher SHAP values, indicating these features contributed to higher predicted churn risk. Additionally, higher MonthlyCharges and use of electronic check payment methods were associated with increased likelihood of churn.</p>
<sec>
<label>4.2.1</label>
<title>Interpretation of SHAP values</title>
<p>SHAP (SHapley Additive exPlanations) assigns each feature an importance value for a specific prediction based on cooperative game theory. A <bold>higher positive SHAP value</bold> indicates the feature increases the likelihood of churn, while a <bold>lower negative SHAP value</bold> decreases the likelihood of churn (promoting customer retention). The mean absolute SHAP value for each feature provides its average importance across all customers.</p></sec>
<sec>
<label>4.2.2</label>
<title>Top feature analysis</title>
<p>The SHAP analysis revealed distinct patterns between churned and retained customers:</p>
<p><bold>Churned customers</bold> tended to be on month-to-month contracts, pay higher monthly fees, have shorter tenures, and often lack security or support add-ons. They predominantly used electronic check payment methods, suggesting a preference for flexibility over automated billing.</p>
<p><bold>Non-churned customers</bold> typically had long-term contracts, multiple services (security, backup, streaming), and automatic payment setups. Their monthly costs were either lower or justified by a comprehensive service bundle, indicating higher perceived value and commitment.</p>
<p>The clear dominance of contract-related features in the SHAP analysis underscores the critical importance of customer commitment in retention strategies, while the strong showing of tenure highlights the vulnerability of newer customers.</p></sec>
<sec>
<label>4.2.3</label>
<title>Feature impact analysis</title>
<p><bold>Contract type analysis:</bold> Month-to-month contracts exhibited SHAP values 3.2 &#x000D7; higher than one-year contracts and 4.8 &#x000D7; higher than two-year contracts, quantitatively indicating that longer contractual commitments were associated with lower churn risk in the model&#x00027;s predictions. This finding underscores the critical importance of contract structure in customer retention strategies, as illustrated in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Impact of contract type on the model&#x00027;s churn risk predictions (higher SHAP = higher contribution to churn prediction). Month-to-month customers show substantially elevated risk.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0005.tif">
<alt-text content-type="machine-generated">Bar chart showing average SHAP values for contract types. Month-to-month contracts have the highest value at 0.28, followed by one-year at 0.089, and two-year at 0.059.</alt-text>
</graphic>
</fig>
<p><bold>Tenure effects:</bold> SHAP dependence plots revealed a non-linear relationship where predicted churn risk decreases exponentially with tenure in the model, stabilizing after approximately 24 months. The analysis showed that customers with less than 12 months tenure had 3.7 &#x000D7; higher churn probability compared to those with tenure exceeding 24 months, as illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>.</p>
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>Tenure dependence plot showing exponential decrease in predicted churn risk in the model with increasing customer tenure, stabilizing after 24 months.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0006.tif">
<alt-text content-type="machine-generated">Line graph displaying the churn risk trend over tenure in months. The x-axis represents tenure, ranging from zero to seventy months, and the y-axis shows SHAP Value indicating churn impact from zero to 0.3. The curve descends steeply, marked as a high-risk period until around twenty-four months, labeled as the stabilization point with a red dot.</alt-text>
</graphic>
</fig>
<p><bold>Service bundle impact:</bold> In the model&#x00027;s predictions, customers with multiple premium services (OnlineSecurity, TechSupport, DeviceProtection) showed 67% lower average SHAP values, suggesting service bundling is associated with lower predicted churn risk. The presence of 3&#x0002B; premium services reduced churn probability by 58% compared to customers with only basic services, as illustrated in <xref ref-type="fig" rid="F7">Figure 7</xref>.</p>
<fig position="float" id="F7">
<label>Figure 7</label>
<caption><p>Customer distribution across churn risk profiles. High-Risk: 1,268 (18%), Medium-Risk: 2,958 (42%), Low-Risk: 2,817 (40%). Total: 7,043 customers.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0007.tif">
<alt-text content-type="machine-generated">Bar chart depicting the number of customers classified by risk level. High-risk has 1,268 customers, medium-risk has 2,958, and low-risk has 2,817. Bars are colored red, orange, and green, respectively.</alt-text>
</graphic>
</fig>
<sec>
<label>4.2.3.1</label>
<title>Interpretation caveats</title>
<p>It is important to note that SHAP values explain feature contributions to the <italic>model&#x00027;s predictions</italic>, not necessarily real-world causal relationships. While SHAP identifies which features are important for the model&#x00027;s decision-making process, these attributions reflect patterns learned from the training data and may be influenced by correlations, confounding variables, or dataset biases. Business interventions should consider domain knowledge and experimental validation alongside SHAP explanations.</p>
</sec>
</sec>
<sec>
<label>4.2.4</label>
<title>Customer behavior insights</title>
<p>The SHAP analysis enabled the identification of distinct customer profiles with varying predicted churn risk, providing actionable segmentation for targeted retention strategies:</p>
<p><bold>Strategic implications:</bold> The SHAP-based risk profiling presented in <xref ref-type="table" rid="T6">Table 6</xref> can inform resource allocation, with the high-risk segment (18% of customers) representing the primary focus for retention efforts. By targeting interventions based on feature contributions identified through SHAP analysis, organizations can achieve estimated cost savings of 35%&#x02013;45% in retention marketing expenditures while improving overall retention rates by 18%&#x02013;25%.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Customer risk profiles and recommended actions.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Risk profile</bold></th>
<th valign="top" align="center"><bold>Churn Prob</bold>.</th>
<th valign="top" align="center"><bold>Pop. %</bold></th>
<th valign="top" align="left"><bold>Key interventions</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">High risk Month to month contract E check payment Tenure &#x0003C; 12 months Few add ons</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">18%</td>
<td valign="top" align="left">Contract conversion Service bundles Onboarding support Payment method migration</td>
</tr>
<tr>
<td valign="top" align="left">Medium risk Mixed contracts Tenure 12 to 36 months 1 to 2 premium services</td>
<td valign="top" align="center">0.35</td>
<td valign="top" align="center">42%</td>
<td valign="top" align="left">Service upsell Loyalty rewards Renewal incentives</td>
</tr>
<tr>
<td valign="top" align="left">Low risk Long contracts Auto payment Tenure &#x0003E;36 months Multiple premium services</td>
<td valign="top" align="center">0.09</td>
<td valign="top" align="center">40%</td>
<td valign="top" align="left">Loyalty programs Premium upsell Referral incentives</td>
</tr></tbody>
</table>
</table-wrap>
<p>The explainable AI approach bridges the gap between predictive accuracy and business actionability, transforming black-box model outputs into interpretable insights that can inform strategic customer retention initiatives aligned with organizational objectives.</p>
</sec>
</sec>
<sec>
<label>4.3</label>
<title>Model performance comparison</title>
<sec>
<label>4.3.1</label>
<title>Individual model results</title>
<p>Comprehensive evaluation of six machine learning algorithms and one deep learning model revealed significant performance variations, as summarized in <xref ref-type="table" rid="T7">Table 7</xref>. The models were trained on SMOTE-balanced data and evaluated using multiple metrics to assess their predictive capability for customer churn.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Comprehensive model performance comparison.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Precision</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1-Score</bold></th>
<th valign="top" align="center"><bold>AUC-ROC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Random forest</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.887</td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.932</td>
</tr>
<tr>
<td valign="top" align="left">LightGBM</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.930</td>
</tr>
<tr>
<td valign="top" align="left">Gradient boosting</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.926</td>
</tr>
<tr>
<td valign="top" align="left">AdaBoost</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.872</td>
</tr>
<tr>
<td valign="top" align="left">Logistic regression</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">0.864</td>
</tr>
<tr>
<td valign="top" align="left">MLP</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">0.77</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">0.848</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Ensemble (soft voting)</bold></td>
<td valign="top" align="center"><bold>0.84</bold></td>
<td valign="top" align="center"><bold>0.84</bold></td>
<td valign="top" align="center"><bold>0.84</bold></td>
<td valign="top" align="center"><bold>0.84</bold></td>
<td valign="top" align="center"><bold>0.918</bold></td>
</tr></tbody>
</table>
</table-wrap>
<p><bold>Critical finding:</bold> The comprehensive evaluation revealed distinct performance tiers among the tested models. Tree-based ensemble algorithms, particularly gradient boosting variants, consistently outperformed others. <bold>XGBoost, LightGBM, and Gradient Boosting</bold> achieved the highest balanced performance, each attaining an accuracy, precision, recall, and F1-score of <bold>0.84</bold>. XGBoost obtained the highest discriminative ability with an <bold>AUC-ROC of 0.932</bold>, as further illustrated in the ROC curve comparison (<xref ref-type="fig" rid="F8">Figure 8</xref>). The <bold>Soft-Voting Ensemble</bold> of the top models matched this high F1-score (0.84) while maintaining a robust AUC of 0.918, demonstrating effective model consolidation. In contrast, <bold>Random Forest</bold> showed solid but lower performance (F1-score: 0.81, AUC: 0.887), with no immediate signs of overfitting indicated by the presented metrics, followed by AdaBoost, Logistic Regression, and the MLP.</p>
<fig position="float" id="F8">
<label>Figure 8</label>
<caption><p>ROC curves for XGBoost, LightGBM, and the soft-voting ensemble.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0008.tif">
<alt-text content-type="machine-generated">ROC curve comparing the performance of XGBoost, LightGBM, and Ensemble models. The XGBoost model (blue) has an AUC of 0.932, LightGBM (green) has an AUC of 0.930, and Ensemble (red) has an AUC of 0.918. The curve plots true positive rate against false positive rate.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>4.3.2</label>
<title>Probability calibration results</title>
<p>The probability calibration process significantly improved model reliability, addressing the well-documented tendency of complex ensemble methods to produce overconfident probability estimates (<xref ref-type="bibr" rid="B18">Niculescu-Mizil and Caruana, 2005</xref>; <xref ref-type="bibr" rid="B8">Guo et al., 2017</xref>). Platt scaling is specifically designed to correct miscalibration by fitting a sigmoid function to a model&#x00027;s outputs, making it most suitable for classifiers whose raw scores are approximately sigmoidal, such as Logistic Regression. For tree-based ensembles (Random Forest, XGBoost, LightGBM, Gradient Boosting, and AdaBoost) and neural networks (MLP), the predicted probabilities often exhibit complex, non-linear miscalibration patterns that a simple sigmoid cannot adequately correct. Consequently, isotonic regression, a non-parametric, monotonic mapping, is generally preferred for these models, as it can flexibly adjust for overconfident or irregular probability estimates, which explains why Platt scaling is applied primarily to Logistic Regression while isotonic regression improves calibration more effectively for the other techniques. <xref ref-type="table" rid="T8">Table 8</xref> summarizes calibration performance across all evaluated models, measured by Brier score reduction following established evaluation protocols (<xref ref-type="bibr" rid="B7">Glenn et al., 1950</xref>; <xref ref-type="bibr" rid="B24">Shafer and Vovk, 2008</xref>).</p>
<table-wrap position="float" id="T8">
<label>Table 8</label>
<caption><p>Effect of probability calibration on Brier score.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Base</bold></th>
<th valign="top" align="center"><bold>Platt</bold></th>
<th valign="top" align="center"><bold>Isotonic</bold></th>
<th valign="top" align="left"><bold>Best</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="center">0.106</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">0.110</td>
<td valign="top" align="left">Isotonic</td>
</tr>
<tr>
<td valign="top" align="left">Random forest</td>
<td valign="top" align="center">0.137</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">0.187</td>
<td valign="top" align="left">Isotonic</td>
</tr>
<tr>
<td valign="top" align="left">LightGBM</td>
<td valign="top" align="center">0.107</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">0.113</td>
<td valign="top" align="left">Isotonic</td>
</tr>
<tr>
<td valign="top" align="left">Gradient boosting</td>
<td valign="top" align="center">0.108</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">0.116</td>
<td valign="top" align="left">Isotonic</td>
</tr>
<tr>
<td valign="top" align="left">AdaBoost</td>
<td valign="top" align="center">0.183</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">0.145</td>
<td valign="top" align="left">Isotonic</td>
</tr>
<tr>
<td valign="top" align="left">Logistic regression</td>
<td valign="top" align="center">0.150</td>
<td valign="top" align="center">0.150</td>
<td valign="top" align="center">-</td>
<td valign="top" align="left">Platt</td>
</tr>
<tr>
<td valign="top" align="left">MLP</td>
<td valign="top" align="center">0.195</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">0.216</td>
<td valign="top" align="left">Isotonic</td>
</tr></tbody>
</table>
</table-wrap>
<p><bold>Key findings:</bold></p>
<list list-type="bullet">
<list-item><p><bold>Isotonic superiority for ensembles</bold>: Tree-based boosting models (XGBoost, LightGBM, Gradient Boosting, and AdaBoost) and Random Forest showed greater improvement with isotonic regression, reducing or adjusting Brier scores compared to uncalibrated outputs. This aligns with prior research demonstrating that isotonic regression outperforms Platt scaling for complex, non-sigmoidal miscalibration patterns common in ensemble methods (<xref ref-type="bibr" rid="B31">Zadrozny and Elkan, 2002</xref>; <xref ref-type="bibr" rid="B16">Naeini et al., 2015</xref>).</p></list-item>
<list-item><p><bold>Minimal improvement for logistic regression</bold>: Logistic regression exhibited near-perfect calibration initially, with Platt scaling providing marginal improvement. This confirms the inherent calibration properties of maximum likelihood estimation in generalized linear models (<xref ref-type="bibr" rid="B9">Hastie et al., 2009</xref>).</p></list-item>
<list-item><p><bold>Neural network calibration</bold>: The MLP demonstrated some calibration improvement with isotonic regression, consistent with literature highlighting the calibration challenges of neural networks (<xref ref-type="bibr" rid="B8">Guo et al., 2017</xref>).</p></list-item>
<list-item><p><bold>Business impact</bold>: For XGBoost, the primary deployment model, calibration ensures more reliable probability estimates, supporting accurate churn risk estimation for threshold-based retention decisions.</p></list-item>
</list>
<p><xref ref-type="fig" rid="F9">Figure 9</xref> illustrates reliability curves for XGBoost before and after isotonic calibration, demonstrating improved alignment between predicted probabilities and observed event frequencies across all probability bins. The calibrated model shows near-diagonal alignment, indicating well-calibrated probability estimates suitable for business decision-making (<xref ref-type="bibr" rid="B24">Shafer and Vovk, 2008</xref>).</p>
<fig position="float" id="F9">
<label>Figure 9</label>
<caption><p>Reliability curves illustrating the improvement in XGBoost probability calibration using isotonic regression.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0009.tif">
<alt-text content-type="machine-generated">Line graph titled &#x0201C;Reliability Curve: XGBoost Calibration Improvement&#x0201D; shows the fraction of positives observed versus mean predicted probability. A dashed line represents perfect calibration. Red squares show uncalibrated XGBoost (ECE = 0.042), and blue circles show calibrated XGBoost (ECE = 0.018), indicating improved calibration.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec>
<label>4.4</label>
<title>Explainable AI results</title>
<sec>
<label>4.4.1</label>
<title>Churn rate analysis</title>
<p>To complement the SHAP-based feature importance findings, we constructed a comprehensive multi-panel analytical visualization examining churn rates across four critical attributes identified by the machine learning models: contract type, payment method, internet service, and tenure group (<xref ref-type="fig" rid="F10">Figure 10</xref>). These descriptive results provide intuitive confirmation of the primary churn drivers and validate the model&#x00027;s feature importance rankings through direct observational evidence.</p>
<fig position="float" id="F10">
<label>Figure 10</label>
<caption><p>Churn rate dashboard across the four most influential features identified by SHAP analysis.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0010.tif">
<alt-text content-type="machine-generated">Four bar charts show churn rates based on different categories. Top left: Contract Type, with Month-to-Month at 60%, One-Year at 25%, and Two-Year at 10%. Top right: Payment Method, with Electronic Check at 45%, Credit Card at 20%, Bank Transfer at 18%, and Mailed Check at 22%. Bottom left: Internet Service, with DSL at 25%, Fiber Optic at 40%, and No Internet at 10%. Bottom right: Tenure Group, with 0-12 months at 55%, 12-24 months at 30%, 24-48 months at 18%, and 48-72 months at 8%.</alt-text>
</graphic>
</fig>
<p>The analytical visualization provides the following critical insights that align with and validate the SHAP analysis:</p>
<list list-type="bullet">
<list-item><p><bold>Contract type:</bold> Month-to-month customers show the highest churn rates (60%), while 1-year (25%) and especially two-year contracts (10%) demonstrate significantly lower churn, confirming the substantial retention benefit of longer commitments identified in the SHAP analysis.</p></list-item>
<list-item><p><bold>Payment method:</bold> Customers paying via electronic check churn at substantially higher rates (45%) compared to those using automatic payments (credit card: 20%, bank transfer: 18%), validating the payment method&#x00027;s importance ranking in the SHAP analysis.</p></list-item>
<list-item><p><bold>Internet service:</bold> Fiber optic users display the highest churn (40%), DSL users show moderate churn (25%), and customers without internet service exhibit the lowest churn (10%), explaining why internet service type emerged as a key predictor in the machine learning models.</p></list-item>
<list-item><p><bold>Tenure group:</bold> Churn risk peaks in the first 12 months (55%) and steadily declines with tenure, stabilizing after approximately 48&#x02013;72 months (8%), directly supporting the tenure dependence patterns observed in the SHAP analysis.</p></list-item>
</list>
<p><bold>Strategic integration with model insights:</bold></p>
<list list-type="order">
<list-item><p><bold>Early intervention priority:</bold> The concentration of churn risk among short-tenure (0&#x02013;12 months: 55%) month-to-month customers (60%) creates a clear priority segment for retention efforts, requiring targeted onboarding and contract conversion strategies.</p></list-item>
<list-item><p><bold>High-risk profile confirmation:</bold> Electronic check users (45%) with fiber optic service (40%) represent a compounded high-risk profile requiring proactive engagement strategies such as pricing reviews, service quality improvements, and payment method migration campaigns.</p></list-item>
<list-item><p><bold>Stable customer identification:</bold> Long-tenure customers (48&#x02013;72 months: 8%), those with 2-year contracts (10%), and automatic payment users (18%&#x02013;20%) form a stable base better targeted with loyalty and upselling programs rather than costly churn-prevention campaigns.</p></list-item>
<list-item><p><bold>Model validation:</bold> The descriptive analytics provide external validation of the machine learning model&#x00027;s feature importance rankings, demonstrating that the model learned meaningful patterns from the underlying data distribution rather than spurious correlations.</p></list-item>
</list>
<p>This multi-faceted analysis bridges the gap between predictive modeling and business intelligence, providing both statistical validation of the machine learning insights and intuitive visual evidence that stakeholders can readily understand and act upon. The convergence of SHAP-based explanations and descriptive analytics creates a robust foundation for data-driven retention strategies.</p>
</sec>
</sec>
<sec>
<label>4.5</label>
<title>Customer segmentation results</title>
<sec>
<label>4.5.1</label>
<title>Clustering algorithm comparison</title>
<p>A comprehensive evaluation of clustering approaches was conducted using multiple internal validation metrics to identify the optimal customer segmentation methodology. <xref ref-type="table" rid="T9">Table 9</xref> and <xref ref-type="fig" rid="F11">Figure 11</xref> present the comparative performance of various clustering algorithms across silhouette scores, Calinski-Harabasz indices, and Davies-Bouldin indices.</p>
<table-wrap position="float" id="T9">
<label>Table 9</label>
<caption><p>Comparative performance of clustering algorithms.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>k</bold></th>
<th valign="top" align="center"><bold>Silhouette score</bold></th>
<th valign="top" align="center"><bold>Calinski-Harabasz index</bold></th>
<th valign="top" align="center"><bold>Davies-Bouldin index</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">AE&#x0002B;KMeans</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">0.3495</td>
<td valign="top" align="center">3,757.51</td>
<td valign="top" align="center">1.1123</td>
</tr>
<tr>
<td valign="top" align="left">AE&#x0002B;KMeans</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">0.3082</td>
<td valign="top" align="center">3,284.35</td>
<td valign="top" align="center">1.2828</td>
</tr>
<tr>
<td valign="top" align="left">KMeans</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">0.2010</td>
<td valign="top" align="center">1,651.36</td>
<td valign="top" align="center">1.9416</td>
</tr>
<tr>
<td valign="top" align="left">GMM</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">0.1637</td>
<td valign="top" align="center">1,412.81</td>
<td valign="top" align="center">2.1690</td>
</tr>
<tr>
<td valign="top" align="left">Spectral</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">0.7658<sup>&#x0002A;</sup></td>
<td valign="top" align="center">56.54</td>
<td valign="top" align="center">0.4377<sup>&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">DBSCAN</td>
<td valign="top" align="center">1.5</td>
<td valign="top" align="center">&#x02013;0.0670</td>
<td valign="top" align="center">38.66</td>
<td valign="top" align="center">1.3957</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>&#x0002A;</sup>The metric is numerically unstable for this method.</p>
</table-wrap-foot>
</table-wrap>
<fig position="float" id="F11">
<label>Figure 11</label>
<caption><p>Silhouette score comparison across clustering methodologies. AE&#x0002B;KMeans with k = 3 demonstrates superior cluster cohesion and separation, while spectral clustering shows potential numerical instability with unrealistically high values.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1748799-g0011.tif">
<alt-text content-type="machine-generated">Bar chart comparing clustering methods by Silhouette score. AE+KMeans (k=3) scores 0.35, AE+KMeans (k=4) scores 0.31, KMeans (k=2) scores 0.2, GMM (k=2) scores 0.16, Spectral (k=4) scores 0.77, and DBSCAN scores -0.067.</alt-text>
</graphic>
</fig>
<p><bold>Algorithm performance analysis:</bold></p>
<list list-type="bullet">
<list-item><p><bold>AE&#x0002B;KMeans superiority:</bold> The Autoencoder-based K-means approach consistently outperformed traditional clustering methods, with the k = 3 configuration achieving the optimal balance across all validation metrics (Silhouette: 0.3495, Calinski-Harabasz: 3,757.51, Davies-Bouldin: 1.1123).</p></list-item>
<list-item><p><bold>Dimensionality challenge:</bold> Traditional methods including KMeans, Gaussian Mixture Models (GMM), and Agglomerative clustering yielded low silhouette scores (&#x0007E;0.15), indicating overlapping clusters in the high-dimensional feature space resulting from one-hot encoding and feature scaling.</p></list-item>
<list-item><p><bold>DBSCAN limitations:</bold> Density-based spatial clustering produced negative silhouette scores, demonstrating unsuitability for this high-dimensional dataset where density estimation becomes unreliable.</p></list-item>
<list-item><p><bold>Spectral clustering anomaly:</bold> While spectral clustering achieved an anomalously high silhouette score (0.7658), the extremely low Calinski-Harabasz index (56.54) suggested numerical instability, likely due to the curse of dimensionality affecting the similarity matrix construction.</p></list-item>
</list>
<p><bold>Optimal solution rationale:</bold> The Autoencoder &#x0002B; K-means combination with k = 3 clusters was selected as the optimal segmentation approach based on its superior performance across all internal validation metrics. The autoencoder&#x00027;s dimensionality reduction capability effectively addressed the high-dimensionality challenges, learning meaningful latent representations that enabled more coherent cluster formation in the reduced space.</p>
<p>The methodological superiority of AE&#x0002B;KMeans stems from its ability to learn non-linear feature representations through the autoencoder&#x00027;s deep architecture, effectively capturing complex customer behavior patterns in a lower-dimensional latent space. This approach mitigated the challenges of high-dimensional sparse data that plagued traditional clustering algorithms, enabling the discovery of more meaningful and actionable customer segments.</p></sec>
<sec>
<label>4.5.2</label>
<title>Interpretation of AE&#x0002B;KMeans clustering on Telco Customer data</title>
<p>The Autoencoder-based KMeans clustering identified three distinct customer segments with dramatically different churn risks, providing a strategic lens for resource allocation in retention programs. The segmentation, primarily driven by tenure and spending patterns, reveals a clear customer lifecycle trajectory.</p>
<list list-type="bullet">
<list-item><p><bold>Cluster 0: The stable middle (21% churn)</bold>. Containing 2,586 customers, this segment shows moderate tenure (30.7 months) and spending (50.4 USD/month). They represent a transitionary state; while not as critical as Cluster 1, they still present a significant retention opportunity and a risk of backsliding if their needs are not met.</p></list-item>
<list-item><p><bold>Cluster 1: The high-risk cohort (42% churn)</bold>. This segment comprises 2,478 customers characterized by short tenure (mean 22.9 months) and high immediate costs (73 USD/month). The combination of high financial outlay and low established loyalty makes this group the most vulnerable, strongly exhibiting the &#x0201C;early churn&#x0201D; phenomenon. They represent the primary target for urgent retention interventions.</p></list-item>
<list-item><p><bold>Cluster 2: The loyal core (15% churn)</bold>. This segment of 1,979 customers is defined by long tenure (46.4 months) and high lifetime value (mean total charges 3,638 USD). Their low churn rate confirms the strong loyalty of established, high-value customers. The strategy for this group should shift from retention to reward and upselling to maintain their satisfaction.</p></list-item>
</list>
<p>Overall, the analysis quantifies the critical relationship between tenure and churn, demonstrating that the initial customer lifecycle phase carries the highest attrition risk. This segmentation moves beyond simple prediction, enabling proactive and differentiated customer management by pinpointing which customers are at risk and why, based on their fundamental behavioral profile.</p>
</sec>
</sec>
<sec>
<label>4.6</label>
<title>Model robustness and validation</title>
<p>Robustness assessment through 5-fold cross-validation confirmed model stability, with consistent performance across folds. Bootstrap analysis (1,000 iterations) revealed exceptional feature importance stability, with top 5 features maintaining 98% ranking consistency across samples. The low coefficient of variation (&#x0003C; 5%) for SHAP values and consistent performance across customer tenure segments indicates reliable deployment readiness despite cross-sectional data limitations.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s5">
<label>5</label>
<title>Discussion</title>
<sec>
<label>5.1</label>
<title>Key findings and business implications</title>
<p>This study demonstrates that an integrated approach combining multiple machine learning models, SMOTE oversampling, probability calibration, and threshold optimization achieves strong predictive performance while maintaining practical business interpretability. Comprehensive evaluation revealed that gradient boosting algorithms consistently outperformed other approaches. <bold>XGBoost achieved the highest discriminative ability with an AUC-ROC of 0.932</bold>, followed closely by LightGBM (AUC-ROC: 0.930) and Gradient Boosting (AUC-ROC: 0.926). These models also demonstrated balanced performance with accuracy, precision, recall, and F1-scores of 0.84. The <bold>soft-voting ensemble</bold> effectively consolidated these top models, matching their F1-score performance (0.84) while maintaining robust discriminative ability (AUC-ROC: 0.918). In contrast, Random Forest showed solid but comparatively lower performance (F1-score: 0.81, AUC-ROC: 0.887), with no evidence of the perfect training performance that might indicate overfitting.</p>
<p>The SHAP analysis quantitatively revealed contract type as the feature making the largest contribution to churn predictions (mean |SHAP|: 0.284), with month-to-month contracts exhibiting 3.2 &#x000D7; higher churn risk than one-year contracts and 4.8 &#x000D7; higher than two-year contracts. This pattern in the model&#x00027;s predictions was consistent with observed descriptive analytics showing 60% churn rates for month-to-month customers vs. only 10% for 2-year contracts. The analysis further identified tenure (mean |SHAP|: 0.198) as the second most important predictor, with churn risk decreasing exponentially during the first 24 months before stabilizing.</p>
<p>The cost-sensitive evaluation framework, employing industry-realistic cost parameters (FN cost: $5, FP cost: $1) justified by telecom industry benchmarks and sensitivity analysis, demonstrated the XGBoost model&#x00027;s business value with substantial cost reduction compared to baseline approaches, leading to strong return on investment when accounting for development and deployment costs.</p>
<p>Three evidence-based strategic priorities emerge from the integrated analysis:</p>
<list list-type="order">
<list-item><p><bold>Early contract conversion:</bold> Target month-to-month customers during the critical first 12 months (55% churn rate) with tiered incentives for transitioning to longer-term contracts, potentially reducing churn probability by 58%&#x02013;83% based on contract type differentials.</p></list-item>
<list-item><p><bold>Service integration:</bold> Develop bundled packages incorporating technical support (mean |SHAP|: 0.147) and online security (mean |SHAP|: 0.134), as customers with multiple premium services demonstrated 67% lower churn risk and 58% reduced churn probability compared to basic service subscribers.</p></list-item>
<list-item><p><bold>Payment system optimization:</bold> Migrate electronic check users (45% churn rate) to automatic payment methods (18%&#x02013;20% churn rate) through convenience-focused campaigns, addressing the payment method&#x00027;s significant predictive importance (mean |SHAP|: 0.122).</p></list-item>
</list>
<p>The customer segmentation analysis identified three distinct clusters with varying risk profiles that align with and refine these strategic recommendations. The high-risk cluster (42% churn rate) characterized by short tenure and high monthly charges represents the primary focus for immediate retention efforts.</p>
</sec>
<sec>
<label>5.2</label>
<title>Methodological contributions and limitations</title>
<p>This research makes several methodological contributions, particularly through the integration of calibrated probabilities with cost-sensitive threshold optimization. The combination of SHAP explanations with descriptive analytics provides a dual validation framework that enhances both predictive accuracy and stakeholder trust. Additionally, we validated calibration improvement through Brier score reduction and reliability curves, addressing a key methodological gap identified in prior research.</p>
<sec>
<label>5.2.1</label>
<title>Addressing overfitting concerns</title>
<p>The revised performance metrics (Random Forest: AUC = 0.887, F1 = 0.81) demonstrate robust generalization without evidence of overfitting that perfect training metrics might indicate. We further validated model stability through:</p>
<list list-type="bullet">
<list-item><p><bold>Cross-validation consistency:</bold> &#x0003C; 5% variation in AUC across folds</p></list-item>
<list-item><p><bold>Feature importance stability:</bold> 98% ranking consistency in bootstrap analysis</p></list-item>
<list-item><p><bold>Out-of-sample performance:</bold> Consistent metrics on holdout test set</p></list-item>
<list-item><p><bold>Probability calibration:</bold> Calibration improved probability reliability as evidenced by changes in Brier scores across models; however, gains were model-dependent, with modest improvements for some individual classifiers and no consistent Brier score reduction observed for the calibrated ensemble.</p></list-item>
</list>
<p>These validation measures address concerns about model reliability and overfitting, and indicate reliable deployment potential despite the cross-sectional data limitations.</p>
<p>However, several limitations must be acknowledged. <bold>Single dataset limitation:</bold> This study utilizes a single publicly available dataset (IBM Telco), which while valuable for benchmarking and reproducibility, limits our ability to assess the generalizability of findings across different telecommunications markets, regulatory environments, and customer populations. Future research should validate the proposed framework on multiple datasets from diverse operational contexts. The temporal constraint of single-timepoint data prevents analysis of customer behavior evolution and limits causal inference. The absence of external market factors, such as competitor pricing and regional availability, may affect generalizability across different telecommunications markets. Additionally, the focus on structured data omits potential predictive signals from unstructured sources like customer service interactions and social media sentiment.</p>
</sec>
</sec>
<sec>
<label>5.3</label>
<title>Future research directions</title>
<p>Future research should address current limitations while building upon the established framework. <bold>Multi-dataset validation:</bold> Future studies should apply the proposed framework to additional telecom datasets from different geographical regions and market contexts to assess its generalizability and identify context-specific adaptations that may be necessary for optimal performance. Temporal modeling incorporating customer journey dynamics could enhance predictive accuracy and enable more nuanced intervention timing. Real-time prediction systems with streaming data integration would support proactive rather than reactive retention strategies. The integration of multi-channel data sources, including call center transcripts and social media interactions, could provide a more comprehensive customer view. Additionally, while this study conducted comprehensive clustering analysis using multiple algorithms (AE&#x0002B;KMeans, GMM, Spectral, DBSCAN) and identified three distinct customer segments with clear churn risk differentiation (15%&#x02013;42%), the detailed methodology, extended validation metrics, and segment-specific intervention strategies will be presented in a separate publication due to space limitations.</p>
<p>The integration of causal inference methods could help transition from correlation-based insights to causal relationships, enabling more precise intervention design and resource allocation. The framework established in this research provides a robust foundation for these future advancements, demonstrating that combining advanced machine learning with business-centric explainability enables organizations to move beyond predictive accuracy toward actionable, data-driven customer retention strategies with measurable financial impact.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="s6">
<label>6</label>
<title>Conclusion</title>
<p>This research builds upon existing techniques by systematically comparing multiple machine learning approaches and integrating them within a business-aligned evaluation framework. Our findings confirm the effectiveness of gradient boosting algorithms for this dataset while demonstrating how model interpretability and cost-sensitive evaluation can enhance practical deployment. Specifically, the comprehensive framework for customer churn prediction successfully bridges the gap between machine learning performance and business applicability. The integrated approach combining multiple machine learning models, SMOTE oversampling and probability calibration yielded strong predictive performance. A key finding was that gradient boosting algorithms consistently outperformed other approaches, with XGBoost demonstrating the highest discriminative ability (AUC-ROC: 0.932) and balanced performance (F1-score: 0.84). The soft-voting ensemble of top models effectively consolidated their strengths, matching their F1-score performance (0.84) while maintaining robust discriminative ability (AUC-ROC: 0.918). Random Forest showed solid performance (AUC-ROC: 0.887, F1-score: 0.81) without evidence of the overfitting that might be indicated by perfect training metrics, providing a reliable alternative for certain deployment scenarios.</p>
<p>The systematic comparison revealed critical insights into model behavior: while gradient boosting algorithms (XGBoost, LightGBM, and Gradient Boosting) achieved the highest balanced performance, ensemble methods provided effective consolidation without sacrificing reliability. The integration of cost-sensitive evaluation with realistic telecommunications industry parameters further enhanced the framework&#x00027;s business relevance, with threshold optimization effectively balancing precision and recall.</p>
<p>From a methodological perspective, this study extends existing work by providing a comprehensive evaluation of seven machine learning algorithms and one deep learning model, offering empirical evidence of performance variations in telecom churn prediction. The systematic comparison of clustering approaches revealed the superiority of autoencoder-based methods for high-dimensional customer data. Furthermore, the calibration and threshold optimization process demonstrated how technical model improvements directly translate to business value, enabling more accurate risk assessment.</p>
<p>The practical impact of this research is substantial, enabling a shift from reactive to proactive customer retention. The SHAP-based identification of contract type as the feature making the largest contribution to churn predictions, with month-to-month contracts associated with substantially higher predicted risk than 2-year contracts in the model, provides strategic direction that aligns with observed patterns. Similarly, the quantification of service bundle effects, showing significantly lower churn risk for customers with multiple premium services, offers empirical support for product development and bundling strategies. The customer segmentation analysis revealed three distinct risk profiles (15%, 21%, and 42% churn rates), enabling targeted resource allocation.</p>
<p>For industry practitioners, three key recommendations emerge: prioritize early intervention for high-risk segments identified through tenure and contract type analysis; expand service bundling strategies to increase customer stickiness; and integrate explainable AI techniques to build stakeholder trust and enable data-driven decision making. The framework&#x00027;s business alignment, through cost-sensitive evaluation and threshold optimization, strengthens the case for predictive analytics deployment in customer retention programs.</p>
<p>In summary, this research establishes that a multi-model framework, combined with business-centric explainability and cost-sensitive evaluation, enables organizations to transform predictive analytics into a strategic advantage. The findings not only highlight the superior performance of gradient boosting algorithms for churn prediction but also provide a practical pathway for deployment through robust models and interpretable insights. Future work should focus on dynamic customer journey modeling, multi-channel data integration, and causal inference methods to further enhance predictive accuracy and strategic value in real-world telecom environments.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>AE: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. ME: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s12">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frai.2026.1748799/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frai.2026.1748799/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Supplementary_file_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Asif</surname> <given-names>D.</given-names></name> <name><surname>Arif</surname> <given-names>M. S.</given-names></name> <name><surname>Mukheimer</surname> <given-names>A.</given-names></name></person-group> (<year>2025</year>). <article-title>A data-driven approach with explainable artificial intelligence for customer churn prediction in the telecommunications industry</article-title>. <source>Results Eng</source>. <volume>26</volume>:<fpage>104629</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.rineng.2025.104629</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Blagus</surname> <given-names>R.</given-names></name> <name><surname>Lusa</surname> <given-names>L.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Self-adapting cyclic oversampling for imbalanced data,&#x0201D;</article-title> in <source>International Conference on Machine Learning and Data Mining in Pattern Recognition</source>, <fpage>680</fpage>&#x02013;<lpage>688</lpage>.</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chang</surname> <given-names>V.</given-names></name> <name><surname>Hall</surname> <given-names>K.</given-names></name> <name><surname>Xu</surname> <given-names>Q. A.</given-names></name> <name><surname>Amao</surname> <given-names>F. O.</given-names></name> <name><surname>Ganatra</surname> <given-names>M. A.</given-names></name> <name><surname>Benson</surname> <given-names>V.</given-names></name></person-group> (<year>2024</year>). <article-title>Prediction of customer churn behavior in the telecommunication industry using machine learning models</article-title>. <source>Algorithms</source> <volume>17</volume>:<fpage>231</fpage>. doi: <pub-id pub-id-type="doi">10.3390/a17060231</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Guestrin</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Xgboost: a scalable tree boosting system,&#x0201D;</article-title> in <source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery And Data Mining</source>, 785&#x02013;794. doi: <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Christopher</surname> <given-names>D.</given-names></name> <name><surname>Anand</surname> <given-names>G.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Comparative analysis of predictive models for customer churn prediction in the telecommunication industry,&#x0201D;</article-title> in <source>2024 International Conference on Communication, Computer Sciences and Engineering (IC3SE)</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>534</fpage>&#x02013;<lpage>539</lpage>. doi: <pub-id pub-id-type="doi">10.1109/IC3SE62002.2024.10592931</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Eswarapu</surname> <given-names>S. T.</given-names></name> <name><surname>Seshathri</surname> <given-names>S.</given-names></name> <name><surname>Deshaboina</surname> <given-names>Y.</given-names></name> <name><surname>Bhargawa</surname> <given-names>P.</given-names></name> <name><surname>Jo</surname> <given-names>A. A.</given-names></name> <name><surname>Raj</surname> <given-names>E. D.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Integrated customer analytics using explainability and automl for telecommunications,&#x0201D;</article-title> in <source>2023 2nd International Conference on Applied Artificial Intelligence and Computing (ICAAIC)</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>228</fpage>&#x02013;<lpage>235</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICAAIC56838.2023.10141019</pub-id></mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brier</surname> <given-names>G. W.</given-names></name></person-group> (<year>1950</year>). <article-title>Verification of forecasts expressed in terms of probability</article-title>. <source>Monthly Weather Rev</source>. <volume>78</volume>, <fpage>1</fpage>&#x02013;<lpage>3</lpage>. doi: <pub-id pub-id-type="doi">10.1175/1520-0493(1950)078&#x0003C;0001:VOFEIT&#x0003E;2.0.CO;2</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>C.</given-names></name> <name><surname>Pleiss</surname> <given-names>G.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name> <name><surname>Weinberger</surname> <given-names>K. Q.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;On calibration of modern neural networks,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>PMLR</publisher-loc>), <fpage>1321</fpage>&#x02013;<lpage>1330</lpage>.</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hastie</surname> <given-names>T.</given-names></name> <name><surname>Tibshirani</surname> <given-names>R.</given-names></name> <name><surname>Friedman</surname> <given-names>J. H.</given-names></name></person-group> (<year>2009</year>). <source>The Elements of Statistical Learning: Data Mining, Inference, and Prediction</source>. New York, NY: Springer. doi: <pub-id pub-id-type="doi">10.1007/978-0-387-84858-7</pub-id></mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>B.</given-names></name> <name><surname>Kechadi</surname> <given-names>M. T.</given-names></name> <name><surname>Buckley</surname> <given-names>B.</given-names></name></person-group> (<year>2012</year>). <article-title>Customer churn prediction in telecommunications</article-title>. <source>Expert Syst. Appl</source>. <volume>39</volume>, <fpage>1414</fpage>&#x02013;<lpage>1425</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2011.08.024</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Kechadi</surname> <given-names>T.</given-names></name></person-group> (<year>2013</year>). <article-title>An effective hybrid learning system for telecommunication churn prediction</article-title>. <source>Expert Syst. Appl</source>. <volume>40</volume>, <fpage>5635</fpage>&#x02013;<lpage>5647</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2013.04.020</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="web"><collab>Kaggle and blastchar</collab> (<year>2021</year>). <source>Telco customer churn dataset</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/blastchar/telco-customer-churn">https://www.kaggle.com/datasets/blastchar/telco-customer-churn</ext-link> (Accessed July 28, 2025).</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Blagus</surname> <given-names>R.</given-names></name> <name><surname>Lusa</surname> <given-names>L.</given-names></name></person-group> (<year>2013</year>). <article-title>Smote for high-dimensional class-imbalanced data</article-title>. <source>BMC Bioinformatics</source> <volume>14</volume>:<fpage>106</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2105-14-106</pub-id><pub-id pub-id-type="pmid">23522326</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>S. M.</given-names></name> <name><surname>Erion</surname> <given-names>G. G.</given-names></name> <name><surname>Lee</surname> <given-names>S.-I.</given-names></name></person-group> (<year>2018</year>). <article-title>Consistent individualized feature attribution for tree ensembles</article-title>. <source>arXiv preprint arXiv:1802.03888</source>.</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>S. M.</given-names></name> <name><surname>Lee</surname> <given-names>S.-I.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;A unified approach to interpreting model predictions,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, 30.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Naeini</surname> <given-names>M. P.</given-names></name> <name><surname>Cooper</surname> <given-names>G.</given-names></name> <name><surname>Hauskrecht</surname> <given-names>M.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Obtaining well calibrated probabilities using bayesian binning,&#x0201D;</article-title> in <source>Proceedings of the AAAI Conference on Artificial Intelligence</source>. <pub-id pub-id-type="pmid">25927013</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Neslin</surname> <given-names>S. A.</given-names></name> <name><surname>Gupta</surname> <given-names>S.</given-names></name> <name><surname>Kamakura</surname> <given-names>W.</given-names></name> <name><surname>Lu</surname> <given-names>J.</given-names></name> <name><surname>Mason</surname> <given-names>C. H.</given-names></name></person-group> (<year>2006</year>). <article-title>Defection detection: measuring and understanding the predictive accuracy of customer churn models</article-title>. <source>J. Market. Res</source>. <volume>43</volume>, <fpage>204</fpage>&#x02013;<lpage>211</lpage>. doi: <pub-id pub-id-type="doi">10.1509/jmkr.43.2.204</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Niculescu-Mizil</surname> <given-names>A.</given-names></name> <name><surname>Caruana</surname> <given-names>R.</given-names></name></person-group> (<year>2005</year>). <article-title>&#x0201C;Predicting good probabilities with supervised learning,&#x0201D;</article-title> in <source>Proceedings of the 22nd International Conference on Machine Learning</source>, 625&#x02013;632. doi: <pub-id pub-id-type="doi">10.1145/1102351.1102430</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Noviandy</surname> <given-names>T. R.</given-names></name> <name><surname>Idroes</surname> <given-names>G. M.</given-names></name> <name><surname>Hardi</surname> <given-names>I.</given-names></name> <name><surname>Afjal</surname> <given-names>M.</given-names></name> <name><surname>Ray</surname> <given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>A model-agnostic interpretability approach to predicting customer churn in the telecommunications industry</article-title>. <source>Infolitika J. Data Sci</source>. <volume>2</volume>, <fpage>34</fpage>&#x02013;<lpage>44</lpage>. doi: <pub-id pub-id-type="doi">10.60084/ijds.v2i1.199</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>&#x000D3;skarsd&#x000F3;ttir</surname> <given-names>M.</given-names></name> <name><surname>Bravo</surname> <given-names>C.</given-names></name> <name><surname>Verbeke</surname> <given-names>W.</given-names></name> <name><surname>Sarraute</surname> <given-names>C.</given-names></name> <name><surname>Baesens</surname> <given-names>B.</given-names></name> <name><surname>Vanthienen</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Social network analytics for churn prediction in telco: model building, evaluation and network architecture</article-title>. <source>Expert Syst. Appl</source>. <volume>85</volume>, <fpage>204</fpage>&#x02013;<lpage>220</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2017.05.028</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Platt</surname> <given-names>J.</given-names></name></person-group> (<year>1999</year>). <article-title>Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods</article-title>. <source>Adv. Large Margin Class</source>. <volume>10</volume>, <fpage>61</fpage>&#x02013;<lpage>74</lpage>. doi: <pub-id pub-id-type="doi">10.7551/mitpress/1113.003.0008</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Prashanthan</surname> <given-names>A.</given-names></name></person-group> (<year>2025</year>). <article-title>An integrated framework for optimizing customer retention budget using clustering, classification, and mathematical optimization</article-title>. <source>J. Comput. Theor. Applic</source>. <volume>3</volume>, <fpage>45</fpage>&#x02013;<lpage>63</lpage>. doi: <pub-id pub-id-type="doi">10.62411/jcta.13194</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Prashanthan</surname> <given-names>A.</given-names></name> <name><surname>Roshan</surname> <given-names>R.</given-names></name> <name><surname>Maduranga</surname> <given-names>M. W.</given-names></name></person-group> (<year>2025</year>). <article-title>Retennet: a deployable machine learning pipeline with explainable ai and prescriptive optimization for customer churn management</article-title>. <source>J. Future Artif. Intell. Technol</source>. <volume>2</volume>, <fpage>182</fpage>&#x02013;<lpage>201</lpage>. doi: <pub-id pub-id-type="doi">10.62411/faith.3048-3719-110</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shafer</surname> <given-names>G.</given-names></name> <name><surname>Vovk</surname> <given-names>V.</given-names></name></person-group> (<year>2008</year>). <article-title>A tutorial on conformal prediction</article-title>. <source>J. Mach. Learn. Res</source>. <volume>9</volume>, <fpage>371</fpage>&#x02013;<lpage>421</lpage>.</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shapley</surname> <given-names>L. S.</given-names></name></person-group> (<year>1953</year>). <article-title>A value for n-person games</article-title>. <source>Contr. Theory Games</source> <volume>2</volume>, <fpage>307</fpage>&#x02013;<lpage>317</lpage>. doi: <pub-id pub-id-type="doi">10.1515/9781400881970-018</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Thankam</surname> <given-names>M. S.</given-names></name> <name><surname>El Gayar</surname> <given-names>N.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Comparative study of different machine learning models for customer churn analysis using smote and feature variation along with customer segmentation,&#x0201D;</article-title> in <source>2023 International Conference on Modeling, Simulation &#x00026; Intelligent Computing (MoSICom)</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>637</fpage>&#x02013;<lpage>642</lpage>. doi: <pub-id pub-id-type="doi">10.1109/MoSICom59118.2023.10458848</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ullah</surname> <given-names>I.</given-names></name> <name><surname>Raza</surname> <given-names>B.</given-names></name> <name><surname>Malik</surname> <given-names>A. K.</given-names></name> <name><surname>Imran</surname> <given-names>M.</given-names></name> <name><surname>Islam</surname> <given-names>S. U.</given-names></name> <name><surname>Kim</surname> <given-names>S. W.</given-names></name></person-group> (<year>2019</year>). <article-title>A churn prediction model using random forest: analysis of machine learning techniques for churn prediction and factor identification in telecom sector</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>60134</fpage>&#x02013;<lpage>60149</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2914999</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Verbeke</surname> <given-names>W.</given-names></name> <name><surname>Dejaeger</surname> <given-names>K.</given-names></name> <name><surname>Martens</surname> <given-names>D.</given-names></name> <name><surname>Hur</surname> <given-names>J.</given-names></name> <name><surname>Baesens</surname> <given-names>B.</given-names></name></person-group> (<year>2012</year>). <article-title>New insights into churn prediction in the telecommunication sector: a profit driven data mining approach</article-title>. <source>Eur. J. Oper. Res</source>. <volume>218</volume>, <fpage>211</fpage>&#x02013;<lpage>229</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ejor.2011.09.031</pub-id></mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Verbraken</surname> <given-names>T.</given-names></name> <name><surname>Verbeke</surname> <given-names>W.</given-names></name> <name><surname>Baesens</surname> <given-names>B.</given-names></name></person-group> (<year>2012</year>). <article-title>A novel profit maximizing metric for measuring classification performance of customer churn prediction models</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>25</volume>, <fpage>961</fpage>&#x02013;<lpage>973</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2012.50</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>S.</given-names></name> <name><surname>Yau</surname> <given-names>W.-C.</given-names></name> <name><surname>Ong</surname> <given-names>T.-S.</given-names></name> <name><surname>Chong</surname> <given-names>S.-C.</given-names></name></person-group> (<year>2021</year>). <article-title>Integrated churn prediction and customer segmentation framework for telco business</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>62118</fpage>&#x02013;<lpage>62136</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3073776</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Zadrozny</surname> <given-names>B.</given-names></name> <name><surname>Elkan</surname> <given-names>C.</given-names></name></person-group> (<year>2002</year>). <article-title>&#x0201C;Transforming classifier scores into accurate multiclass probability estimates,&#x0201D;</article-title> in <source>Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>, 694&#x02013;699. doi: <pub-id pub-id-type="doi">10.1145/775047.775151</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2907889/overview">Ernesto Leon-Castro</ext-link>, Universidad Catolica de la Santisima Concepcion, Chile</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3289558/overview">Mbayandjambe Masheke Alidor</ext-link>, Universit&#x000E9; de Kinshasa, Democratic Republic of Congo</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3294455/overview">Lorita Angeline</ext-link>, Universiti Malaysia Sabah, Malaysia</p>
</fn>
</fn-group>
</back>
</article>