<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2024.1343447</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A decision support system to recommend appropriate therapy protocol for AML patients</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Castro</surname> <given-names>Giovanna A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2667305/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Almeida</surname> <given-names>Jade M.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2658821/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Machado-Neto</surname> <given-names>Jo&#x000E3;o A.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2034512/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Almeida</surname> <given-names>Tiago A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2039971/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Computer Science, Federal University of S&#x000E3;o Carlos (UFSCar) Sorocaba</institution>, <addr-line>S&#x000E3;o Paulo</addr-line>, <country>Brazil</country></aff>
<aff id="aff2"><sup>2</sup><institution>Institute of Biomedical Sciences, The University of S&#x000E3;o Paulo (USP)</institution>, <addr-line>S&#x000E3;o Paulo</addr-line>, <country>Brazil</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Francesco Napolitano, University of Sannio, Italy</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Fernanda Marconi Roversi, Emory University, United States</p>
<p>Joao Paulo Papa, S&#x000E3;o Paulo State University, Brazil</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Giovanna A. Castro <email>giovannacastro&#x00040;estudante.ufscar.br</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>06</day>
<month>03</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>7</volume>
<elocation-id>1343447</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>11</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>02</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 Castro, Almeida, Machado-Neto and Almeida.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Castro, Almeida, Machado-Neto and Almeida</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Acute Myeloid Leukemia (AML) is one of the most aggressive hematological neoplasms, emphasizing the critical need for early detection and strategic treatment planning. The association between prompt intervention and enhanced patient survival rates underscores the pivotal role of therapy decisions. To determine the treatment protocol, specialists heavily rely on prognostic predictions that consider the response to treatment and clinical outcomes. The existing risk classification system categorizes patients into favorable, intermediate, and adverse groups, forming the basis for personalized therapeutic choices. However, accurately assessing the intermediate-risk group poses significant challenges, potentially resulting in treatment delays and deterioration of patient conditions.</p></sec>
<sec>
<title>Methods</title>
<p>This study introduces a decision support system leveraging cutting-edge machine learning techniques to address these issues. The system automatically recommends tailored oncology therapy protocols based on outcome predictions.</p></sec>
<sec>
<title>Results</title>
<p>The proposed approach achieved a high performance close to 0.9 in F1-Score and AUC. The model generated with gene expression data exhibited superior performance.</p></sec>
<sec>
<title>Discussion</title>
<p>Our system can effectively support specialists in making well-informed decisions regarding the most suitable and safe therapy for individual patients. The proposed decision support system has the potential to not only streamline treatment initiation but also contribute to prolonged survival and improved quality of life for individuals diagnosed with AML. This marks a significant stride toward optimizing therapeutic interventions and patient outcomes.</p></sec></abstract>
<kwd-group>
<kwd>Acute Myeloid Leukemia</kwd>
<kwd>risk classification</kwd>
<kwd>prognostic prediction</kwd>
<kwd>supervised learning model</kwd>
<kwd>machine learning</kwd>
<kwd>decision support system</kwd>
</kwd-group>
<contract-num rid="cn001">2021/11606-3</contract-num>
<contract-num rid="cn001">2021/13325-1</contract-num>
<contract-sponsor id="cn001">Funda&#x000E7;&#x000E3;o de Amparo &#x000E0; Pesquisa do Estado de S&#x000E3;o Paulo<named-content content-type="fundref-id">10.13039/501100001807</named-content></contract-sponsor>
<contract-sponsor id="cn002">Coordena&#x000E7;&#x000E3;o de Aperfei&#x000E7;oamento de Pessoal de N&#x000ED;vel Superior<named-content content-type="fundref-id">10.13039/501100002322</named-content></contract-sponsor>
<contract-sponsor id="cn003">Conselho Nacional de Desenvolvimento Cient&#x000ED;fico e Tecnol&#x000F3;gico<named-content content-type="fundref-id">10.13039/501100003593</named-content></contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="8"/>
<equation-count count="6"/>
<ref-count count="51"/>
<page-count count="15"/>
<word-count count="9123"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Medicine and Public Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Acute Myeloid Leukemia (AML) is a highly aggressive hematological malignancy characterized by the infiltration of cancer cells in the bone marrow. It is associated with lower remission rates as patients age, and the average overall survival rate ranges from 12 to 18 months (Rose-Inman and Kuehl, <xref ref-type="bibr" rid="B38">2014</xref>; Pelcovits and Niroula, <xref ref-type="bibr" rid="B33">2020</xref>).</p>
<p>The European LeukemiaNet (ELN) established guidelines for diagnosing and treating AML in 2010 (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B11">2010</xref>). These served as a cornerstone in the field, providing valuable insights. Subsequent updates were published in 2017 (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B10">2017</xref>) and 2022 (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B12">2022</xref>), reflecting advancements in understanding AML&#x00027;s biomarkers, disease subtypes, and overall behavior. These updates have contributed to a more comprehensive and up-to-date disease management approach.</p>
<p>According to the current diagnostic criteria for AML, established by the World Health Organization (WHO), the presence of at least 10 or 20% myeloblasts in the bone marrow or peripheral blood is required, depending on the specific molecular subtype of the disease (Arber et al., <xref ref-type="bibr" rid="B4">2022</xref>). These guidelines, outlined in the Classification of Tumours of Haematopoietic and Lymphoid Tissues, provide standardized criteria for diagnosing AML accurately.</p>
<p>Apart from the initial diagnosis, patients with AML also undergo a prognostic evaluation to determine their risk profile, typically categorized into favorable, intermediate, and adverse. This risk stratification relies on analyzing cytogenetic and molecular characteristics (The Cancer Genome Atlas Research Network, <xref ref-type="bibr" rid="B42">2013</xref>). Cytogenetic characteristics involve specific chromosome alterations, while mutations in genes such as <italic>NPM1, RUNX1, ASXL1, TP53, BCOR, EZH2, SF3B1, SRSF2, STAG2</italic>, and <italic>ZRSR2</italic> determine molecular characteristics. Healthcare professionals extensively employ the ELN risk classification to make critical treatment decisions, as it directly influences the patient&#x00027;s prognosis, quality of life, and overall survival.</p>
<p>The main problem with the current ELN risk classification is the significant variability within the same risk group. Accurately assessing the intermediate-risk group is especially challenging, potentially causing delays in starting treatment and worsening patients&#x00027; conditions. To address this problem, we present a decision support system that automatically recommends therapeutic protocols for AML patients based on their survival prediction. By minimizing subjectivity and streamlining the decision-making process, the proposed approach can enhance patient outcomes, extending survival time and improving overall quality of life.</p></sec>
<sec id="s2">
<title>2 Related work</title>
<p>Treatment decisions for AML rely heavily on predicting the patients&#x00027; response and clinical outcomes, primarily based on cytogenetic factors (Estey, <xref ref-type="bibr" rid="B13">2019</xref>). However, significant heterogeneity within the same risk groups results in diverse outcomes, ranging from rapid decease to unexpected remission (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B11">2010</xref>).</p>
<p>Chemotherapy has been the established standard therapy since the mid-1970s, but its effectiveness in terms of survival rates has been limited (Bennett et al., <xref ref-type="bibr" rid="B5">1976</xref>). Recent advancements have facilitated the collection and analysis of extensive data on genetic mutations and gene expressions (The Cancer Genome Atlas Research Network, <xref ref-type="bibr" rid="B42">2013</xref>), leading to novel therapeutic strategies and a more targeted approach to treatment. These advances have opened up new possibilities for improving the outcomes and overall management of AML patients.</p>
<p>In 1976, a study conducted by an international collaboration of French, American, and British researchers known as the FAB (French-American-British) group introduced a classification system for AML. Based on the analysis of morphological characteristics in the bone marrow and peripheral blood, this classification system aimed to stratify AML patients into distinct subtypes. The FAB classification scheme defined six subtypes (M1, M2, M3, M4, M5, and M6) based on the differentiation and maturation levels of the leukemic cells. This classification system was a fundamental framework for understanding and characterizing AML, contributing to subsequent research and guiding clinical approaches (Bennett et al., <xref ref-type="bibr" rid="B5">1976</xref>).</p>
<p>In 2010, the European LeukemiaNet (ELN) introduced a novel risk classification system that considers cytogenetic and molecular information, providing a more comprehensive assessment of disease severity (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B11">2010</xref>). This updated scheme includes four risk categories: favorable, intermediate I, intermediate II, and adverse. While this stratification system offers improved accuracy compared to traditional cytogenetic analysis, it is challenging to implement it during the initial clinical evaluation due to the high costs associated with sample collection and the subsequent molecular analyses required. Nonetheless, this risk classification plays a crucial role in guiding treatment decisions and optimizing patient outcomes in the management of AML.</p>
<p>A significant update to the ELN&#x00027;s guidelines was published in 2017 based on findings regarding AML behavior (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B10">2017</xref>). The updated risk classification grouped patients into three categories (favorable, intermediate, and adverse) and refined the prognostic value of specific genetic mutations. Since then, specialists have commonly used this stratification to support important decisions about the course of each treatment, which can directly impact the patient&#x00027;s quality of life and life expectancy.</p>
<p>In 2022, the ELN updated its risk classification system, incorporating significant changes based on emerging research findings. One notable revision includes the <italic>FLT3-ITD</italic> gene expression as a key determinant. Patients with high expression levels of this gene, lacking other adverse risk characteristics, are now categorized as intermediate risk. Furthermore, mutations in genes such as <italic>BCOR, EZH2, SF3B1, SRSF2, STAG2</italic>, and <italic>ZRSR2</italic> are now associated with the adverse risk classification (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B12">2022</xref>). These updates reflect new insights regarding the impact of these genetic factors on disease progression and treatment outcomes (The Cancer Genome Atlas Research Network, <xref ref-type="bibr" rid="B42">2013</xref>; Angenendt et al., <xref ref-type="bibr" rid="B3">2019</xref>). The evolving understanding of these molecular characteristics provides valuable information for risk stratification and personalized management of AML patients.</p>
<p>Patients with a favorable risk profile typically exhibit favorable responses to chemotherapy. Conversely, those with an adverse risk profile often display limited responsiveness to standard chemotherapy and may require alternative treatments, such as hematopoietic stem cell transplantation (The Cancer Genome Atlas Research Network, <xref ref-type="bibr" rid="B42">2013</xref>). However, the therapeutic response of AML patients with an intermediate risk profile remains less clearly defined. The heterogeneous nature of this subgroup makes it challenging to predict their specific treatment outcomes, demanding further research and tailored approaches to optimize their clinical management.</p>
<p>The current risk classifications present challenges due to significant variability within the same risk group. Factors such as age and gender can significantly influence treatment outcomes. For instance, patients under 60 years old tend to respond better to high-dose chemotherapy. In comparison, patients over 60 years old may have a lower tolerance for intense chemotherapy and require alternative palliative therapies (Lagunas-Rangel et al., <xref ref-type="bibr" rid="B25">2017</xref>). However, current risk classifications do not consider age a relevant factor in treatment decision-making. As a result, even among patients classified as having intermediate risk, specialists often rely on additional information, such as results from other tests and analyses, to determine the most appropriate therapy despite limited evidence of efficacy (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B11">2010</xref>). This reliance on supplementary information can delay treatment initiation and worsen the patient&#x00027;s clinical condition. Therefore, there is a need for improved risk stratification models that consider diverse patient characteristics to ensure more precise and timely therapy decisions in AML.</p>
<p>To address these challenges, recent studies have applied machine learning (ML) techniques to predict patient survival and treatment outcomes. By leveraging ML algorithms, researchers aim to automate the prediction of patient response to specific treatments and the likelihood of achieving complete remission. These ML models can handle a huge number of clinical and molecular features to compute predictions, allowing for a more data-driven approach to treatment decision-making. The main goal is to provide valuable insights and assist clinicians in making informed decisions that can optimize patient outcomes and improve the overall management of the disease.</p>
<p>Gal et al. (<xref ref-type="bibr" rid="B15">2019</xref>) employed supervised machine learning models to predict complete remission in pediatric patients with AML. They used data extracted from RNA sequencing and clinical information as input features for their models. The k-nearest neighbors algorithm achieved the highest performance among the ML techniques evaluated. Additionally, the authors observed notable differences in gene expression patterns between the pre- and post-treatment periods, suggesting the potential of using gene expression data as predictive markers for treatment response in AML patients.</p>
<p>In a subsequent study, Mosquera Orgueira et al. (<xref ref-type="bibr" rid="B28">2021</xref>) employed clinical and genetic data to train a random forest classifier to predict the survival probability of AML patients. The researchers identified patient age and gene expressions of <italic>KDM5B</italic> and <italic>LAPTM4B</italic> as the three most influential variables. These findings suggest that combining ML techniques with clinical and molecular data holds significant predictive potential for AML diagnosis and supporting therapeutic decision-making. The study emphasizes the importance of incorporating genetic information into predictive models, as it provides valuable insights into the prognosis and treatment response. Such ML-based approaches offer a promising avenue for enhancing patient management and personalized treatment strategies.</p>
<p>Gerstung et al. (<xref ref-type="bibr" rid="B16">2017</xref>) presented a statistical decision support model to predict personalized treatment outcomes for AML patients. The model employs prognostic data available in a knowledge bank and demonstrates the significant impact of clinical and demographic factors, including age and blood cell count, on early death rates, particularly mortality related to treatment. Through the knowledge bank-based model, the authors observed that approximately one-third of the analyzed patients would modify their treatment protocols when comparing the model&#x00027;s recommendations to those of the ELN. This highlights the potential of leveraging comprehensive prognostic data and statistical modeling to enhance treatment decisions and potentially improve patient outcomes. The study underscores the importance of incorporating personalized and data-driven approaches in the management of AML.</p>
<p>In a comprehensive study, Itzykson et al. (<xref ref-type="bibr" rid="B20">2021</xref>) proposed a rule-based decision support system that integrates statistical and machine learning techniques to facilitate treatment decision-making for elderly patients diagnosed with AML. The model employs overall survival predictions based on the Kaplan-Meier method and incorporates seven oncogenetic markers (<italic>NPM1, FLT3-ITD, DNMT3A, NRAS, ASXL1, KRAS</italic>, and <italic>TP53</italic>) to stratify patients into distinct treatment groups. These groups provide insights into the intensity of treatment required and offer guidance on whether treatment decisions should be strictly adhered to, cautiously analyzed, or entirely discarded. The authors found that their model exhibited more discriminative ability than the 2017 ELN stratification, successfully identifying 30&#x02013;35% of patients with superior outcomes and accurately censoring the need for hematopoietic stem-cell transplantation in the first remission. Moreover, multivariate logistic regression analysis identified mutations in the <italic>NRAS, SETBP1, RUNX1</italic>, and <italic>ASXL1</italic> genes as independent predictors of poor complete remission rates in non-adverse risk patients. These findings further emphasize the significance of incorporating decision-support tools that consider clinical and genetic data for accurate treatment prediction.</p>
<p>An interpretable model for predicting the survival of AML patients was presented by Almeida et al. (<xref ref-type="bibr" rid="B2">2023</xref>). This model leveraged the Explainable Boosting Machines (EBM) technique, and the results emphasized the importance of using genetic data in AML analysis, particularly gene expression data. Furthermore, they highlighted the importance of selecting specific treatment groups for patient survival.</p>
<p>Several recent studies on AML-related diseases have also demonstrated the effectiveness of applying state-of-the-art ML techniques in pattern recognition, risk prediction, and survival prediction. These diseases include acute lymphoblastic leukemia (Fitter et al., <xref ref-type="bibr" rid="B14">2021</xref>), myelodysplastic syndrome (Radhachandran et al., <xref ref-type="bibr" rid="B35">2021</xref>), breast cancer (Kate and Nadig, <xref ref-type="bibr" rid="B24">2017</xref>), prostate cancer (Zolbanin et al., <xref ref-type="bibr" rid="B51">2015</xref>; Rabaan et al., <xref ref-type="bibr" rid="B34">2022</xref>), rectal cancer (Wang et al., <xref ref-type="bibr" rid="B45">2022</xref>), skin cancer (Ahmed et al., <xref ref-type="bibr" rid="B1">2022</xref>), nasopharynx cancer (Jing et al., <xref ref-type="bibr" rid="B22">2020</xref>), pancreatic cancer (Walczak and Velanovich, <xref ref-type="bibr" rid="B44">2018</xref>; Muhammad et al., <xref ref-type="bibr" rid="B29">2019</xref>; Wang et al., <xref ref-type="bibr" rid="B46">2020</xref>), infective endocarditis (Ris et al., <xref ref-type="bibr" rid="B36">2019</xref>), AML in pediatric patients (Hoch et al., <xref ref-type="bibr" rid="B19">2021</xref>), and AML with myelodysplasia-related changes (Yu et al., <xref ref-type="bibr" rid="B48">2021</xref>). The success observed indicates that contemporary ML techniques can automatically uncover meaningful patterns within vast datasets.</p>
<p>In this context, this study presents a decision support system designed to recommend suitable therapeutic protocols automatically for AML patients based on their survival prediction. The primary aim is to mitigate the subjectivity inherent in treatment decisions and reduce the time involved in the decision-making process. Consequently, we can deliver more accurate and reliable treatment recommendations that minimize adverse effects. Our ultimate goal is to improve patient outcomes by extending their survival time and enhancing their overall quality of life.</p></sec>
<sec sec-type="materials and methods" id="s3">
<title>3 Materials and methods</title>
<p>The decision support system proposed in this work combines supervised models computed by three established machine learning methods commonly employed in the medical field: Random Forests (RF), Support Vector Machines (SVM), and Logistic Regression (LR). This system automatically recommends the best treatments for AML patients based on the automatic prediction of clinical outcome (survival/decease).</p>
<p>Altogether, we have trained nine clinical outcome prediction models using selected attributes from real and public databases composed of (<italic>i</italic>) clinical data (<monospace>CLIN</monospace>), (<italic>ii</italic>) mutation data (<monospace>MUT</monospace>), and (<italic>iii</italic>) gene expression data (<monospace>EXP</monospace>). Then, we combined the best-trained models, one for each combination of the three databases. The ensemble outputs are aggregated to compose a robust final prediction model. <xref ref-type="fig" rid="F1">Figure 1</xref> summarizes the processes for generating the proposed system, and <xref ref-type="fig" rid="F2">Figure 2</xref> illustrates the architecture of the resulting ensemble models. In the following, we detail each process involved in designing the method proposed in this work.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Pipeline for training and evaluating the ensemble. Initially, three sets of clinical and genetic data are input. Then, these data are preprocessed and cleaned to facilitate the feature selection process. This process is expert-guided for clinical data (Expert) and automated for genetic data (Chi-Square and LASSO). With these validated datasets, we trained individual models (HPO stands for Hyperparameter optimization). The best individual predictors are selected to compose the classification committee. Subsequently, a new training and evaluation process is carried out, now based on the classification committee (ensemble). In the end, a therapy recommendation is computed.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1343447-g0001.tif"/>
</fig>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>The architecture of the proposed decision support system. Initially, individual models are trained using the datasets [clinical (CLIN), gene mutation (MUT), and gene expression (EXP)] with the three Machine Learning techniques [Random Forests (RF), Logistic Regression (LR), and Support Vector Machine (SVM)]. Subsequently, the best individual models are selected for each kind of data from all those produced (the asterisk symbol represents these models). In the following, a combination of these models is created by a classification committee with a weighted final prediction vote. Since the result of this committee and the models is a survival response, the process is repeated for every possible <italic>Treatment Intensity</italic> value. In the end, the recommended therapy is the one that maximizes the probability of patient survival.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1343447-g0002.tif"/>
</fig>
<p>The final output encompasses recommending a therapeutic course from distinct treatment groups defined by experts in the field and outlined below. As both the individual models and the committee yield survival predictions, the recommendation rests on selecting the group that optimizes the survival forecast for the AML patient.</p>
<sec>
<title>3.1 Datasets</title>
<p>The datasets used to train and evaluate the prediction models come from studies by <italic>The Cancer Genome Atlas Program</italic> (TCGA) and <italic>Oregon Health and Science University</italic> (OHSU). These datasets are known as <italic>Acute Myeloid Leukemia</italic> (The Cancer Genome Atlas Research Network, <xref ref-type="bibr" rid="B42">2013</xref>; Tyner et al., <xref ref-type="bibr" rid="B43">2018</xref>) and comprise clinical and genetic data of AML patients. Both are real and available in the public domain at: <ext-link ext-link-type="uri" xlink:href="https://www.cbioportal.org/">https://www.cbioportal.org/</ext-link>. We used three sets with data collected from the same patients: one with clinical information, another with gene mutation data, and another with gene expression data. <xref ref-type="table" rid="T1">Table 1</xref> summarizes the original data in the three feature sets extracted from the two databases.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Amount of original data in each database.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th/>
<th/>
<th/>
<th valign="top" align="center" colspan="3"><bold>Features</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#919498;color:#ffffff">
<td valign="top" align="left"><bold>Database</bold></td>
<td valign="top" align="center"><bold>&#x00023;Samples</bold></td>
<td valign="top" align="center"><bold>&#x00023;Patients</bold></td>
<td valign="top" align="center"><bold>Clinical</bold></td>
<td valign="top" align="center"><bold>Mutation</bold></td>
<td valign="top" align="center"><bold>Expression</bold></td>
</tr> <tr>
<td valign="top" align="left">TCGA</td>
<td valign="top" align="center">200</td>
<td valign="top" align="center">200</td>
<td valign="top" align="center">31</td>
<td valign="top" align="center">25,000</td>
<td valign="top" align="center">25,000</td>
</tr> <tr>
<td valign="top" align="left">OHSU</td>
<td valign="top" align="center">672</td>
<td valign="top" align="center">562</td>
<td valign="top" align="center">97</td>
<td valign="top" align="center">606</td>
<td valign="top" align="center">22,825</td>
</tr></tbody>
</table>
</table-wrap>
<p>Specialists in the data domain analyzed and grouped the treatments in the clinical data into four categories according to the intensity of each therapy (Almeida et al., <xref ref-type="bibr" rid="B2">2023</xref>):</p>
<list list-type="order">
<list-item><p><italic>Target therapy</italic>&#x02014;therapy that uses a therapeutic target to inhibit some mutation/AML-related gene or protein;</p></list-item>
<list-item><p><italic>Regular therapy</italic>&#x02014;therapy with any classical chemotherapy;</p></list-item>
<list-item><p><italic>Low-intensity therapy</italic>&#x02014;non-targeted palliative therapy, generally recommended for elderly patients; and</p></list-item>
<list-item><p><italic>High-intensity therapy</italic>&#x02014;chemotherapy followed by autologous or allogenic hematopoietic stem cell transplantation.</p></list-item>
</list>
<p>Likewise, cytogenetic information was normalized and grouped by specialists in the data domain. In addition, to reduce the original fragmentation in the data related to the patient&#x00027;s race, we binarized the values with 1 indicating that the patient is white and 0 otherwise. This is because white patients represent about 75% of the data.</p>
</sec>
<sec>
<title>3.2 Data cleaning and preprocessing</title>
<p>Since the data comes from two sources, we have processed them to ensure consistency and integrity. With the support of specialists in the application domain, we removed the following data:</p>
<list list-type="order">
<list-item><p>Samples not considered AML in adults observed by (<italic>i</italic>) the age of the patient, which must not be &#x0003C; 18 years, and (<italic>ii</italic>) the percentage of blasts in the bone marrow, which should be &#x02265;20%;</p></list-item>
<list-item><p>Samples without information on survival elapsed time after starting treatment (<italic>Overall Status Survival</italic>);</p></list-item>
<list-item><p>Duplicated samples. We have removed all instances from the OHSU database in which the attribute value of <italic>Site of Sample</italic> differed from <italic>Bone Marrow Aspirate</italic>. As the original dataset contains multiple samples from the same patient, all blood samples collected outside the bone marrow were removed.</p></list-item>
<list-item><p>All instances from the OHSU database were excluded where the value of the attribute <italic>Sample Timepoint</italic> differed from <italic>de novo</italic>. This is because the TCGA database contains only blood samples from patients with <italic>AML de novo</italic>;</p></list-item>
<list-item><p>Attributes identifying the type of cancer, as all patients were diagnosed with AML; and</p></list-item>
<list-item><p>Any other feature that is not present in both databases.</p></list-item>
</list>
<p>We used the 3-Nearest Neighbor method (KNN; Cover and Hart, <xref ref-type="bibr" rid="B8">1967</xref>) to fill empty values in clinical data features (<monospace>CLIN</monospace>) automatically. We used the features with empty values as the target attributes and filled them using the value predicted from the model trained with other attributes (i.e., without empty values). After the preprocessing stage, the gene expression (<monospace>EXP</monospace>) and mutation (<monospace>MUT</monospace>) data do not have empty values. Nevertheless, we removed the features of 37 genes with no mutations.</p>
<p>Subsequently, we kept only the samples in which all the variables are compatible, observing data related to the exams and treatment received by the patients, as these affect the nature of the clinical, mutation, and gene expression data. Of the 872 initial samples in the two databases, 272 were kept at the end of the preprocessing, integration, and data-cleaning processes. Finally, specialists in the data domain checked and validated all the data.</p>
</sec>
<sec>
<title>3.3 Feature selection</title>
<p>This section describes the feature selection process used to represent clinical, gene mutation, and gene expression data.</p>
<sec>
<title>3.3.1 Clinical data</title>
<p>Among the clinical attributes common to the two databases (TCGA and OHSU), specialists in the data domain selected the following according to their relevance for predicting clinical outcomes. In <xref ref-type="table" rid="T2">Table 2</xref>, we briefly describe all selected clinical features, and <xref ref-type="table" rid="T3">Table 3</xref> summarizes the main statistics of those with a continuous nature. <xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref> illustrate the distributions of the continuous and categorical features, respectively, segmented by survival outcome.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Clinical features description.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Feature</bold></th>
<th valign="top" align="left"><bold>Description</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Diagnosis age</td>
<td valign="top" align="left">Patient age when diagnosed with AML</td>
</tr> <tr>
<td valign="top" align="left">Bone marrow blast %</td>
<td valign="top" align="left">Percentage of blasts in the bone marrow</td>
</tr> <tr>
<td valign="top" align="left">Mutation count</td>
<td valign="top" align="left">Number of genetic mutations observed</td>
</tr> <tr>
<td valign="top" align="left">PB blast %</td>
<td valign="top" align="left">Percentage of blasts in peripheral blood</td>
</tr> <tr>
<td valign="top" align="left">WBC</td>
<td valign="top" align="left">White blood cell count</td>
</tr> <tr>
<td valign="top" align="left">Gender</td>
<td valign="top" align="left">Patient gender</td>
</tr> <tr>
<td valign="top" align="left">Race</td>
<td valign="top" align="left">Whether the patient is white or not</td>
</tr> <tr>
<td valign="top" align="left">Cytogenetic info</td>
<td valign="top" align="left">Cytogenetic information that the specialist used in diagnosing the patient</td>
</tr> <tr>
<td valign="top" align="left">ELN risk classification</td>
<td valign="top" align="left">ELN risk groups (favorable, intermediate, and adverse)</td>
</tr> <tr>
<td valign="top" align="left">Treatment intensity classification</td>
<td valign="top" align="left">The intensity of treatment received by the patient (target, regular, low-intensity, or high-intensity therapy)</td>
</tr> <tr>
<td valign="top" align="left">Overall survival status</td>
<td valign="top" align="left">Patient survival status (living or deceased).</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Main statistics of clinical features with a continuous nature.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Feature</bold></th>
<th valign="top" align="center"><bold>Minimum</bold></th>
<th valign="top" align="center"><bold>Maximum</bold></th>
<th valign="top" align="center"><bold>Median</bold></th>
<th valign="top" align="center"><bold>Mean</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Diagnosis age</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">88</td>
<td valign="top" align="center">58</td>
<td valign="top" align="center">55.38</td>
</tr> <tr>
<td valign="top" align="left">Bone marrow blast %</td>
<td valign="top" align="center">20</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">72</td>
<td valign="top" align="center">68.18</td>
</tr> <tr>
<td valign="top" align="left">Mutation count</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">34</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">9.72</td>
</tr> <tr>
<td valign="top" align="left">PB blast %</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">99.20</td>
<td valign="top" align="center">38.15</td>
<td valign="top" align="center">40.54</td>
</tr> <tr>
<td valign="top" align="left">WBC</td>
<td valign="top" align="center">0.4</td>
<td valign="top" align="center">483</td>
<td valign="top" align="center">39.44</td>
<td valign="top" align="center">18.04</td>
</tr></tbody>
</table>
</table-wrap>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Boxplots illustrating the distribution of continuous clinical features. The data is segmented based on the classes of the target attribute. Blue bars represent the <italic>Living</italic> patients, while red represents the <italic>Deceased</italic> ones. &#x0201C;WBC&#x0201D; refers to White Blood Cell count, a numerical measurement of the total count of white blood cells in a given blood volume. Additionally, &#x0201C;PB&#x0201D; stands for Peripheral Blood, indicating blood collected from the peripheral circulatory system rather than from specific organs or tissues.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1343447-g0003.tif"/>
</fig>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Distribution of categorical clinical features segmented according to the classes of the target attribute. Blue bars correspond to the <italic>Living</italic> patients, while red represents the <italic>Deceased</italic> ones. &#x0201C;ELN risk classification&#x0201D; corresponds to the European LeukemiaNet risk classification system, a recognized panel extensively referenced in leukemias.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1343447-g0004.tif"/>
</fig>
<p>Among those who did not survive during the analyzed period, the <italic>Diagnosis age</italic> is concentrated in the range of 50&#x02013;70 years, with an outlier between 18 and 20 years. In contrast, the age range among surviving patients is between 32 and 60. It is in line with the literature that the diagnosis age can influence the course of the disease (Gal et al., <xref ref-type="bibr" rid="B15">2019</xref>; Mosquera Orgueira et al., <xref ref-type="bibr" rid="B28">2021</xref>).</p>
<p>The percentage of blasts in peripheral blood (<italic>PB blast %</italic>) and bone marrow (<italic>Bone marrow blast %</italic>) shows a similar distribution concerning the &#x0201C;living&#x0201D; and &#x0201C;deceased&#x0201D; patients. The white blood cell count (<italic>WBC</italic>) revealed outliers, with its distribution concentrated between 0 and 100. Finally, most patients&#x00027; mutations range (<italic>mutation count</italic>) from 5 to 15, with outliers exceeding 25.</p>
<p>Regarding the <italic>ELN risk classification</italic>, most patients fall into the &#x0201C;Intermediate&#x0201D; group. Given that this classification commonly influences therapeutic decisions, a higher proportion of patients in this group experienced adverse outcomes (D&#x000F6;hner et al., <xref ref-type="bibr" rid="B10">2017</xref>).</p>
<p>Regarding <italic>Gender</italic>, there are more males than females in these studies. Furthermore, the mortality incidence is notably higher among males, a factor that can be considered in the therapeutic decision-making process. Regarding <italic>Race</italic>, there is a predominant incidence of patients identified as white, while other races are grouped under the <italic>Other</italic> category.</p>
<p>Finally, the data predominantly focused on the <italic>High Intensity</italic> and <italic>Regular Therapy</italic> groups, treatments that involve oral chemotherapy, and, in the case of high intensity, there is consolidation with bone marrow transplantation. Most patients who received <italic>Low-Intensity</italic> treatment succumbed to the disease. This type of therapeutic choice is commonly employed for patients in the terminal stage.</p></sec>
<sec>
<title>3.3.2 Gene expression data</title>
<p>After data preprocessing and cleaning, 14,712 gene expression attributes remained. To select the most relevant ones for survival prediction, we employed the LASSO method (SVM with L1 regularization). This calculates coefficients for each attribute based on its relevance for classification.</p>
<p>We have trained the method using all gene expression attributes with a regularization factor of <italic>C</italic> = 0.01. At the end of the training process, 22 expression attributes were selected: <italic>CCDC144A, CPNE8, CYP2E1, CYTL1, HAS1, KIAA0141, KIAA1549, LAMA2, LTK, MICALL2, MX1, PPM1H, PTH2R, PTP4A3, RAD21, RGS9BP, SLC29A2, TMED4, TNFSF11, TNK1, TSKS</italic>, and <italic>XIST</italic>.</p></sec>
<sec>
<title>3.3.3 Gene mutation data</title>
<p>After cleaning and preprocessing the data, 281 gene mutation features remained. Then, we employed the &#x003C7;<sup>2</sup> statistical method to select a subset of these features. For this, we defined the following hypotheses: H0&#x02014;patient survival is independent of gene mutation; H1&#x02014;patient survival depends on gene mutation. Using <italic>p</italic> &#x0003C; 0.1, a set of 10 gene mutation features was selected: <monospace>SRSF2</monospace>, <monospace>U2AF1</monospace>, <monospace>RIF1</monospace>, <monospace>PRKAA2</monospace>, <monospace>CALR</monospace>, <monospace>CADM2</monospace>, <monospace>PTPN11</monospace>, <monospace>PHF6</monospace>, <monospace>CTNNA2</monospace>, and <monospace>TP53</monospace>.</p>
<p>After the cleaning and preprocessing stage, we obtained the final database used to train and evaluate the prediction models. It has 272 samples (patient data) consisting of 11 clinical features (<monospace>CLIN</monospace> dataset), 22 gene expression features (<monospace>EXP</monospace> dataset), and 10 gene mutation features (<monospace>MUT</monospace> dataset). <xref ref-type="table" rid="T4">Table 4</xref> summarizes each of these datasets. All the code used in this paper and the final database are publicly available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/jdmanzur/ml4aml">https://github.com/jdmanzur/ml4aml</ext-link>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Final datasets used to train and evaluate the outcome prediction models.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Dataset</bold></th>
<th valign="top" align="center"><bold>&#x00023;Features</bold></th>
<th valign="top" align="left"><bold>Features</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Clinical (<monospace>CLIN</monospace>)</td>
<td valign="top" align="center">11</td>
<td valign="top" align="left">Diagnosis age, Bone marrow blast (%), Mutation count, PB blast (%), WBC, Gender, isWhite, Cytogenetic info, ELN risk classification, Treatment intensity classification, and Overall survival status (class)</td>
</tr> <tr>
<td valign="top" align="left">Gene expression (<monospace>EXP</monospace>)</td>
<td valign="top" align="center">22</td>
<td valign="top" align="left"><italic>CCDC144A, CPNE8, CYP2E1, CYTL1, HAS1, KIAA0141, KIAA1549, LAMA2, LTK, MICALL2, MX1, PPM1H, PTH2R, PTP4A3, RAD21, RGS9BP, SLC29A2, TMED4, TNFSF11, TNK1, TSKS, and XIST</italic></td>
</tr> <tr>
<td valign="top" align="left">Gene mutation (<monospace>MUT</monospace>)</td>
<td valign="top" align="center">10</td>
<td valign="top" align="left"><italic>SRSF2, U2AF1, RIF1, PRKAA2, CALR, CADM2, PTPN11, CTNNA2, PHF6, and TP53</italic></td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>3.3.4 Expression impact survival analysis</title>
<p>Three genes caught our attention in the gene expression selection process: <italic>MICALL2, KIAA0141</italic>, and <italic>SLC29A2</italic>. Thus, we deeply analyzed the impact on survival outcomes and biological characteristics of patients with AML. First, we compare their mRNA levels between AML patients and samples of normal hematopoietic cells. Then, we plot the Kaplan-Meier curves to check the overall survival for AML patients dichotomized according to high or low expression. Next, we compute a heatmap using ClusterVis to summarize the expression of the top-25 upregulated and 25 downregulated genes for high vs. low expression (<xref ref-type="fig" rid="F5">Figure 5</xref>). Additionally, we use Volcano plots to depict the extent and significance of differential gene expression for each gene, comparing high vs. low. Finally, we also compute Gene Set Enrichment Analysis plots for biological processes associated with the three gene expressions in AML patients (<xref ref-type="fig" rid="F6">Figure 6</xref>).</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p><italic>MICALL2, KIAA0141</italic>, and <italic>SLC29A2</italic> expression impact survival outcomes and biological characteristics in AML patients. <bold>(A)</bold> <italic>MICALL2</italic> (probe 219332_at), <italic>KIAA0141</italic> (probe 201977_s_at), or <italic>SLC29A2</italic> (probe 204717_s_at) mRNA levels were compared between AML patients (<italic>n</italic> &#x0003D; 577), and samples of normal hematopoietic cells (normal bone marrow <italic>n</italic> &#x0003D; 5, CD34&#x0002B; cells <italic>n</italic> &#x0003D; 8). The &#x0201C;<italic>y</italic>&#x0201D; axis represents mRNA expression levels at arbitrary values. Horizontal lines represent the median. <bold>(B)</bold> Kaplan-Meier curves represent overall survival for AML patients dichotomized according to high or low <italic>MICALL2, KIAA0141</italic>, or <italic>SLC29A2</italic> expression (using the ROC curve as the cut-off point). Hazard ratio (HR), 95% confidence interval, and <italic>p</italic> values are indicated (log-rank test). <bold>(C)</bold> Heatmap constructed using ClusterVis that summarizes the expression of the top-25 upregulated and 25 downregulated genes for high vs. low <italic>MICALL2, KIAA0141</italic>, or <italic>SLC29A2</italic> expression. Color intensity represents the <italic>z</italic>-score within each row.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1343447-g0005.tif"/>
</fig>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p><italic>MICALL2, KIAA0141</italic>, and <italic>SLC29A2</italic> expression impact survival outcomes and biological characteristics in Acute Myeloid Leukemia (AML) patients. <bold>(A)</bold> Volcano plots depicting the extent (<italic>x</italic>-axis) and significance (<italic>y</italic>-axis) of differential gene expression for each gene, comparing high vs. low <italic>MICALL2, KIAA0141</italic>, or <italic>SLC29A2</italic> expression. <bold>(B)</bold> Gene Set Enrichment Analysis plots for biological processes associated with <italic>MICALL2, KIAA0141</italic>, or <italic>SLC29A2</italic> expression in AML patients. The top portion of the plot shows the running enrichment scores (ES) for the gene set. The point with the maximum deviation from zero is defined as the ES for the gene set. The leading-edge subset (the subset of genes with the most significant contribution to the ES) is shown as a vertical bar accumulating before the peak score for a positive ES or after the peak score for a negative ES. FDR-adjusted <italic>p</italic>-values (NOM <italic>p</italic>-value) and enrichment scores normalized for gene set size (NES) are indicated.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1343447-g0006.tif"/>
</fig>
</sec>
</sec>
<sec>
<title>3.4 Training the outcome prediction models</title>
<p>We have used three established supervised machine learning methods to train models that automatically predict the clinical outcome (survival/decease) based on the chosen treatment intensity (target, regular, low-intensity, and high-intensity) for a given patient. The methods are Random Forest (RF), Logistic Regression (LR), and Support Vector Machines (SVM). <xref ref-type="table" rid="T5">Table 5</xref> briefly describes the ML methods used in this study.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Supervised machine learning methods used in this study.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Algorithm</bold></th>
<th valign="top" align="left"><bold>Description</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">RF (Breiman, <xref ref-type="bibr" rid="B7">2001</xref>)</td>
<td valign="top" align="left">An ensemble learning method that operates by training decision trees. For predicting a class, the output of the random forest is the class predicted by most trees.</td>
</tr> <tr>
<td valign="top" align="left">LR (Cramer, <xref ref-type="bibr" rid="B9">2003</xref>)</td>
<td valign="top" align="left">A statistical model that computes the probability of a sample belonging to some class having the log-odds for the class be a linear combination of one or more independent features.</td>
</tr> <tr>
<td valign="top" align="left">SVM (Boser et al., <xref ref-type="bibr" rid="B6">1992</xref>)</td>
<td valign="top" align="left">A supervised learning method that maps training samples to points in space, aiming to maximize the width of the margin that separates the two classes. It is versatile because different Kernel functions can be specified for the decision boundary.</td>
</tr></tbody>
</table>
</table-wrap>
<p>Due to the small amount of data available to train and evaluate the models, we did not employ deep learning techniques, as these methods demand a huge amount of data. Furthermore, the explainability of prediction models is a desirable characteristic in this context.</p>
<p>We fit the main hyperparameters through a grid search (Mitchell, <xref ref-type="bibr" rid="B26">1997</xref>). <xref ref-type="table" rid="T6">Table 6</xref> presents the range of values evaluated. We kept the default values for all other parameters.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Hyperparameters evaluated in a grid search.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="left"><bold>Hyperparameters</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left"><italic>n_estimators</italic>={10, 15, 20, 25, 30, 45, 50}, <italic>min_samples_leaf</italic>={1, 2, 3, 4}, <italic>max_depth</italic>={8, 10, <italic>None</italic>}, <italic>class_weight</italic>={balanced, none}</td>
</tr> <tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="left"><italic>C</italic>={10<sup>&#x02212;6</sup>, 5.62 &#x000D7; 10<sup>&#x02212;5</sup>, 3.166 &#x000D7; 10<sup>&#x02212;3</sup>, 1.77 &#x000D7; 10<sup>&#x02212;1</sup>, 10}, <italic>class_weight</italic>={balanced, none}, <italic>penalty</italic>={L2}, <italic>random_state</italic>=1</td>
</tr> <tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="left"><italic>kernel</italic>={linear, rbf}, <italic>C</italic>={10<sup>&#x02212;6</sup>, 10<sup>&#x02212;5</sup>, 10<sup>&#x02212;4</sup>, 10<sup>&#x02212;3</sup>, 10<sup>&#x02212;2</sup>}, <italic>class_weight</italic>={balanced, none}, <italic>random_state</italic>=1</td>
</tr></tbody>
</table>
</table-wrap>
<p>We have trained the three classification methods (RF, LR, and SVM) with the three datasets (CLIN, MUT, and EXP), resulting in a total of nine individual prediction models (3 ML Models &#x000D7; 3 datasets).</p>
</sec>
<sec>
<title>3.5 Ensemble</title>
<p>Among the nine outcome prediction models, we selected the ones that obtained the best results for each data set (<xref ref-type="fig" rid="F2">Figure 2</xref>). We then combined these three models as a classifier committee that computes the predicted survival outcome for a given patient. <xref ref-type="disp-formula" rid="E1">Equation 1</xref> presents how we have weighted the vote for an individual prediction model <italic>M</italic><sub><italic>i</italic></sub>. The F1-Score corresponds to the f-measure attained by the prediction model in the validation set. The ensemble classification output is then computed from the outcome (live/decease) with the highest final vote, considering the output of all individual prediction models (<xref ref-type="fig" rid="F7">Figure 7</xref>).</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>v</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>&#x02308;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mo class="qopname">log</mml:mo></mml:mrow><mml:mrow><mml:mn>10</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mi>S</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow><mml:mo>&#x02309;</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Weighted function for computing the vote of an individual prediction model <italic>M</italic><sub><italic>i</italic></sub>. It provides a weighted vote based on the F1-Score metric for each prediction. The higher the F1-Score, the higher the weight of the models.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1343447-g0007.tif"/>
</fig>
</sec>
<sec>
<title>3.6 Performance evaluation</title>
<p>The performance of the prediction models was assessed using the traditional hold-out validation approach (Mitchell, <xref ref-type="bibr" rid="B26">1997</xref>). The dataset was partitioned into four subsets: 70% was randomly selected for training the models, 10% for feature selection, another 10% for validating the individual models, and the remaining 10% as an independent test set for evaluating the performance of the ensemble. Assessing the model performance in the separated test partition simulates its application in a prospective independent patient cohort.</p>
<p>We have calculated the following measures to assess and compare the performance obtained by the prediction models. In the equations below, TP (true positive) is the number of patients correctly predicted by the model who deceased; FP (false positive) is the number of patients who survived, but the model incorrectly predicted to decease; TN (true negative) is the number of patients correctly predicted by the model who survived; FN (false negative) is the number of patients who deceased, but the model incorrectly predicted to survive.</p>
<p><bold>Accuracy (ACC):</bold> the percentage of correct predictions.</p>
<disp-formula id="E2"><mml:math id="M2"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">ACC</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><bold>Recall (REC) or Sensitivity:</bold> the proportion of true positives (patients predicted to decease) to the actual positive samples (patients who deceased) that should have been detected.</p>
<disp-formula id="E3"><mml:math id="M3"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Recall</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><bold>Precision (PREC):</bold> the proportion of true positives (patients correctly predicted to decease) to all positive predictions (patients predicted to decease), including those incorrectly identified by the prediction model.</p>
<disp-formula id="E4"><mml:math id="M4"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Precision</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><bold>F1-Score (F1):</bold> the harmonic mean between Precision and Recall. The F1-Score is often used when the dataset is imbalanced.</p>
<disp-formula id="E5"><mml:math id="M5"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">F1</mml:mtext><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>*</mml:mo><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>*</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><bold>Matthews Correlation Coefficient (MCC):</bold> provides a balanced assessment of a model&#x00027;s performance, considering both positive and negative cases. The MCC ranges from -1 to &#x0002B;1, where &#x0002B;1 indicates a perfect prediction, 0 indicates a random prediction, and -1 indicates total disagreement between the model&#x00027;s predictions and the true labels.</p>
<disp-formula id="E6"><mml:math id="M6"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">MCC</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>*</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>-</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>*</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><bold>AUC:</bold> the receiver operating characteristic (ROC) curve is used to assess the performance of a prediction model across several classification thresholds. The area under the ROC curve (AUC) measures the entire two-dimensional area beneath the ROC curve. AUC ranges from zero to one, and higher values indicate better performance (Spackman, <xref ref-type="bibr" rid="B40">1989</xref>).</p></sec></sec>
<sec id="s4">
<title>4 Results and discussion</title>
<p>In this section, we detail and analyze all the results obtained. First, we present the main findings in selecting and analyzing clinical and genetic data. Then, we report the results of the individual prediction models and, finally, the performance of the outcome prediction computed by the ensemble.</p>
<sec>
<title>4.1 Genes that impact survival outcomes and biological characteristics</title>
<p>The protein encoded by <italic>MICALL2</italic> potentially regulates cytoskeleton dynamics, tight junction formation, and neurite outgrowth. To the best of our knowledge, no study has analyzed the biological role of <italic>MICALL2</italic> in AML and other leukemias. Therefore, it could be addressed in future clinical and functional studies.</p>
<p>The gene <italic>KIAA0141</italic>, also known as <italic>DELE1</italic>, could indicate a patient&#x00027;s response to drug and radiation therapies (Jia et al., <xref ref-type="bibr" rid="B21">2014</xref>; Sato et al., <xref ref-type="bibr" rid="B39">2021</xref>). Results in the literature point to this gene as a prognostic indicator because it may play a major role in mitochondrial stress (Guo et al., <xref ref-type="bibr" rid="B18">2020</xref>). Sui et al. (<xref ref-type="bibr" rid="B41">2023</xref>) suggest that <italic>DELE1</italic> has an important role in improving therapy protocols for cancer.</p>
<p>The high expression of <italic>SLC29A2</italic>, also known as <italic>ENT2</italic>, suggests its importance in facilitating hypoxanthine transport, which is necessary for enhanced DNA synthesis through hypoxanthine recycling. In conclusion, <italic>ENT2</italic> shows potential as a target for developing therapeutics (Naes et al., <xref ref-type="bibr" rid="B30">2023</xref>). Elevated levels of <italic>ENT2</italic> in the blasts at the time of diagnosis of AML were associated with a lower response to induction therapy (Rodr&#x000ED;guez-Mac&#x000ED;as et al., <xref ref-type="bibr" rid="B37">2023</xref>). Moreover, higher <italic>ENT2</italic> levels were linked to a poor response to treatment. These findings align with the observation that <italic>ENT2</italic> upregulation is associated with advanced stages of various cancer types, including mantle cell lymphoma, hepatocellular carcinoma, and colorectal cancer (Pastor-Anglada and P&#x000E9;rez-Torras, <xref ref-type="bibr" rid="B32">2018</xref>).</p>
<p>Among the gene mutations identified as relevant to the model, the <italic>TP53</italic> mutation is the best-known. Several studies show the relationship between <italic>TP53</italic> mutation and therapeutic response and prognosis. The <italic>TP53</italic> gene is considered the guardian of genomic stability, as it controls cell cycle progression and apoptosis in situations of stress or DNA damage, and mutations in this gene are found in half of all cancer patients (Kastenhuber and Lowe, <xref ref-type="bibr" rid="B23">2017</xref>; Monti et al., <xref ref-type="bibr" rid="B27">2020</xref>). Although mutations in <italic>TP53</italic> are less common in AML patients (about 10%), they predict a poor prognosis (Papaemmanuil et al., <xref ref-type="bibr" rid="B31">2016</xref>; Grob et al., <xref ref-type="bibr" rid="B17">2022</xref>).</p>
<p>Mutations in <italic>U2AF1</italic> and <italic>SRSF2</italic> are more common in myelodysplastic syndrome and rare in <italic>de novo</italic> AML (Papaemmanuil et al., <xref ref-type="bibr" rid="B31">2016</xref>; Xu et al., <xref ref-type="bibr" rid="B47">2017</xref>), but have been associated with an unfavorable prognosis in myeloid neoplasms (Zhu et al., <xref ref-type="bibr" rid="B50">2021</xref>). <italic>U2AF1</italic> regulates the pre-mRNA splicing processes to generate functional mRNAs, and is considered a key element in the spliceosome (Zhao et al., <xref ref-type="bibr" rid="B49">2022</xref>).</p>
</sec>
<sec>
<title>4.2 Prediction models</title>
<p>We have evaluated the nine single outcome prediction models (each one trained with a different dataset and classification method), applying the traditional 8:1:1 hold-out validation strategy (Section 3.4). Specifically, the training set consisted of 216 samples randomly selected, the validation set was composed of 28 samples, and the test set also contained 28 samples. It is noteworthy that the data partitioning for training and testing was kept consistent across all models.</p>
<p><xref ref-type="table" rid="T7">Table 7</xref> summarizes the performance achieved by each model. Three stand out as the top performers, each associated with a distinct dataset (lines highlighted in bold). Notably, all these models exhibit good results, but the one using genetic expression data shows particularly promising performance.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Performance achieved by individual models using the three datasets individually.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Dataset</bold></th>
<th valign="top" align="center"><bold>Methods</bold></th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>ACC</bold></th>
<th valign="top" align="center"><bold>PREC</bold></th>
<th valign="top" align="center"><bold>REC</bold></th>
<th valign="top" align="center"><bold>MCC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="3">CLIN</td>
<td valign="top" align="center">RF</td>
<td valign="top" align="center">0.6562</td>
<td valign="top" align="center">0.6222</td>
<td valign="top" align="center">0.6666</td>
<td valign="top" align="center">0.6554</td>
<td valign="top" align="center">0.6666</td>
<td valign="top" align="center">0.2603</td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">0.6713</td>
<td valign="top" align="center">0.6888</td>
<td valign="top" align="center">0.6666</td>
<td valign="top" align="center">0.7132</td>
<td valign="top" align="center">0.6666</td>
<td valign="top" align="center">0.3670</td>
</tr>
 <tr>
<td valign="top" align="left"><bold>LR</bold></td>
<td valign="top" align="center"><bold>0.7044</bold></td>
<td valign="top" align="center"><bold>0.6777</bold></td>
<td valign="top" align="center"><bold>0.7083</bold></td>
<td valign="top" align="center"><bold>0.7031</bold></td>
<td valign="top" align="center"><bold>0.7083</bold></td>
<td valign="top" align="center"><bold>0.3651</bold></td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">MUT</td>
<td valign="top" align="center"><bold>RF</bold></td>
<td valign="top" align="center"><bold>0.7129</bold></td>
<td valign="top" align="center"><bold>0.7222</bold></td>
<td valign="top" align="center"><bold>0.7083</bold></td>
<td valign="top" align="center"><bold>0.7395</bold></td>
<td valign="top" align="center"><bold>0.7083</bold></td>
<td valign="top" align="center"><bold>0.4303</bold></td>
</tr>
 <tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">0.4807</td>
<td valign="top" align="center">0.5000</td>
<td valign="top" align="center">0.6250</td>
<td valign="top" align="center">0.7656</td>
<td valign="top" align="center">0.6250</td>
<td valign="top" align="center">0.0000</td>
</tr>
 <tr>
<td valign="top" align="left"><bold>LR</bold></td>
<td valign="top" align="center"><bold>0.7129</bold></td>
<td valign="top" align="center"><bold>0.7222</bold></td>
<td valign="top" align="center"><bold>0.7083</bold></td>
<td valign="top" align="center"><bold>0.7395</bold></td>
<td valign="top" align="center"><bold>0.7083</bold></td>
<td valign="top" align="center"><bold>0.4303</bold></td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">EXP</td>
<td valign="top" align="center"><bold>RF</bold></td>
<td valign="top" align="center"><bold>0.7803</bold></td>
<td valign="top" align="center"><bold>0.7444</bold></td>
<td valign="top" align="center"><bold>0.7916</bold></td>
<td valign="top" align="center"><bold>0.7986</bold></td>
<td valign="top" align="center"><bold>0.7916</bold></td>
<td valign="top" align="center"><bold>0.5465</bold></td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">0.6284</td>
<td valign="top" align="center">0.6111</td>
<td valign="top" align="center">0.6250</td>
<td valign="top" align="center">0.6339</td>
<td valign="top" align="center">0.6250</td>
<td valign="top" align="center">0.2182</td>
</tr>
 <tr>
<td valign="top" align="left">LR</td>
<td valign="top" align="center">0.6200</td>
<td valign="top" align="center">0.5888</td>
<td valign="top" align="center">0.6250</td>
<td valign="top" align="center">0.6171</td>
<td valign="top" align="center">0.6250</td>
<td valign="top" align="center">0.1825</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The best results are highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<p>The logistic regression model trained with clinical data achieved a reasonable performance. It can be valuable to healthcare specialists as it represents the initial data acquired during a patient&#x00027;s clinical visit. Consequently, when genetic data are inaccessible, a clinical outcome prediction model can assist the specialist in deciding the most appropriate treatment intensity for each patient. Nevertheless, including genetic data can substantially enhance predictive performance, as these attributes contribute to improved class separability.</p>
<p>The models created with genetic mutation data obtained superior performances compared to those trained with clinical data. The logistic regression and random forest models obtained the same results. Regarding the classification methods evaluated, the Random Forest and Logistic Regression achieved the best overall performances. In addition, these methods have the advantage that their prediction models can be explained to some extent.</p>
</sec>
<sec>
<title>4.3 Classification committee</title>
<p>We have combined the three highest-performing prediction models presented in <xref ref-type="table" rid="T7">Table 7</xref> as a classifier committee to compute the predicted survival outcome for a given patient. The outcome prediction is computed by the weighted output (detailed in Section 3.4) provided by the individual classifiers, considering all possible treatment intensities (target, regular, low-intensity, and high-intensity; <xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<p>We have evaluated the performance of the committees using a 9:1 hold-out (incorporating the prior validation set into the training set). <xref ref-type="table" rid="T8">Table 8</xref> presents the results of all ensembles created by combining the individual prediction models.</p>
<table-wrap position="float" id="T8">
<label>Table 8</label>
<caption><p>Performance obtained by the ensemble of classifiers.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Dataset</bold></th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>ACC</bold></th>
<th valign="top" align="center"><bold>PREC</bold></th>
<th valign="top" align="center"><bold>REC</bold></th>
<th valign="top" align="center"><bold>MCC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">CLIN&#x0002B;MUT</td>
<td valign="top" align="center">0.6946</td>
<td valign="top" align="center">0.6429</td>
<td valign="top" align="center">0.6786</td>
<td valign="top" align="center">0.7250</td>
<td valign="top" align="center">0.6786</td>
<td valign="top" align="center">0.2582</td>
</tr> <tr>
<td valign="top" align="left">CLIN&#x0002B;EXP</td>
<td valign="top" align="center">0.8179</td>
<td valign="top" align="center">0.8128</td>
<td valign="top" align="center">0.8214</td>
<td valign="top" align="center">0.8345</td>
<td valign="top" align="center">0.8214</td>
<td valign="top" align="center">0.6512</td>
</tr> <tr>
<td valign="top" align="left">MUT&#x0002B;EXP</td>
<td valign="top" align="center">0.8179</td>
<td valign="top" align="center">0.8128</td>
<td valign="top" align="center">0.8214</td>
<td valign="top" align="center">0.8345</td>
<td valign="top" align="center">0.8214</td>
<td valign="top" align="center">0.6512</td>
</tr> <tr>
<td valign="top" align="left">CLIN&#x0002B;MUT&#x0002B;EXP</td>
<td valign="top" align="center"><bold>0.8907</bold></td>
<td valign="top" align="center"><bold>0.8846</bold></td>
<td valign="top" align="center"><bold>0.8929</bold></td>
<td valign="top" align="center"><bold>0.9107</bold></td>
<td valign="top" align="center"><bold>0.8929</bold></td>
<td valign="top" align="center"><bold>0.8006</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Bold values represent the best achieved results.</p>
</table-wrap-foot>
</table-wrap>
<p>The results obtained are promising, as most of the performance measures improved significantly, indicating that the ensemble can be safely used as a decision support system to recommend appropriate therapy protocol for AML patients, with precision higher than 90%. We obtained the best overall results by combining the models trained with the three datasets available.</p>
<p>Regarding clinical data, the ensemble that combined these data with genetic expression presented a significant improvement compared to individual models trained in both contexts. These results suggest that combining these data types leads to substantial predictive power for survival in AML patients. In the case of combining genetic mutation and clinical data, a decrease in predictive ability was observed compared to the individual model trained only with genetic mutation data. Nevertheless, the ensemble that combined genetic data exhibited enhanced performance compared to individual models of this data type.</p></sec>
</sec>
<sec sec-type="conclusions" id="s5">
<title>5 Conclusions</title>
<p>To guide the selection of therapy protocols for patients with AML, healthcare specialists commonly rely on prognostic evaluations based on treatment response predictions and clinical outcomes. The prevailing ELN risk stratification categorizes patients into favorable, intermediate, and adverse risk groups. However, this classification tends to be conservative, with most patients falling into the intermediate risk category. Consequently, specialists often demand additional examinations, leading to delays in treatment initiation and potentially compromising the patient&#x00027;s clinical condition.</p>
<p>This paper presented a decision support system that automatically recommends oncology therapies of appropriate intensity based on the clinical outcome prediction for a given patient. The core of this system is composed of a committee of classifiers trained with clinical, gene mutation, and gene expression data. The proposed ensemble achieved a high performance close to 0.9 in F1-Score and AUC.</p>
<p>We also conducted an evaluation of individual models trained solely with specific types of data. Among them, the model generated with gene expression data exhibited superior performance and could independently assist healthcare specialists in determining the most suitable treatment for individual patients. For further improvement, specialists could employ the ensemble model incorporating all data types. In cases where genetic data collection is unavailable in the clinical setting, the single model trained solely with clinical data can be employed as an alternative.</p>
<p>The findings presented in this work indicate that we can employ state-of-the-art machine learning techniques to automatically process and analyze large volumes of clinical and gene data. These approaches can effectively support specialists in making well-informed decisions regarding the most suitable and safe therapy for individual patients. By significantly reducing the time required for treatment selection, these techniques can enhance overall patient outcomes, leading to extended survival and improved quality of life for individuals afflicted with the disease.</p>
<p>Despite the promising results presented in this study, it is essential to highlight its main limitations. The limited amount of public data available and used restricts the training of more sophisticated and accurate machine-learning models. Furthermore, the data reflect the characteristics of a particular region. At least 75% of the blood samples are from white patients, which may hinder the generalization power of the decision support system across different racial groups.</p>
<p>In future work, we aim to assess the performance of the proposed system in a real-world scenario. Furthermore, we recommend further investigating the genes selected in the feature selection stage. This analysis would provide valuable insights into the biological significance and functional implications of the selected genes, mainly <italic>MICALL2, KIAA0141</italic>, and <italic>SLC29A2</italic>, potentially revealing novel biomarkers or therapeutic targets related to AML. Such endeavors would contribute to refining and validating the proposed system, ultimately enhancing its application and impact on clinical decision-making processes.</p></sec>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/supplementary material.</p></sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>GC: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Resources, Software, Validation, Visualization, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. JA: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Resources, Software, Validation, Visualization, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. JM-N: Conceptualization, Data curation, Formal analysis, Funding acquisition, Validation, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. TA: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Supervision, Validation, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing.</p></sec>
</body>
<back>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The financial support was provided by the Brazilian Coordination for the Improvement of Higher Education Personnel (CAPES), the Brazilian National Council for Scientific and Technological Development (CNPq), and The S&#x000E3;o Paulo Research Foundation (FAPESP), grants &#x00023;2021/11606-3 and &#x00023;2021/13325-1.</p>
</sec>
<ack><p>We thank our colleague Breno Freitas<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> for his valuable assistance in the initial stages of this research project.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest. The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn0001"><p><sup>1</sup>Data Science Manager at Shopify&#x02014;Canada. Homepage: <ext-link ext-link-type="uri" xlink:href="https://breno.io/">https://breno.io/</ext-link>. E-mail: <email>breno.limadefreitas&#x00040;shopify.com</email></p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ahmed</surname> <given-names>Y. B.</given-names></name> <name><surname>Al-Bzour</surname> <given-names>A. N.</given-names></name> <name><surname>Ababneh</surname> <given-names>O. E.</given-names></name> <name><surname>Abushukair</surname> <given-names>H. M.</given-names></name> <name><surname>Saeed</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Genomic and transcriptomic predictors of response to immune checkpoint inhibitors in melanoma patients: a machine learning approach</article-title>. <source>Cancers</source> <volume>14</volume>:<fpage>5605</fpage>. <pub-id pub-id-type="doi">10.3390/cancers14225605</pub-id><pub-id pub-id-type="pmid">36428698</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Almeida</surname> <given-names>J. M.</given-names></name> <name><surname>Castro</surname> <given-names>G. A.</given-names></name> <name><surname>Machado-Neto</surname> <given-names>J. A.</given-names></name> <name><surname>Almeida</surname> <given-names>T. A.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;An explainable model to support the decision about the therapy protocol for AML,&#x0201D;</article-title> in <source>Proceedings of the 12th Brazilian Conference on Intelligent Systems (BRACIS&#x02013;23)</source> (<publisher-loc>Belo Horizonte, MG</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>15</lpage>.</citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Angenendt</surname> <given-names>L.</given-names></name> <name><surname>R&#x000F6;llig</surname> <given-names>C.</given-names></name> <name><surname>Montesinos</surname> <given-names>P.</given-names></name> <name><surname>Mart&#x000ED;nez-Cuadr&#x000F3;n</surname> <given-names>D.</given-names></name> <name><surname>Barragan</surname> <given-names>E.</given-names></name> <name><surname>Garc&#x000ED;a</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Chromosomal abnormalities and prognosis in NPM1&#x02013;mutated acute myeloid leukemia: a pooled analysis of individual patient data from nine international cohorts</article-title>. <source>J. Clin. Oncol.</source> <volume>37</volume>, <fpage>2632</fpage>&#x02013;<lpage>2642</lpage>. <pub-id pub-id-type="doi">10.1200/JCO.19.00416</pub-id><pub-id pub-id-type="pmid">31430225</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arber</surname> <given-names>D. A.</given-names></name> <name><surname>Orazi</surname> <given-names>A.</given-names></name> <name><surname>Hasserjian</surname> <given-names>R. P.</given-names></name> <name><surname>Borowitz</surname> <given-names>M. J.</given-names></name> <name><surname>Calvo</surname> <given-names>K. R.</given-names></name> <name><surname>Kvasnicka</surname> <given-names>H.-M.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>International consensus classification of myeloid neoplasms and acute leukemias: integrating morphologic, clinical, and genomic data</article-title>. <source>Blood</source> <volume>140</volume>, <fpage>1200</fpage>&#x02013;<lpage>1228</lpage>. <pub-id pub-id-type="doi">10.1182/blood.2022015850</pub-id><pub-id pub-id-type="pmid">35767897</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bennett</surname> <given-names>J. M.</given-names></name> <name><surname>Catovsky</surname> <given-names>D.</given-names></name> <name><surname>Daniel</surname> <given-names>M.-T.</given-names></name> <name><surname>Flandrin</surname> <given-names>G.</given-names></name> <name><surname>Galton</surname> <given-names>D. A. G.</given-names></name> <name><surname>Gralnick</surname> <given-names>H. R.</given-names></name> <etal/></person-group>. (<year>1976</year>). <article-title>Proposals for the classification of the acute leukaemias French-American-British (FAB) co-operative group</article-title>. <source>Br. J. Haematol.</source> <volume>33</volume>, <fpage>451</fpage>&#x02013;<lpage>458</lpage>.<pub-id pub-id-type="pmid">188440</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Boser</surname> <given-names>B. E.</given-names></name> <name><surname>Guyon</surname> <given-names>I. M.</given-names></name> <name><surname>Vapnik</surname> <given-names>V. N.</given-names></name></person-group> (<year>1992</year>). <article-title>&#x0201C;A training algorithm for optimal margin classifiers,&#x0201D;</article-title> in <source>Proceedings of the Fifth Annual Workshop on Computational Learning Theory&#x02014;COLT&#x02013;92</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>144</fpage>&#x02013;<lpage>152</lpage>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Breiman</surname> <given-names>L.</given-names></name></person-group> (<year>2001</year>). <article-title>Random Forests</article-title>. <source>Machine Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x02013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cover</surname> <given-names>T.</given-names></name> <name><surname>Hart</surname> <given-names>P.</given-names></name></person-group> (<year>1967</year>). <article-title>Nearest neighbor pattern classification</article-title>. <source>IEEE Trans. Inform. Theor.</source> <volume>13</volume>, <fpage>21</fpage>&#x02013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1109/TIT.1967.1053964</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cramer</surname> <given-names>J.</given-names></name></person-group> (<year>2003</year>). <article-title>The origins of logistic regression</article-title>. <source>SSRN Electr. J.</source> <volume>119</volume>:<fpage>16</fpage>. <pub-id pub-id-type="doi">10.2139/ssrn.360300</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x000F6;hner</surname> <given-names>H.</given-names></name> <name><surname>Estey</surname> <given-names>E.</given-names></name> <name><surname>Grimwade</surname> <given-names>D.</given-names></name> <name><surname>Amadori</surname> <given-names>S.</given-names></name> <name><surname>Appelbaum</surname> <given-names>F. R.</given-names></name> <name><surname>B&#x000FC;chner</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Diagnosis and management of AML in adults: 2017 ELN recommendations from an international expert panel</article-title>. <source>Blood</source> <volume>129</volume>, <fpage>424</fpage>&#x02013;<lpage>447</lpage>. <pub-id pub-id-type="doi">10.1182/blood-2016-08-733196</pub-id><pub-id pub-id-type="pmid">27895058</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x000F6;hner</surname> <given-names>H.</given-names></name> <name><surname>Estey</surname> <given-names>E. H.</given-names></name> <name><surname>Amadori</surname> <given-names>S.</given-names></name> <name><surname>Appelbaum</surname> <given-names>F. R.</given-names></name> <name><surname>B&#x000FC;chner</surname> <given-names>T.</given-names></name> <name><surname>Burnett</surname> <given-names>A. K.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Diagnosis and management of acute myeloid leukemia in adults: recommendations from an international expert panel, on behalf of the European LeukemiaNet</article-title>. <source>Blood</source> <volume>115</volume>, <fpage>453</fpage>&#x02013;<lpage>474</lpage>. <pub-id pub-id-type="doi">10.1182/blood-2009-07-235358</pub-id><pub-id pub-id-type="pmid">19880497</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x000F6;hner</surname> <given-names>H.</given-names></name> <name><surname>Wei</surname> <given-names>A. H.</given-names></name> <name><surname>Appelbaum</surname> <given-names>F. R.</given-names></name> <name><surname>Craddock</surname> <given-names>C.</given-names></name> <name><surname>DiNardo</surname> <given-names>C. D.</given-names></name> <name><surname>Dombret</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Diagnosis and management of AML in adults: 2022 recommendations from an international expert panel on behalf of the ELN</article-title>. <source>Blood</source> <volume>140</volume>, <fpage>1345</fpage>&#x02013;<lpage>1377</lpage>. <pub-id pub-id-type="doi">10.1182/blood.2022016867</pub-id><pub-id pub-id-type="pmid">35797463</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Estey</surname> <given-names>E. A.</given-names></name></person-group> (<year>2019</year>). <article-title>Acute myeloid leukemia: 2019 update on risk-stratification and management</article-title>. <source>Am. J. Hematol.</source> <volume>93</volume>, <fpage>1267</fpage>&#x02013;<lpage>1291</lpage>. <pub-id pub-id-type="doi">10.1002/ajh.25214</pub-id><pub-id pub-id-type="pmid">30328165</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fitter</surname> <given-names>S.</given-names></name> <name><surname>Bradey</surname> <given-names>A. L.</given-names></name> <name><surname>Kok</surname> <given-names>C. H.</given-names></name> <name><surname>Noll</surname> <given-names>J. E.</given-names></name> <name><surname>Wilczek</surname> <given-names>V. J.</given-names></name> <name><surname>Venn</surname> <given-names>N. C.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title><italic>CKLF</italic> and <italic>IL1B</italic> transcript levels at diagnosis are predictive of relapse in children with pre&#x02013;B&#x02013;cell acute lymphoblastic leukaemia</article-title>. <source>Br. J. Haematol.</source> <volume>193</volume>, <fpage>171</fpage>&#x02013;<lpage>175</lpage>. <pub-id pub-id-type="doi">10.1111/bjh.17161</pub-id><pub-id pub-id-type="pmid">33620089</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gal</surname> <given-names>O.</given-names></name> <name><surname>Auslander</surname> <given-names>N.</given-names></name> <name><surname>Fan</surname> <given-names>Y.</given-names></name> <name><surname>Meerzaman</surname> <given-names>D.</given-names></name></person-group> (<year>2019</year>). <article-title>Predicting complete remission of acute myeloid leukemia: machine learning applied to gene expression</article-title>. <source>Cancer Informat.</source> <volume>18</volume>, <fpage>1</fpage>&#x02013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1177/1176935119835544</pub-id><pub-id pub-id-type="pmid">30911218</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gerstung</surname> <given-names>M.</given-names></name> <name><surname>Papaemmanuil</surname> <given-names>E.</given-names></name> <name><surname>Martincorena</surname> <given-names>I.</given-names></name> <name><surname>Bullinger</surname> <given-names>L.</given-names></name> <name><surname>Gaidzik</surname> <given-names>V. I.</given-names></name> <name><surname>Paschka</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Precision oncology for acute myeloid leukemia using a knowledge bank approach</article-title>. <source>Nat. Genet.</source> <volume>49</volume>, <fpage>332</fpage>&#x02013;<lpage>340</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3756</pub-id><pub-id pub-id-type="pmid">28092685</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grob</surname> <given-names>T.</given-names></name> <name><surname>Al Hinai</surname> <given-names>A. S. A.</given-names></name> <name><surname>Sanders</surname> <given-names>M. A.</given-names></name> <name><surname>Kavelaars</surname> <given-names>F. G.</given-names></name> <name><surname>Rijken</surname> <given-names>M.</given-names></name> <name><surname>Gradowska</surname> <given-names>P. L.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Molecular characterization of mutant TP53 acute myeloid leukemia and high-risk myelodysplastic syndrome</article-title>. <source>Blood</source> <volume>139</volume>, <fpage>2347</fpage>&#x02013;<lpage>2354</lpage>. <pub-id pub-id-type="doi">10.1182/blood.2021014472</pub-id><pub-id pub-id-type="pmid">35108372</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>X.</given-names></name> <name><surname>Aviles</surname> <given-names>G.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Tian</surname> <given-names>R.</given-names></name> <name><surname>Unger</surname> <given-names>B. A.</given-names></name> <name><surname>Lin</surname> <given-names>Y.-H. T.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Mitochondrial stress is relayed to the cytosol by an OMA1-DELE1-HRI pathway</article-title>. <source>Nature</source> <volume>579</volume>, <fpage>427</fpage>&#x02013;<lpage>432</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2078-2</pub-id><pub-id pub-id-type="pmid">32132707</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoch</surname> <given-names>R. E. E.</given-names></name> <name><surname>C&#x000F3;ser</surname> <given-names>V. M.</given-names></name> <name><surname>Santos</surname> <given-names>I. S.</given-names></name> <name><surname>de Souza</surname> <given-names>A. P. D.</given-names></name></person-group> (<year>2021</year>). <article-title>Lymphoid markers predict prognosis of pediatric and adolescent acute myeloid leukemia</article-title>. <source>Leukemia Res.</source> <volume>107</volume>:<fpage>106603</fpage>. <pub-id pub-id-type="doi">10.1016/j.leukres.2021.106603</pub-id><pub-id pub-id-type="pmid">33957373</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Itzykson</surname> <given-names>R.</given-names></name> <name><surname>Fournier</surname> <given-names>E.</given-names></name> <name><surname>Berthon</surname> <given-names>C.</given-names></name> <name><surname>R&#x000F6;llig</surname> <given-names>C.</given-names></name> <name><surname>Braun</surname> <given-names>T.</given-names></name> <name><surname>Marceau-Renaut</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Genetic identification of patients with AML older than 60 years achieving long-term survival with intensive chemotherapy</article-title>. <source>Blood</source> <volume>138</volume>, <fpage>507</fpage>&#x02013;<lpage>519</lpage>. <pub-id pub-id-type="doi">10.1182/blood.2021011103</pub-id><pub-id pub-id-type="pmid">34410352</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jia</surname> <given-names>Y.</given-names></name> <name><surname>Ye</surname> <given-names>L.</given-names></name> <name><surname>Ji</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Hargest</surname> <given-names>R.</given-names></name> <name><surname>Ji</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Death-associated protein-3, DAP-3, correlates with preoperative chemotherapy effectiveness and prognosis of gastric cancer patients following perioperative chemotherapy and radical gastrectomy</article-title>. <source>Br. J. Cancer</source> <volume>110</volume>, <fpage>421</fpage>&#x02013;<lpage>429</lpage>. <pub-id pub-id-type="doi">10.1038/bjc.2013.712</pub-id><pub-id pub-id-type="pmid">24300973</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jing</surname> <given-names>B.</given-names></name> <name><surname>Deng</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>T.</given-names></name> <name><surname>Hou</surname> <given-names>D.</given-names></name> <name><surname>Li</surname> <given-names>B.</given-names></name> <name><surname>Qiang</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Deep learning for risk prediction in patients with nasopharyngeal carcinoma using multi-parametric MRIs</article-title>. <source>Comput. Methods Progr. Biomed.</source> <volume>197</volume>:<fpage>105684</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2020.105684</pub-id><pub-id pub-id-type="pmid">32781421</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kastenhuber</surname> <given-names>E. R.</given-names></name> <name><surname>Lowe</surname> <given-names>S. W.</given-names></name></person-group> (<year>2017</year>). <article-title>Putting p53 in context</article-title>. <source>Cell</source> <volume>170</volume>, <fpage>1062</fpage>&#x02013;<lpage>1078</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2017.08.028</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kate</surname> <given-names>R. J.</given-names></name> <name><surname>Nadig</surname> <given-names>R.</given-names></name></person-group> (<year>2017</year>). <article-title>Stage-specific predictive models for breast cancer survivability</article-title>. <source>Int. J. Med. Informat.</source> <volume>97</volume>, <fpage>304</fpage>&#x02013;<lpage>311</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2016.11.001</pub-id><pub-id pub-id-type="pmid">27919388</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lagunas-Rangel</surname> <given-names>F. A.</given-names></name> <name><surname>Ch&#x000E1;vez-Valencia</surname> <given-names>V.</given-names></name> <name><surname>G&#x000F3;mez-Guijosa</surname> <given-names>M. &#x000C1;.</given-names></name> <name><surname>Cortes-Penagos</surname> <given-names>C.</given-names></name></person-group> (<year>2017</year>). <article-title>Acute myeloid leukemia&#x02013;genetic alterations and their clinical prognosis</article-title>. <source>Int. J. Hematol. Oncol. Stem Cell Res.</source> <volume>11</volume>, <fpage>328</fpage>&#x02013;<lpage>339</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Mitchell</surname> <given-names>T. M.</given-names></name></person-group> (<year>1997</year>). <source>Machine Learning</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>McGraw-Hill</publisher-name>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Monti</surname> <given-names>P.</given-names></name> <name><surname>Menichini</surname> <given-names>P.</given-names></name> <name><surname>Speciale</surname> <given-names>A.</given-names></name> <name><surname>Cutrona</surname> <given-names>G.</given-names></name> <name><surname>Fais</surname> <given-names>F.</given-names></name> <name><surname>Taiana</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Heterogeneity of TP53 mutations and P53 protein residual function in cancer: does it matter?</article-title> <source>Front. Oncol.</source> <volume>10</volume>:<fpage>593383</fpage>. <pub-id pub-id-type="doi">10.3389/fonc.2020.593383</pub-id><pub-id pub-id-type="pmid">33194757</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mosquera Orgueira</surname> <given-names>A.</given-names></name> <name><surname>Peleteiro Ra&#x000ED;ndo</surname> <given-names>A.</given-names></name> <name><surname>Cid L&#x000F3;pez</surname> <given-names>M.</given-names></name> <name><surname>D&#x000ED;az Arias</surname> <given-names>J. &#x000C1;.</given-names></name> <name><surname>Gonz&#x000E1;lez P&#x000E9;rez</surname> <given-names>M. S.</given-names></name> <name><surname>Antelo Rodr&#x000ED;guez</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Personalized survival prediction of patients with acute myeloblastic leukemia using gene expression profiling</article-title>. <source>Front. Oncol.</source> <volume>11</volume>:<fpage>657191</fpage>. <pub-id pub-id-type="doi">10.3389/fonc.2021.657191</pub-id><pub-id pub-id-type="pmid">33854980</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muhammad</surname> <given-names>W.</given-names></name> <name><surname>Hart</surname> <given-names>G. R.</given-names></name> <name><surname>Nartowt</surname> <given-names>B.</given-names></name> <name><surname>Farrell</surname> <given-names>J. J.</given-names></name> <name><surname>Johung</surname> <given-names>K.</given-names></name> <name><surname>Liang</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Pancreatic cancer prediction through an artificial neural network</article-title>. <source>Front. Artif. Intell.</source> <volume>2</volume>:<fpage>2</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2019.00002</pub-id><pub-id pub-id-type="pmid">33733091</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Naes</surname> <given-names>S. M.</given-names></name> <name><surname>Ab-Rahim</surname> <given-names>S.</given-names></name> <name><surname>Mazlan</surname> <given-names>M.</given-names></name> <name><surname>Amir Hashim</surname> <given-names>N. A.</given-names></name> <name><surname>Abdul Rahman</surname> <given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>Increased ENT2 expression and its association with altered purine metabolism in cell lines derived from different stages of colorectal cancer</article-title>. <source>Exp. Therapeut. Med.</source> <volume>25</volume>:<fpage>212</fpage>. <pub-id pub-id-type="doi">10.3892/etm.2023.11911</pub-id><pub-id pub-id-type="pmid">37123217</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Papaemmanuil</surname> <given-names>E.</given-names></name> <name><surname>Gerstung</surname> <given-names>M.</given-names></name> <name><surname>Bullinger</surname> <given-names>L.</given-names></name> <name><surname>Gaidzik</surname> <given-names>V. I.</given-names></name> <name><surname>Paschka</surname> <given-names>P.</given-names></name> <name><surname>Roberts</surname> <given-names>N. D.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Genomic classification and prognosis in acute myeloid leukemia</article-title>. <source>N. Engl. J. Med.</source> <volume>374</volume>, <fpage>2209</fpage>&#x02013;<lpage>2221</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa1516192</pub-id><pub-id pub-id-type="pmid">27276561</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pastor-Anglada</surname> <given-names>M.</given-names></name> <name><surname>P&#x000E9;rez-Torras</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>Emerging roles of nucleoside transporters</article-title>. <source>Front. Pharmacol.</source> <volume>9</volume>:<fpage>606</fpage>. <pub-id pub-id-type="doi">10.3389/fphar.2018.00606</pub-id><pub-id pub-id-type="pmid">29928232</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pelcovits</surname> <given-names>A.</given-names></name> <name><surname>Niroula</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <article-title>Acute myeloid leukemia: a review</article-title>. <source>Rhode Island Med. J.</source> <volume>103</volume>, <fpage>38</fpage>&#x02013;<lpage>40</lpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rabaan</surname> <given-names>A. A.</given-names></name> <name><surname>Bakhrebah</surname> <given-names>M. A.</given-names></name> <name><surname>AlSaihati</surname> <given-names>H.</given-names></name> <name><surname>Alhumaid</surname> <given-names>S.</given-names></name> <name><surname>Alsubki</surname> <given-names>R. A.</given-names></name> <name><surname>Turkistani</surname> <given-names>S. A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Artificial intelligence for clinical diagnosis and treatment of prostate cancer</article-title>. <source>Cancers</source> <volume>14</volume>:<fpage>5595</fpage>. <pub-id pub-id-type="doi">10.3390/cancers14225595</pub-id><pub-id pub-id-type="pmid">36428686</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Radhachandran</surname> <given-names>A.</given-names></name> <name><surname>Garikipati</surname> <given-names>A.</given-names></name> <name><surname>Iqbal</surname> <given-names>Z.</given-names></name> <name><surname>Siefkas</surname> <given-names>A.</given-names></name> <name><surname>Barnes</surname> <given-names>G.</given-names></name> <name><surname>Hoffman</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>A machine learning approach to predicting risk of myelodysplastic syndrome</article-title>. <source>Leukemia Res.</source> <volume>109</volume>:<fpage>106639</fpage>. <pub-id pub-id-type="doi">10.1016/j.leukres.2021.106639</pub-id><pub-id pub-id-type="pmid">34171604</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ris</surname> <given-names>T.</given-names></name> <name><surname>Teixeira-Carvalho</surname> <given-names>A.</given-names></name> <name><surname>Coelho</surname> <given-names>R. M. P.</given-names></name> <name><surname>Brandao-de-Resende</surname> <given-names>C.</given-names></name> <name><surname>Gomes</surname> <given-names>M. S.</given-names></name> <name><surname>Amaral</surname> <given-names>L. R.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Inflammatory biomarkers in infective endocarditis: machine learning to predict mortality</article-title>. <source>Clin. Exp. Immunol.</source> <volume>196</volume>, <fpage>374</fpage>&#x02013;<lpage>382</lpage>. <pub-id pub-id-type="doi">10.1111/cei.13266</pub-id><pub-id pub-id-type="pmid">30697694</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rodr&#x000ED;guez-Mac&#x000ED;as</surname> <given-names>G.</given-names></name> <name><surname>Briz</surname> <given-names>O.</given-names></name> <name><surname>Cives-Losada</surname> <given-names>C.</given-names></name> <name><surname>Chill&#x000F3;n</surname> <given-names>M. C.</given-names></name> <name><surname>Mart&#x000ED;nez-Laperche</surname> <given-names>C.</given-names></name> <name><surname>Mart&#x000ED;nez-Arranz</surname> <given-names>I.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Role of intracellular drug disposition in the response of acute myeloid leukemia to cytarabine and idarubicin induction chemotherapy</article-title>. <source>Cancers</source> <volume>15</volume>:<fpage>3145</fpage>. <pub-id pub-id-type="doi">10.3390/cancers15123145</pub-id><pub-id pub-id-type="pmid">37370755</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rose-Inman</surname> <given-names>H.</given-names></name> <name><surname>Kuehl</surname> <given-names>D.</given-names></name></person-group> (<year>2014</year>). <article-title>Acute leukemia</article-title>. <source>Emerg. Med. Clin. N. Am.</source> <volume>32</volume>, <fpage>579</fpage>&#x02013;<lpage>596</lpage>. <pub-id pub-id-type="doi">10.1016/j.emc.2014.04.004</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sato</surname> <given-names>Y.</given-names></name> <name><surname>Yoshino</surname> <given-names>H.</given-names></name> <name><surname>Kashiwakura</surname> <given-names>I.</given-names></name> <name><surname>Tsuruga</surname> <given-names>E.</given-names></name></person-group> (<year>2021</year>). <article-title>DAP3 is involved in modulation of cellular radiation response by RIG-I-Like receptor agonist in human lung adenocarcinoma cells</article-title>. <source>Int. J. Mol. Sci.</source> <volume>22</volume>:<fpage>420</fpage>. <pub-id pub-id-type="doi">10.3390/ijms22010420</pub-id><pub-id pub-id-type="pmid">33401559</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Spackman</surname> <given-names>K. A.</given-names></name></person-group> (<year>1989</year>). <article-title>&#x0201C;Signal detection theory: valuable tools for evaluating inductive learning,&#x0201D;</article-title> in <source>6th International Workshop on Machine Learning</source> (<publisher-loc>San Francisco, CA</publisher-loc>: <publisher-name>Morgan Kaufmann</publisher-name>), <fpage>160</fpage>&#x02013;<lpage>163</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sui</surname> <given-names>L.</given-names></name> <name><surname>Zeng</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>H.</given-names></name> <name><surname>Ye</surname> <given-names>L.</given-names></name> <name><surname>Martin</surname> <given-names>T. A.</given-names></name> <name><surname>Sanders</surname> <given-names>A. J.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Death associated protein-3 (DAP3) and DAP3 binding cell death enhancer-1 (DELE1) in human colorectal cancer, and their impacts on clinical outcome and chemoresistance</article-title>. <source>Int. J. Oncol.</source> <volume>62</volume>:<fpage>7</fpage>. <pub-id pub-id-type="doi">10.3892/ijo.2022.5455</pub-id><pub-id pub-id-type="pmid">36382667</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><collab>The Cancer Genome Atlas Research Network</collab></person-group> (<year>2013</year>). <article-title>Genomic and epigenomic landscapes of adult <italic>de novo</italic> acute myeloid leukemia</article-title>. <source>N. Engl. J. Med.</source> <volume>368</volume>, <fpage>2059</fpage>&#x02013;<lpage>2074</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa1301689</pub-id></citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tyner</surname> <given-names>J. W.</given-names></name> <name><surname>Tognon</surname> <given-names>C. E.</given-names></name> <name><surname>Bottomly</surname> <given-names>D.</given-names></name> <name><surname>Wilmot</surname> <given-names>B.</given-names></name> <name><surname>Kurtz</surname> <given-names>S. E.</given-names></name> <name><surname>Savage</surname> <given-names>S. L.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Functional genomic landscape of acute myeloid leukaemia</article-title>. <source>Nature</source> <volume>562</volume>, <fpage>526</fpage>&#x02013;<lpage>531</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-018-0623-z</pub-id><pub-id pub-id-type="pmid">30333627</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walczak</surname> <given-names>S.</given-names></name> <name><surname>Velanovich</surname> <given-names>V.</given-names></name></person-group> (<year>2018</year>). <article-title>Improving prognosis and reducing decision regret for pancreatic cancer treatment using artificial neural networks</article-title>. <source>Decision Supp. Syst.</source> <volume>106</volume>, <fpage>110</fpage>&#x02013;<lpage>118</lpage>. <pub-id pub-id-type="doi">10.1016/j.dss.2017.12.007</pub-id></citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Lee</surname> <given-names>S. H.</given-names></name> <name><surname>Geng</surname> <given-names>H.</given-names></name> <name><surname>Zhong</surname> <given-names>H.</given-names></name> <name><surname>Plastaras</surname> <given-names>J.</given-names></name> <name><surname>Wojcieszynski</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Interpretable machine learning for predicting pathologic complete response in patients treated with chemoradiation therapy for rectal adenocarcinoma</article-title>. <source>Front. Artif. Intell.</source> <volume>5</volume>:<fpage>1059033</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2022.1059033</pub-id><pub-id pub-id-type="pmid">36568580</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Sheng</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Palta</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Fluence map prediction using deep learning models&#x02014;direct plan generation for pancreas stereotactic body radiation therapy</article-title>. <source>Front. Artif. Intell.</source> <volume>3</volume>:<fpage>68</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2020.00068</pub-id><pub-id pub-id-type="pmid">33733185</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>F.</given-names></name> <name><surname>Wu</surname> <given-names>L.-Y.</given-names></name> <name><surname>He</surname> <given-names>Q.</given-names></name> <name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Song</surname> <given-names>L.-X.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Exploration of the role of gene mutations in myelodysplastic syndromes through a sequencing design involving a small number of target genes</article-title>. <source>Sci. Rep.</source> <volume>7</volume>:<fpage>43113</fpage>. <pub-id pub-id-type="doi">10.1038/srep43113</pub-id><pub-id pub-id-type="pmid">28220884</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>J.</given-names></name> <name><surname>Du</surname> <given-names>Y.</given-names></name> <name><surname>Jalil</surname> <given-names>A.</given-names></name> <name><surname>Ahmed</surname> <given-names>Z.</given-names></name> <name><surname>Mori</surname> <given-names>S.</given-names></name> <name><surname>Patel</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Mutational profiling of myeloid neoplasms associated genes may aid the diagnosis of acute myeloid leukemia with myelodysplasia-related changes</article-title>. <source>Leukemia Res.</source> <volume>110</volume>:<fpage>106701</fpage>. <pub-id pub-id-type="doi">10.1016/j.leukres.2021.106701</pub-id><pub-id pub-id-type="pmid">34481124</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Cai</surname> <given-names>W.</given-names></name> <name><surname>Hua</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Zhou</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>The biological and clinical consequences of RNA splicing factor U2AF1 mutation in myeloid malignancies</article-title>. <source>Cancers</source> <volume>14</volume>:<fpage>4406</fpage>. <pub-id pub-id-type="doi">10.3390/cancers14184406</pub-id><pub-id pub-id-type="pmid">36139566</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>Y.</given-names></name> <name><surname>Song</surname> <given-names>D.</given-names></name> <name><surname>Guo</surname> <given-names>J.</given-names></name> <name><surname>Jin</surname> <given-names>J.</given-names></name> <name><surname>Tao</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>U2AF1 mutation promotes tumorigenicity through facilitating autophagy flux mediated by FOXO3a activation in myelodysplastic syndromes</article-title>. <source>Cell Death Dis.</source> <volume>12</volume>:<fpage>655</fpage>. <pub-id pub-id-type="doi">10.1038/s41419-021-03573-3</pub-id><pub-id pub-id-type="pmid">34183647</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zolbanin</surname> <given-names>H. M.</given-names></name> <name><surname>Delen</surname> <given-names>D.</given-names></name> <name><surname>Hassan Zadeh</surname> <given-names>A.</given-names></name></person-group> (<year>2015</year>). <article-title>Predicting overall survivability in comorbidity of cancers: a data mining approach</article-title>. <source>Decision Supp. Syst.</source> <volume>74</volume>, <fpage>150</fpage>&#x02013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1016/j.dss.2015.04.003</pub-id></citation>
</ref>
</ref-list>
</back>
</article>