<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neuroinform.</journal-id>
<journal-title>Frontiers in Neuroinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neuroinform.</abbrev-journal-title>
<issn pub-type="epub">1662-5196</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fninf.2024.1378281</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Neuroscience</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Early detection of mild cognitive impairment through neuropsychological tests in population screenings: a decision support system integrating ontologies and machine learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>G&#x00F3;mez-Valad&#x00E9;s</surname> <given-names>Alba</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/976775/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mart&#x00ED;nez-Tom&#x00E1;s</surname> <given-names>Rafael</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/354796/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Garc&#x00ED;a-Herranz</surname> <given-names>Sara</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Bj&#x00F8;rnerud</surname> <given-names>Atle</given-names></name>
<xref rid="aff3" ref-type="aff"><sup>3</sup></xref>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Rinc&#x00F3;n</surname> <given-names>Mariano</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/538865/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Artificial Intelligence, Universidad Nacional de Educaci&#x00F3;n a Distancia (UNED)</institution>, <addr-line>Madrid</addr-line>, <country>Spain</country></aff>
<aff id="aff2"><sup>2</sup><institution>Cogni-UNED Research Group, Faculty of Psychology, UNED</institution>, <addr-line>Madrid</addr-line>, <country>Spain</country></aff>
<aff id="aff3"><sup>3</sup><institution>Computational Radiology and Artificial Intelligence Unit, Department of Physics and Computational Radiology, Clinic for Radiology and Nuclear Medicine, Oslo University Hospital</institution>, <addr-line>Oslo</addr-line>, <country>Norway</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Physics, University of Oslo</institution>, <addr-line>Oslo</addr-line>, <country>Norway</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0001">
<p>Edited by: Emi A. Yuda, Tohoku University, Japan</p>
</fn>
<fn fn-type="edited-by" id="fn0002">
<p>Reviewed by: Jos&#x00E9; Aparecido Da Silva, University of Brasilia, Brazil</p>
<p>Isabel Echeverri, Universidad Aut&#x00F3;noma de Manizales, Colombia</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Alba G&#x00F3;mez-Valad&#x00E9;s, <email>albagvb@dia.uned.es</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>16</day>
<month>10</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>18</volume>
<elocation-id>1378281</elocation-id>
<history>
<date date-type="received">
<day>29</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 G&#x00F3;mez-Valad&#x00E9;s, Mart&#x00ED;nez-Tom&#x00E1;s, Garc&#x00ED;a-Herranz, Bj&#x00F8;rnerud and Rinc&#x00F3;n.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>G&#x00F3;mez-Valad&#x00E9;s, Mart&#x00ED;nez-Tom&#x00E1;s, Garc&#x00ED;a-Herranz, Bj&#x00F8;rnerud and Rinc&#x00F3;n</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Machine learning (ML) methodologies for detecting Mild Cognitive Impairment (MCI) are progressively gaining prevalence to manage the vast volume of processed information. Nevertheless, the black-box nature of ML algorithms and the heterogeneity within the data may result in varied interpretations across distinct studies. To avoid this, in this proposal, we present the design of a decision support system that integrates a machine learning model represented using the Semantic Web Rule Language (SWRL) in an ontology with specialized knowledge in neuropsychological tests, the NIO ontology. The system&#x2019;s ability to detect MCI subjects was evaluated on a database of 520 neuropsychological assessments conducted in Spanish and compared with other well-established ML methods. Using the <italic>F2</italic> coefficient to minimize false negatives, results indicate that the system performs similarly to other well-established ML methods (<italic>F2<sub>TE2</sub></italic>&#x2009;=&#x2009;0.830, only below bagging, <italic>F2<sub>BAG</sub></italic>&#x2009;=&#x2009;0.832) while exhibiting other significant attributes such as explanation capability and data standardization to a common framework thanks to the ontological part. On the other hand, the system&#x2019;s versatility and ease of use were demonstrated with three additional use cases: evaluation of new cases even if the acquisition stage is incomplete (the case records have missing values), incorporation of a new database into the integrated system, and use of the ontology capabilities to relate different domains. This makes it a useful tool to support physicians and neuropsychologists in population-based screenings for early detection of MCI.</p>
</abstract>
<kwd-group>
<kwd>ontology</kwd>
<kwd>machine learning</kwd>
<kwd>SWRL</kwd>
<kwd>decision tree</kwd>
<kwd>ensemble</kwd>
<kwd>decision support system</kwd>
<kwd>MCI</kwd>
</kwd-group>
<counts>
<fig-count count="12"/>
<table-count count="5"/>
<equation-count count="0"/>
<ref-count count="51"/>
<page-count count="15"/>
<word-count count="9443"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Alzheimer&#x2019;s disease (AD) is the most common cause of dementia affecting the elderly (<xref ref-type="bibr" rid="ref16">Jitsuishi and Yamaguchi, 2022</xref>; <xref ref-type="bibr" rid="ref33">Sherimon et al., 2021</xref>; <xref ref-type="bibr" rid="ref50">Zekri et al., 2015</xref>), and its incidence is expected to continue to increase as the population ages (<xref ref-type="bibr" rid="ref14">Ivascu et al., 2015</xref>; <xref ref-type="bibr" rid="ref51">Zhang et al., 2014</xref>). Mild Cognitive Impairment (MCI) has attracted a great deal of attention as a transitional stage between normal aging and AD (<xref ref-type="bibr" rid="ref16">Jitsuishi and Yamaguchi, 2022</xref>; <xref ref-type="bibr" rid="ref34">Panza et al., 2005</xref>; <xref ref-type="bibr" rid="ref51">Zhang et al., 2014</xref>). Early detection of this stage is of vital importance for appropriate early intervention to help slow disease progression and improve patients&#x2019; quality of life (<xref ref-type="bibr" rid="ref14">Ivascu et al., 2015</xref>; <xref ref-type="bibr" rid="ref19">K&#x00F6;nig et al., 2018</xref>). Therefore, significant efforts have been dedicated to identifying early features and symptoms of MCI more efficiently (<xref ref-type="bibr" rid="ref39">Petersen et al., 2014</xref>), which has produced an exponential growth of biomedical data (<xref ref-type="bibr" rid="ref13">Hoehndorf et al., 2015</xref>).</p>
<p>In recent years, ML techniques have been used to obtain an early diagnosis of MCI, either using MRI imaging (<xref ref-type="bibr" rid="ref16">Jitsuishi and Yamaguchi, 2022</xref>) or neuropsychological tests (<xref ref-type="bibr" rid="ref2">Clark et al., 2016</xref>; <xref ref-type="bibr" rid="ref24">Linz et al., 2017</xref>; <xref ref-type="bibr" rid="ref26">L&#x00F3;pez-de-Ipi&#x00F1;a et al., 2018</xref>) due to their capability of handling large amounts of information and obtaining clinically relevant knowledge (<xref ref-type="bibr" rid="ref33">Sherimon et al., 2021</xref>; <xref ref-type="bibr" rid="ref48">Weakley et al., 2015</xref>). But for this information to be useful, and the results obtained in studies with ML models to be generalizable, the data must be in a standardized format (<xref ref-type="bibr" rid="ref9">Gomez-Valades et al., 2021</xref>; <xref ref-type="bibr" rid="ref33">Sherimon et al., 2021</xref>; <xref ref-type="bibr" rid="ref51">Zhang et al., 2014</xref>). This allows efficient retrieval of data (<xref ref-type="bibr" rid="ref35">Patrick and Li, 2012</xref>; <xref ref-type="bibr" rid="ref42">Sahoo et al., 2022</xref>), shareability between different centers (<xref ref-type="bibr" rid="ref10">Gomez-Valad&#x00E9;s et al., 2019</xref>), and univocal interpretation (<xref ref-type="bibr" rid="ref33">Sherimon et al., 2021</xref>). Otherwise, the analyses could lead to different interpretations at centers other than where the data came from, or even in the same center because inexperienced staff may not be familiar with the original guidelines, or the population distribution changes over time. This is extremely critical in the healthcare field (<xref ref-type="bibr" rid="ref33">Sherimon et al., 2021</xref>).</p>
<p>In this scenario, ontologies play a critical role in the management and interoperability of information, allowing the consistent representation of knowledge, standardizing data acquired and stored under different formats and protocols (<xref ref-type="bibr" rid="ref3">Costa, 2014</xref>; <xref ref-type="bibr" rid="ref13">Hoehndorf et al., 2015</xref>), providing a unique meaning to each element (<xref ref-type="bibr" rid="ref33">Sherimon et al., 2021</xref>), avoiding interoperability problems (<xref ref-type="bibr" rid="ref10">Gomez-Valad&#x00E9;s et al., 2019</xref>; <xref ref-type="bibr" rid="ref21">Kulmanov et al., 2020</xref>), easing the retrieval of information and records (<xref ref-type="bibr" rid="ref35">Patrick and Li, 2012</xref>), and improving data analysis and efficiency of clinical diagnostic support systems (<xref ref-type="bibr" rid="ref43">Shoaip et al., 2019</xref>).</p>
<p>Thus, on the one hand, ML models are used to obtain knowledge by searching for patterns of interest in large volumes of data (<xref ref-type="bibr" rid="ref47">Tsymbal et al., 2007</xref>; <xref ref-type="bibr" rid="ref48">Weakley et al., 2015</xref>), while, on the other hand, ontologies provide the basis for reusing and unambiguously integrating domain knowledge within applications (<xref ref-type="bibr" rid="ref15">Jensen et al., 2013</xref>; <xref ref-type="bibr" rid="ref18">Kang et al., 2019</xref>; <xref ref-type="bibr" rid="ref47">Tsymbal et al., 2007</xref>). Our proposal seeks to leverage the benefits of both technologies, which separately have their inconveniences. In the case of ML models, it is usually difficult or even impossible to know the logical process behind a decision (<xref ref-type="bibr" rid="ref47">Tsymbal et al., 2007</xref>; <xref ref-type="bibr" rid="ref48">Weakley et al., 2015</xref>). Moreover, as they do not check data integrity, they can operate with conceptually but not technically incorrect data, leading to erroneous patterns when working with poorly curated databases (<xref ref-type="bibr" rid="ref33">Sherimon et al., 2021</xref>). In the case of ontologies, a high-level representation for the formalization of knowledge (<xref ref-type="bibr" rid="ref47">Tsymbal et al., 2007</xref>) can reach levels of abstraction and complexity that make their use impractical or not viable in real-world scenarios (<xref ref-type="bibr" rid="ref50">Zekri et al., 2015</xref>). Although significant efforts have been made to combine both technologies (<xref ref-type="bibr" rid="ref22">Kulmanov et al., 2021</xref>; <xref ref-type="bibr" rid="ref41">Robinson and Haendel, 2020</xref>), methods that integrate them into decision support systems are still under development (<xref ref-type="bibr" rid="ref21">Kulmanov et al., 2020</xref>).</p>
<p>In this paper, we propose to integrate a set of bootstrap aggregated (or bagged) decision trees for early diagnosis of MCI, which are represented as rules using the Semantic Web Rule Language (SWRL), with an already defined Web Ontology Language (OWL) ontology with specialized knowledge in neuropsychological tests, NIO (<xref ref-type="bibr" rid="ref9">Gomez-Valades et al., 2021</xref>). In this way, the integrated system eases data standardization while providing a fast and interpretable first assessment of the cognitive status of subjects, saving physicians and neuropsychologists time and allowing them to reach a wider population during the screenings.</p>
<p>The rest of the paper continues as follows: Section 2 summarizes the state of the art of other approaches integrating ontologies and ML; Section 3 describes the methodology used to build the integrated system, detailing the ontology, the learning model, and the integration method, as well as the database; Section 4 details the performance results, compares them with other well-established ML models, and presents three use cases that show some advantages of this integration; Section 5 introduces the discussion of these results; and finally, Section 6 closes with the conclusions.</p>
</sec>
<sec id="sec2">
<label>2</label>
<title>State of the art</title>
<p>Ontologies and ML models constitute the two main technologies for extracting, manipulating, and obtaining new knowledge within a domain (<xref ref-type="bibr" rid="ref21">Kulmanov et al., 2020</xref>; <xref ref-type="bibr" rid="ref42">Sahoo et al., 2022</xref>; <xref ref-type="bibr" rid="ref47">Tsymbal et al., 2007</xref>). It seems logical that proper integration between them would result in an overall improvement in the performance of decision support systems (<xref ref-type="bibr" rid="ref21">Kulmanov et al., 2020</xref>; <xref ref-type="bibr" rid="ref42">Sahoo et al., 2022</xref>; <xref ref-type="bibr" rid="ref51">Zhang et al., 2014</xref>). However, both technologies are usually employed separately (<xref ref-type="bibr" rid="ref47">Tsymbal et al., 2007</xref>), although recently there has been an increased effort to combine them (<xref ref-type="bibr" rid="ref18">Kang et al., 2019</xref>). This combination is performed following different objectives, such as the automatic completion of ontologies (<xref ref-type="bibr" rid="ref30">Me&#x017E;nar et al., 2022</xref>), the search for emerging knowledge in ontologies using ML techniques (<xref ref-type="bibr" rid="ref22">Kulmanov et al., 2021</xref>; <xref ref-type="bibr" rid="ref41">Robinson and Haendel, 2020</xref>), or the improvement of diagnosis in decision support systems.</p>
<p>Within this last group, some studies use ontologies and ML models sequentially: an ontology is first used to standardize and add semantic knowledge to a database, which is subsequently used to train the automatic system (<xref ref-type="bibr" rid="ref23">Lakshmi et al., 2019</xref>; <xref ref-type="bibr" rid="ref42">Sahoo et al., 2022</xref>; <xref ref-type="bibr" rid="ref47">Tsymbal et al., 2007</xref>). Other studies focus on the integration of the predictive ML model in an ontology. Thus, a compact decision support system is generated. In this area, one of the first approaches appears in the work of <xref ref-type="bibr" rid="ref51">Zhang et al. (2014)</xref> with Ontology-Driven Decision, which combines an ontology with a decision tree to create a decision support system for the early diagnosis of AD employing MRI images. In that system, the ontology is used to standardize the data and reduce subjectivity, while the decision tree generates the diagnosis. To integrate both parts, the decision tree rules were transformed into RDF rules, and the diagnosis was obtained using a reasoner. The work of <xref ref-type="bibr" rid="ref44">Shoaip et al. (2021)</xref> proposes the integration of an existing ontology, ADDO (<xref ref-type="bibr" rid="ref45">Shoaip et al., 2020</xref>), with a set of rules extracted from both a decision tree and a Repeated Incremental Pruning to Produce Error Reduction (RIPPER) method (<xref ref-type="bibr" rid="ref17">F&#x00FC;rnkranz and Widmer, 1994</xref>) to differentiate between four categories (healthy, significant memory concern, early MCI, and late MCI). Unlike the other studies, which use their own database, they use a heterogeneous dataset obtained from the ADNI database (<xref ref-type="bibr" rid="ref38">Petersen et al., 2010</xref>). 
This dataset includes neuropsychological tests, imaging tests, and chemical and genetic biomarkers which, together with the sociodemographic variables, are collectively called &#x201C;biomarkers.&#x201D; The rules obtained from ML were translated to SWRL rules to integrate them into the ontology. They also link different properties defined in the ontology using SWRL to give rules more expressiveness. Another approach also based on SWRL rules is the one proposed by <xref ref-type="bibr" rid="ref29">Massari et al. (2022c)</xref> for diabetes detection, which combines a decision tree with ontologies. The diagnosis is obtained through inference using a reasoner. The system described was adapted afterward for the early detection of breast cancer (<xref ref-type="bibr" rid="ref27">Massari et al., 2022a</xref>) and covid-19 (<xref ref-type="bibr" rid="ref28">Massari et al., 2022b</xref>).</p>
<p>These papers showed that a proper integration of ontologies and ML algorithms allows heterogeneous data to be accessed and put into a standardized framework, improving the performance of automatic models and facilitating the exchange of data and results. However, there are still problems that need to be addressed. The first is that some studies create their ontologies from scratch instead of reusing or adapting previous ones. Therefore, they may have redundancies and inconsistencies with prior ontologies. The second one is the selection of a decision tree or a RIPPER as the ML model, which was made to ease its translation and integration into rules (SWRL or RDF), something not possible with more complex ML models due to their black box structure. However, decision trees have strong training set dependency (<xref ref-type="bibr" rid="ref51">Zhang et al., 2014</xref>), RIPPER methods have problems with noise and complex databases and need categorical data (<xref ref-type="bibr" rid="ref20">Kotelnikov and Milov, 2018</xref>), and both methods are prone to overfitting if not pruned properly. <xref ref-type="bibr" rid="ref44">Shoaip et al. (2021)</xref> mention using a decision tree and a RIPPER method to create SWRL rules but do not explain how they combine both sets of rules to function as one. Another problem is that of missing values. The most common ways to deal with missing values are either deleting the affected records or tests or imputing the missing values. However, there is an inherent risk of altering the database and hence the results.</p>
<p>Therefore, in this work we propose a decision support system that integrates a set of decision trees that work together as an ensemble to provide a diagnosis based solely on neuropsychological tests with an already established ontology, NIO. We used neuropsychological tests because they are cheaper, faster, and less invasive than the alternatives while keeping a good diagnostic capability, which makes them the most suitable for population screenings. The ontological part will allow the data to be standardized and placed in a semantic context, and the tree ensemble will establish a diagnosis that combines the explainability of decision trees with the power and robustness of an ensemble method. The system is also designed to operate directly with databases with missing values without the need for prior preprocessing by deleting or imputing records.</p>
</sec>
<sec sec-type="materials|methods" id="sec3">
<label>3</label>
<title>Materials and methods</title>
<sec id="sec4">
<label>3.1</label>
<title>System modeling</title>
<p>To generate the decision support system that supports the diagnosis, the process was split into three stages, as shown in the diagram in <xref ref-type="fig" rid="fig1">Figure 1</xref>: (1) ontology selection and adaptation, (2) generation of the ML model (tree ensemble), translation to SWRL rules and integration within the ontology, and (3) loading of the database into the ontology and use of a reasoner compatible with the SWRL rules to infer the diagnosis. Note that in this approach the set of rules is particularized for the specific neuropsychological test battery used in the dataset. Within the decision support system, different sets of rules could coexist, one for each test battery. The results aggregation rule that establishes the final diagnosis only uses the trees associated with a particular neuropsychological test battery, excluding any other decision trees that might be present. The following sections explain the detailed process followed in each stage.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Scheme of the creation of the integrated decision support system combining the ontology and ML.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g001.tif"/>
</fig>
<sec id="sec5">
<label>3.1.1</label>
<title>Database</title>
<p>To show how the integrated system works, we used an anonymized database formed by a sample from a large longitudinal study on the incidence of incipient MCI in the Autonomous Community of Madrid (Spain; <xref ref-type="bibr" rid="ref4">D&#x00ED;az-Mardomingo et al., 2017</xref>; <xref ref-type="bibr" rid="ref5">D&#x00ED;az-Mardomingo and Peraita, 2008</xref>; <xref ref-type="bibr" rid="ref7">Garc&#x00ED;a-Herranz et al., 2016</xref>, <xref ref-type="bibr" rid="ref8">2019</xref>; <xref ref-type="bibr" rid="ref37">Peraita et al., 2011</xref>). Subjects with a previous diagnosis of neurodegenerative disease, disabling chronic disease, psychiatric disorders such as major depression, established neurological abnormality, severe sensory impairment, diabetes, stroke, or loss of consciousness were excluded from the database. The cognitive and emotional status of the subjects was assessed using the Spanish version of the Mini-Mental State Examination (<xref ref-type="bibr" rid="ref25">Lobo et al., 1979</xref>) and the Geriatric Depression Scale (<xref ref-type="bibr" rid="ref49">Yesavage et al., 1982</xref>). The diagnosis of MCI was established based on the Petersen criteria, considering tests that evaluated different cognitive abilities (<xref ref-type="bibr" rid="ref7">Garc&#x00ED;a-Herranz et al., 2016</xref>). The study gathered data from 233 monolingual Spanish subjects aged between 58 and 93&#x2009;years and with an educational level between 0 and 22&#x2009;years of study. Each subject underwent from one to three evaluations, spaced approximately 1&#x2009;year apart, classified on each one as Healthy or MCI. This process yielded a total of 520 cases, which we considered as independent in this study to make the most of the small sample. <xref ref-type="table" rid="tab1">Table 1</xref> shows the summary of the sociodemographic variables in the database.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Summary of the sociodemographic variables of the database, as well as their performance on the MEC (Spanish version of the MMSE).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Classification</th>
<th align="center" valign="top">No. of subjects</th>
<th align="center" valign="top">Men/Women</th>
<th align="center" valign="top">Age mean (std.)</th>
<th align="center" valign="top">Scholarity mean (std.)</th>
<th align="center" valign="top">MEC mean (std.)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Healthy</td>
<td align="center" valign="middle">309</td>
<td align="center" valign="middle">80/229</td>
<td align="center" valign="middle">70.69 (6.03)</td>
<td align="center" valign="middle">11.64 (5.18)</td>
<td align="center" valign="middle">32.94 (2.06)</td>
</tr>
<tr>
<td align="left" valign="middle">MCI</td>
<td align="center" valign="middle">211</td>
<td align="center" valign="middle">59/152</td>
<td align="center" valign="middle">73.00 (6.96)</td>
<td align="center" valign="middle">9.19 (6.06)</td>
<td align="center" valign="middle">30.69 (3.12)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec6">
<label>3.1.2</label>
<title>Ontology and rule system</title>
<p>The NIO ontology (<xref ref-type="bibr" rid="ref9">Gomez-Valades et al., 2021</xref>) was selected for this project because it includes many neuropsychological tests. As NIO is a large ontology with many classes and axioms, it was analyzed and reduced to the appropriate <italic>Classes</italic> for the study to ease its handling and prevent its size from slowing down the reasoner. We used SWRL to integrate the ML model into the ontology since it allows writing rules for reasoning and inferring new knowledge in OWL. We selected SWRL because, being rule-oriented, it makes it possible to translate certain machine learning systems, such as decision trees, into a rule set that can be integrated into an ontology. Finally, to enable the rules to function with the data, the tests were defined as <italic>Individuals</italic>, and the scores and sociodemographic variables as <italic>Data Properties</italic> associated with these <italic>Individuals</italic>.</p>
</sec>
<sec id="sec7">
<label>3.1.3</label>
<title>ML model</title>
<p>Decision trees emerge as the most suitable ML models because their rules can be expressed as a concatenation of conditionals. This allows easy translation to other types of rule systems and simple inference interpretation. However, decision trees are unstable and prone to overfitting, which could lead to inaccuracies and make them less competent for complex problems (<xref ref-type="bibr" rid="ref12">Ho, 1995</xref>). An improvement is bagging, an ensemble learning method based on a set of bootstrap aggregated decision trees whose combined classification is more robust and accurate than the individual decision trees that comprise it and is commonly used to reduce variance.</p>
<p>To train the model, we used 80% of the dataset for training and 20% for testing. From the training set, various sampling subsets of the same size (25% of the training set) were obtained by sampling with replacement to train different decision trees. To keep the explainability of the bootstrap, a reduced number of decision trees was defined. The final classification was obtained by voting, using an odd number of decision trees to avoid ties. The threshold was defined as the minimum number of trees that maximized recall without falling into a trivial classification.</p>
<p>Once the model was obtained, it was translated into SWRL rules. The SWRL rule system is monotonic, which has the following implications:</p>
<list list-type="alpha-lower">
<list-item><p>The system uses deductive reasoning.</p></list-item>
<list-item><p>Rules always move from antecedent to consequent.</p></list-item>
<list-item><p>A rule is only activated if all parts of the antecedent are true, so the consequent is also true.</p></list-item>
<list-item><p>The system uses valid and known elements, not incomplete or unknown facts.</p></list-item>
<list-item><p>The results are always true, so there is no possibility of modification or retraction. Therefore, the addition of new knowledge does not modify the previous knowledge of the model, unlike non-monotonic systems, which can change according to situations or conditions consistent with new knowledge.</p></list-item>
</list>
<p>Taking that into account, to translate each decision tree to SWRL, each leaf of the tree was converted into a SWRL rule (<xref ref-type="bibr" rid="ref27">Massari et al., 2022a</xref>; <xref ref-type="bibr" rid="ref44">Shoaip et al., 2021</xref>). Here is an example:</p>
<p>The leaf of the decision tree:</p>
<disp-quote>
<p><italic>&#x201C;if (Praxias_cons&#x2009;&#x003C;=&#x2009;9.5) and (cal_rey&#x2009;&#x003C;=&#x2009;24.5) and (TrailATi&#x2009;&#x003E;&#x2009;32.5) and (cal_rey&#x2009;&#x003E;&#x2009;14.75) then class: MCI (proba: 81.16%).&#x201D;</italic></p>
</disp-quote>
<p>was translated to:</p>
<disp-quote>
<p><italic>&#x201C;Subject(?p) ^ has_praxias_score (?p, ?PC) ^ swrlb:lessThanOrEqual (?PC, 9.5) ^ has_Rey_complex_figure_score (?p, ?CR) ^ swrlb:lessThanOrEqual (?CR, 24.5) ^ has_Trail_Making_test_A_score (?p, ?TMA) ^ swrlb:greaterThan (?TMA, 32.5) ^ has_Rey_complex_figure_score (?p, ?CR2) ^ swrlb:greaterThan (?CR2, 14.75) -&#x2009;&#x003E;&#x2009;pred_n(?p, 1).&#x201D;</italic></p>
</disp-quote>
<p>To aggregate the decisions of different trees and give the final diagnosis, it is not possible to use the &#x201C;count&#x201D; operation because it is not supported in SWRL (the variable &#x201C;count&#x201D; would have to change every time an increment occurs and, according to implication (e) of the monotonic systems defined above, this is not allowed). Instead, the final classification is established through the &#x201C;sum&#x201D; of each tree prediction, which should always be numerical (e.g., 0 for Healthy and 1 for MCI). The rule that adds up the individual classifications of each tree is always executed after all trees have issued a decision.</p>
<p>To identify the optimal threshold of the system, both the <italic>ROC</italic> curve and the <italic>Precision-Recall (P-R)</italic> curve were analyzed. To detect the threshold that maximizes the system sensitivity to the target Class (MCI), the <italic>F-score</italic> curve was analyzed for different values of <italic>&#x03B2;</italic>. The <italic>F-score</italic> is a relation between <italic>precision</italic> and <italic>recall</italic> in which, depending on the value of <italic>&#x03B2;</italic>, both metrics contribute equally to the score (<italic>&#x03B2;</italic>&#x2009;=&#x2009;1) or more importance is given to <italic>precision</italic> (<italic>&#x03B2;</italic>&#x2009;&#x003C;&#x2009;1) or to <italic>recall</italic> (<italic>&#x03B2;</italic>&#x2009;&#x003E;&#x2009;1).</p>
</sec>
<sec id="sec8">
<label>3.1.4</label>
<title>System implementation</title>
<p>We used the following environments to implement the system: Python 3.4 with the Scikit-Learn (<xref ref-type="bibr" rid="ref36">Pedregosa et al., 2011</xref>) module to generate the decision trees and automatically translate them into SWRL rules; Prot&#x00E9;g&#x00E9; 5.6.1 (<xref ref-type="bibr" rid="ref31">Musen, 2015</xref>) for ontology management due to its ease of use and wide adoption; the Prot&#x00E9;g&#x00E9; SWRL Tab plugin 2.1.0 (<xref ref-type="bibr" rid="ref32">O&#x2019;Connor et al., 2005</xref>) for the incorporation and management of SWRL rules; the Cellfie plugin to load the database into the ontology; and Pellet (<xref ref-type="bibr" rid="ref46">Sirin et al., 2007</xref>) as the reasoner to establish inferences since it is capable of operating with SWRL rules.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="sec9">
<label>4</label>
<title>Results</title>
<p>The reliability of the integrated system in identifying cases with MCI was assessed by evaluating its performance in accurately classifying the cases. The versatility and ease of use of the system were also demonstrated through three additional practical use cases based on real-world scenarios: screening of new cases with the possibility of missing records, incorporation of a new database into the system, and the use of ontological capabilities to link different domains and generate new knowledge.</p>
<sec id="sec10">
<label>4.1</label>
<title>Tree ensemble performance</title>
<p>Establishing a rule-based decision system allows fast and direct modification of the threshold to suit it to the context of the study (initially, screening). First, both the <italic>ROC</italic> and the <italic>Precision-Recall</italic> curves were used to identify the optimal threshold. As it is shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>, the inflection point in both curves is at threshold 5 (<italic>th</italic>&#x2009;=&#x2009;5). Next, the behavior of the <italic>F-score</italic> curves was analyzed to establish the threshold to optimize the <italic>recall</italic>, i.e., the minimum number of decision trees necessary to classify a sample as MCI that maximizes <italic>recall</italic> while keeping <italic>precision</italic> high. These curves are shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>. Excluding the trivial option of <italic>th</italic>&#x2009;=&#x2009;0, for all curves with <italic>&#x03B2;</italic>&#x2009;&#x003E;&#x2009;1, an inflection is observed at <italic>th</italic>&#x2009;=&#x2009;2. It is also shown that majority voting leads to lower <italic>F-scores</italic> for all <italic>&#x03B2;</italic>&#x2009;&#x003E;&#x2009;1, ratifying the results obtained in the analysis of the <italic>ROC</italic> and the <italic>P-R</italic> curves.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p><italic>ROC</italic> curve (orange) and <italic>P-R</italic> curve (blue) for all possible thresholds of the system.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g002.tif"/>
</fig>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p><italic>F-score</italic> curves for different values of <italic>&#x03B2;</italic> for all possible thresholds of the system.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g003.tif"/>
</fig>
<p>Different numbers of decision trees were evaluated, and the number of 11 decision trees was selected for our system since it obtained the best performance while maintaining a manageable number of trees. <xref ref-type="table" rid="tab2">Table 2</xref> shows the comparison between the average performance of the 11 trees independently and the tree ensemble for three different thresholds: the majority voting option (<italic>th</italic>&#x2009;=&#x2009;6), the overall most efficient threshold obtained by the <italic>ROC</italic> and <italic>P-R</italic> curves (<italic>th</italic>&#x2009;=&#x2009;5), and the best threshold to reduce <italic>false negatives</italic> (<italic>th</italic>&#x2009;=&#x2009;2). It shows that there is an overall improvement in the tree ensemble with respect to the average performance of the individual decision trees. Among the three thresholds, <italic>th</italic>&#x2009;=&#x2009;6 is outperformed by both <italic>th</italic>&#x2009;=&#x2009;5 and <italic>th</italic>&#x2009;=&#x2009;2 in all metrics except <italic>precision</italic>. The comparison of system performance between <italic>th</italic>&#x2009;=&#x2009;5 and <italic>th</italic>&#x2009;=&#x2009;2 yields better results for <italic>th</italic>&#x2009;=&#x2009;2 in <italic>F2</italic> and <italic>recall</italic>, while <italic>th</italic>&#x2009;=&#x2009;5 presents better results in <italic>accuracy, precision</italic>, and <italic>ROC-AUC</italic>, as expected. However, <italic>F1</italic> remains practically the same for both thresholds. Using <italic>F2</italic> as the discriminant metric, the tree ensemble (TE) with <italic>th</italic>&#x2009;=&#x2009;2 was selected as the most appropriate for evaluating performance (<italic>F2<sub>TE2</sub></italic>&#x2009;=&#x2009;0.830), widely surpassing both <italic>th</italic>&#x2009;=&#x2009;6 (<italic>F2<sub>TE6</sub></italic>&#x2009;=&#x2009;0.691) and <italic>th</italic>&#x2009;=&#x2009;5 (<italic>F2<sub>TE5</sub></italic>&#x2009;=&#x2009;0.730).</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Comparison of the performance of the individual decision trees concerning the ensemble using the thresholds corresponding to majority vote (<italic>th</italic>&#x2009;=&#x2009;6), <italic>ROC/PR</italic> curve (<italic>th</italic>&#x2009;=&#x2009;5), and <italic>F&#x03B2;</italic> curve (<italic>th</italic>&#x2009;=&#x2009;2).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Method</th>
<th align="center" valign="top">F2</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">F1</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">ROC-AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Tree average</td>
<td align="center" valign="top">0.651</td>
<td align="center" valign="top">0.746</td>
<td align="center" valign="top">0.669</td>
<td align="center" valign="top">0.635</td>
<td align="center" valign="top">0.721</td>
<td align="center" valign="top">0.744</td>
</tr>
<tr>
<td align="left" valign="middle">Tree ensemble with <italic>th</italic> =&#x2009;6</td>
<td align="center" valign="top">0.691</td>
<td align="center" valign="top">0.817</td>
<td align="center" valign="top">0.725</td>
<td align="center" valign="top">0.630</td>
<td align="center" valign="top">0.875</td>
<td align="center" valign="top">0.785</td>
</tr>
<tr>
<td align="left" valign="middle">Tree ensemble with <italic>th</italic> =&#x2009;5</td>
<td align="center" valign="top">0.730</td>
<td align="center" valign="top">0.820</td>
<td align="center" valign="top">0.750</td>
<td align="center" valign="top">0.700</td>
<td align="center" valign="top">0.826</td>
<td align="center" valign="top">0.801</td>
</tr>
<tr>
<td align="left" valign="middle">Tree ensemble with <italic>th</italic> =&#x2009;2</td>
<td align="center" valign="top">0.830</td>
<td align="center" valign="top">0.775</td>
<td align="center" valign="top">0.751</td>
<td align="center" valign="top">0.896</td>
<td align="center" valign="top">0.654</td>
<td align="center" valign="top">0.797</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec11">
<label>4.2</label>
<title>Performance comparison with other ML models</title>
<p><xref ref-type="table" rid="tab3">Table 3</xref> shows the comparison of the integrated system with <italic>th</italic>&#x2009;=&#x2009;2 with seven ML models widely used in biomedical data analysis: Adaboosting (ADAB), Bagging (BAG), Multilayer perceptron (MLP), Logistic Regression (RLog), Random Forest (RF), Support Vector Machine (SVM), XGBoosting tree (XGB). Ten repetitions of the analysis were performed with different initialization seeds to ensure the robustness of the results. To allow proper comparisons between all systems, the thresholds for each system were adjusted to optimize <italic>F2.</italic> As can be seen in <xref ref-type="table" rid="tab3">Table 3</xref>, the performance of <italic>F2<sub>TE2</sub></italic> for the tree ensemble exceeds all the other ML models except for the BAG (<italic>F2<sub>TE2</sub></italic>&#x2009;=&#x2009;0.830 vs. <italic>F2<sub>BAG</sub></italic>&#x2009;=&#x2009;0.832). This difference was expected, as both methods are based on the same type of ensemble and the BAG uses a larger number of components; however, the difference is negligible, and our proposal facilitates the explainability of the results and can be used within the ontology without affecting its performance.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Comparison of a total of 7 ML models: adaboosting (ADAB), bagging (BAG), multilayer perceptron (MLP), logistic regression (RLog), random forest (RF), support vector machine (SVM), XGBoosting tree (XGB) using the thresholds that maximize their performance for <italic>F2</italic>.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Methods</th>
<th align="center" valign="top">F2</th>
<th align="center" valign="top">F1</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">ROC-AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">ADAB <italic>th</italic> =&#x2009;0.4</td>
<td align="center" valign="top">0.800</td>
<td align="center" valign="top">0.628</td>
<td align="center" valign="top">0.542</td>
<td align="center" valign="top">0.985</td>
<td align="center" valign="top">0.464</td>
<td align="center" valign="top">0.623</td>
</tr>
<tr>
<td align="left" valign="middle">BAG <italic>th</italic> =&#x2009;0.3</td>
<td align="center" valign="top">0.832</td>
<td align="center" valign="top">0.772</td>
<td align="center" valign="top">0.798</td>
<td align="center" valign="top">0.880</td>
<td align="center" valign="top">0.711</td>
<td align="center" valign="top">0.812</td>
</tr>
<tr>
<td align="left" valign="middle">MLP <italic>th</italic> =&#x2009;0.2</td>
<td align="center" valign="top">0.790</td>
<td align="center" valign="top">0.694</td>
<td align="center" valign="top">0.701</td>
<td align="center" valign="top">0.872</td>
<td align="center" valign="top">0.578</td>
<td align="center" valign="top">0.733</td>
</tr>
<tr>
<td align="left" valign="middle">RLog <italic>th</italic> =&#x2009;0.2</td>
<td align="center" valign="top">0.767</td>
<td align="center" valign="top">0.644</td>
<td align="center" valign="top">0.618</td>
<td align="center" valign="top">0.884</td>
<td align="center" valign="top">0.552</td>
<td align="center" valign="top">0.667</td>
</tr>
<tr>
<td align="left" valign="middle">RF <italic>th</italic> =&#x2009;0.4</td>
<td align="center" valign="top">0.820</td>
<td align="center" valign="top">0.806</td>
<td align="center" valign="top">0.845</td>
<td align="center" valign="top">0.830</td>
<td align="center" valign="top">0.788</td>
<td align="center" valign="top">0.844</td>
</tr>
<tr>
<td align="left" valign="middle">SVM <italic>th</italic> =&#x2009;0.3</td>
<td align="center" valign="top">0.770</td>
<td align="center" valign="top">0.707</td>
<td align="center" valign="top">0.734</td>
<td align="center" valign="top">0.821</td>
<td align="center" valign="top">0.626</td>
<td align="center" valign="top">0.751</td>
</tr>
<tr>
<td align="left" valign="middle">XGB <italic>th</italic> =&#x2009;0.4</td>
<td align="center" valign="top">0.824</td>
<td align="center" valign="top">0.816</td>
<td align="center" valign="top">0.852</td>
<td align="center" valign="top">0.826</td>
<td align="center" valign="top">0.812</td>
<td align="center" valign="top">0.851</td>
</tr>
<tr>
<td align="left" valign="middle">Tree ensemble with <italic>th</italic> =&#x2009;2</td>
<td align="center" valign="top">0.830</td>
<td align="center" valign="top">0.751</td>
<td align="center" valign="top">0.775</td>
<td align="center" valign="top">0.896</td>
<td align="center" valign="top">0.654</td>
<td align="center" valign="top">0.797</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec12">
<label>4.3</label>
<title>General system operation</title>
<p>NIO is an ontology with many <italic>Classes</italic>, so it was first reduced to the necessary <italic>Classes</italic> for the study. Additionally, to compare the results of the tree ensemble integrated into the ontology with the original ML model, we added those <italic>Classes</italic> corresponding to the confusion matrix: &#x201C;TN_Scores&#x201D; (<italic>true negatives</italic>), &#x201C;FN_Scores&#x201D; (<italic>false negatives</italic>), &#x201C;FP_Scores&#x201D; (<italic>false positives</italic>) and &#x201C;TP_Scores&#x201D; (<italic>true positives</italic>). Finally, all restrictions related to the range of available values for each test were checked to minimize as much as possible the incorporation of database mistakes.</p>
<p>The rules were integrated into the ontology through the Prot&#x00E9;g&#x00E9; SWRL plugin, while the database was incorporated through Cellfie. A fragment of this integration that shows the rules corresponding to decision tree #10, the confusion matrix, and the aggregation of the individual tree decisions is shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Fragment of SWRL rules corresponding to a complete decision tree (Tree #10), rules for integrating the ensemble predictions, rules of the final diagnosis, and rules defining the confusion matrix (for evaluation purposes).</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g004.tif"/>
</fig>
<p>The reasoner was activated once the rules and the database were incorporated into the ontology, generating the diagnosis. <xref ref-type="fig" rid="fig5">Figure 5</xref> shows the results inferred by the reasoner for case 107, with &#x201C;Healthy&#x201D; and &#x201C;MCI&#x201D; corresponding to Healthy and MCI <italic>Classes</italic>, respectively. Each individual tree prediction can be seen under &#x201C;Property assertions,&#x201D; where &#x201C;pred_(<italic>n</italic>)&#x201D; is a tree, (<italic>n</italic>) is the ID of the tree, and the number following is the classification (0 for Healthy and 1 for MCI). For example, &#x201C;pred_1 0&#x201D; means that tree 1 classifies case 107 as &#x201C;Healthy.&#x201D; The results of the individual trees are added and stored on the <italic>Data Property</italic> &#x201C;sum.&#x201D; In this case, it is 0, and the final classification of that case is &#x201C;Healthy&#x201D; since it is less than the threshold (<italic>th</italic>&#x2009;=&#x2009;2).</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Results inferred by the reasoner, showing in the central window the number of Individuals classified as &#x201C;Healthy&#x201D; or &#x201C;MCI&#x201D; and the results of the confusion matrix. An example is case 107, showing the prediction per tree (window &#x201C;Property assertions: 107&#x201D;) as well as the final diagnosis (inferred in the window &#x201C;Description: 107&#x201D;).</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g005.tif"/>
</fig>
<p>Finally, <xref ref-type="fig" rid="fig6">Figure 6</xref> shows part of the reasoning followed to establish the diagnosis for case 107. This allows the experts to know the process followed by the system to classify a case as &#x201C;Healthy&#x201D; or &#x201C;MCI.&#x201D;</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Justification of the reasoning for case 107.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g006.tif"/>
</fig>
</sec>
<sec id="sec13">
<label>4.4</label>
<title>Other use cases</title>
<p>In addition to the usual classification of individuals belonging to the same population distribution as the sample, the integrated system was evaluated in three other situations to demonstrate its usefulness and versatility:</p>
<list list-type="bullet">
<list-item><p>To evaluate new cases even when records have missing values.</p></list-item>
<list-item><p>To incorporate a new database in the ontology, evaluating similarities and differences between both databases, and making inferences about the new database to obtain a diagnosis.</p></list-item>
<list-item><p>To relate different domains and generate emergent knowledge that relates the performance of subjects in certain tests with cognitive domains and associated brain areas already modeled in the ontology.</p></list-item>
</list>
<sec id="sec14">
<label>4.4.1</label>
<title>Screening of new cases</title>
<p>During population screening, a large volume of tests is generated. Those tests need to be evaluated individually, slowing down the screening process. Those tests may lack results in certain parts or subtests due to several factors, such as the refusal or inability of some person to perform a test or a test being applied later in the follow-up. In this case, a decision support system provides an initial classification that can be used to make a first filtering and focusing of the subsequent study. Efficiency increases if the system can handle records with missing data, speeding up the process by avoiding the need to eliminate or preprocess those records beforehand.</p>
<p>To exemplify this use case, a new database consisting of 354 cases belonging to the same project but not yet classified will be used. To bring this use case closer to a real screening scenario, this database will be incorporated directly into the system without preprocessing, including those cases with missing records. The data was incorporated into the ontology using Cellfie, running the reasoner next. The threshold used was <italic>th</italic>&#x2009;=&#x2009;2. The inferred results can be seen in <xref ref-type="fig" rid="fig7">Figure 7</xref>. The first thing that can be appreciated is that none of the <italic>Classes</italic> has all 354 cases except the <italic>Class</italic> referring to the test subject identifier (&#x201C;Study_Subject&#x201D;). This indicates the presence of records with missing data, as shown in <xref ref-type="fig" rid="fig8">Figure 8</xref> for specific case 570.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Diagnosis of new cases.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g007.tif"/>
</fig>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Example of a case with no test results, showing only the subject ID, the evaluation and the code used for sex.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g008.tif"/>
</fig>
<p>In the cases in which the system issued the final diagnosis, 197 cases were classified as healthy and 143 as MCI, making a total of 340 cases. <xref ref-type="fig" rid="fig9">Figure 9</xref> shows one of these cases, along with the individual diagnoses from the decision trees and the sum used to generate the final diagnosis.</p>
<fig position="float" id="fig9">
<label>Figure 9</label>
<caption>
<p>Example of a case diagnosed by the system.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g009.tif"/>
</fig>
<p>Nine of those 14 cases that did not include the diagnosis only had the records of ID, evaluation number, and sex, as shown in <xref ref-type="fig" rid="fig8">Figure 8</xref>. Therefore, no classification could be obtained for those cases. The remaining five cases had missing values in some of their records that prevented obtaining a result in one or more trees. <xref ref-type="fig" rid="fig10">Figure 10</xref> shows one of those cases, where trees #0, #1, and #7 were not activated. However, the result shown by the rest of the decision trees that make up the tree ensemble [encoded in the properties &#x201C;pred_(<italic>n</italic>)&#x201D;] would allow a final diagnosis of MCI for that case, by summing the classifications and then comparing it with the established threshold (<italic>th</italic>&#x2009;=&#x2009;2). In this way, the ensemble handles missing values by relying on the data that are available, without altering the original database, and allows a first insight into the cognitive state of the subjects. However, such classifications should be treated with caution, especially if the number of total activated trees is low and the result is under the threshold (which would give a preliminary and unsafe assessment of healthy).</p>
<fig position="float" id="fig10">
<label>Figure 10</label>
<caption>
<p>Example of a case with missing values that prevented the generation of the general diagnosis because the diagnoses for trees #0, #1, and #7 (&#x201C;pred_0,&#x201D; &#x201C;pred_1,&#x201D; and &#x201C;pred_7&#x201D;) could not be generated due to missing values.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g010.tif"/>
</fig>
<p>This use case shows how the decision support system can be used for the evaluation of new cases obtained during cognitive screening. The system allows the classification of several hundred cases in a few minutes, even handling cases with missing data in one or more tests and obtaining the final classification from the activated decision trees.</p>
</sec>
<sec id="sec15">
<label>4.4.2</label>
<title>Incorporation of another database into the system</title>
<p>A significant objective of decision support systems is that their structure is functional in contexts other than those in which they are designed. To show how a different database can be coupled into this integrated system, we used a new anonymized database from the Dementia Disease Initiation (DDI) study (<xref ref-type="bibr" rid="ref6">Fladby et al., 2017</xref>), a Norwegian MCI cohort composed of data collected across different medical centers and hospitals in Norway and focused on early detection of Alzheimer&#x2019;s and other neurodegenerative dementias. This database integrates biomarkers, MRI, and neuropsychological tests. However, for this example, only <italic>Classes</italic> corresponding to raw scores on neuropsychological tests were selected. The sociodemographic data of the selected cases are detailed in <xref ref-type="table" rid="tab4">Table 4</xref>.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Summary of the Norwegian database.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Method</th>
<th align="center" valign="top">No. of subjects</th>
<th align="center" valign="top">Men/Women</th>
<th align="center" valign="top">Age mean (std)</th>
<th align="center" valign="top">Scholarity mean (std)</th>
<th align="center" valign="top">MMSE mean (std)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Healthy</td>
<td align="center" valign="top">447</td>
<td align="center" valign="top">209/238</td>
<td align="center" valign="top">64.79 (9.35)</td>
<td align="center" valign="top">13.74 (2.98)</td>
<td align="center" valign="top">29.09 (1.25)</td>
</tr>
<tr>
<td align="left" valign="top">MCI</td>
<td align="center" valign="top">387</td>
<td align="center" valign="top">180/207</td>
<td align="center" valign="top">66.20 (9.35)</td>
<td align="center" valign="top">13.60 (3.31)</td>
<td align="center" valign="top">27.26 (3.03)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>First, the tests in both databases were analyzed for equivalences. Although the tests used in the Norwegian dataset encompassed similar objectives to those in the Spanish dataset, the divergence in test types, each with its own rules, scoring systems, and execution methods, hindered their classification as identical assessments. For example, the COWAT is a type of verbal fluency test but composed of different subtests than those used in the Spanish battery. The exception is the Trail Making Test A and B, being the same test in both databases. Therefore, it was necessary to repeat all the steps to adapt the system to the new database. First, the NIO ontology was reviewed to ensure that all tests presented in the Norwegian database were already modeled. Next, a new set of decision trees was generated, so they could establish a diagnosis from this new test battery. The same steps were followed as for the Spanish database: use of the bagging method for the generation of 11 decision trees, selection of the most efficient threshold, translation into SWRL rules, incorporation of these rules into the ontology, and evaluation of the performance of the system regarding six machine learning models (<xref ref-type="table" rid="tab5">Table 5</xref>). The most efficient threshold was used to maximize <italic>recall</italic> for all models.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Comparison between six machine learning models and the decision tree ensemble, using the thresholds that maximize their performance for <italic>F2</italic>.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Method</th>
<th align="center" valign="top">F2</th>
<th align="center" valign="top">F1</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">ROC-AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">ADAB <italic>th</italic> =&#x2009;0.5</td>
<td align="center" valign="middle">0.895</td>
<td align="center" valign="middle">0.900</td>
<td align="center" valign="middle">0.913</td>
<td align="center" valign="middle">0.892</td>
<td align="center" valign="middle">0.910</td>
<td align="center" valign="middle">0.911</td>
</tr>
<tr>
<td align="left" valign="top">BAG <italic>th</italic> =&#x2009;0.2</td>
<td align="center" valign="middle">0.900</td>
<td align="center" valign="middle">0.830</td>
<td align="center" valign="middle">0.827</td>
<td align="center" valign="middle">0.954</td>
<td align="center" valign="middle">0.736</td>
<td align="center" valign="middle">0.841</td>
</tr>
<tr>
<td align="left" valign="top">MLP <italic>th</italic> =&#x2009;0.3</td>
<td align="center" valign="middle">0.896</td>
<td align="center" valign="middle">0.842</td>
<td align="center" valign="middle">0.845</td>
<td align="center" valign="middle">0.937</td>
<td align="center" valign="middle">0.765</td>
<td align="center" valign="middle">0.855</td>
</tr>
<tr>
<td align="left" valign="top">RLog <italic>th</italic> =&#x2009;0.3</td>
<td align="center" valign="middle">0.884</td>
<td align="center" valign="middle">0.846</td>
<td align="center" valign="middle">0.846</td>
<td align="center" valign="middle">0.916</td>
<td align="center" valign="middle">0.777</td>
<td align="center" valign="middle">0.854</td>
</tr>
<tr>
<td align="left" valign="top">RF <italic>th</italic> =&#x2009;0.2</td>
<td align="center" valign="middle">0.881</td>
<td align="center" valign="middle">0.766</td>
<td align="center" valign="middle">0.766</td>
<td align="center" valign="middle">0.961</td>
<td align="center" valign="middle">0.663</td>
<td align="center" valign="middle">0.787</td>
</tr>
<tr>
<td align="left" valign="top">SVM <italic>th</italic> =&#x2009;0.3</td>
<td align="center" valign="middle">0.894</td>
<td align="center" valign="middle">0.856</td>
<td align="center" valign="middle">0.856</td>
<td align="center" valign="middle">0.926</td>
<td align="center" valign="middle">0.787</td>
<td align="center" valign="middle">0.863</td>
</tr>
<tr>
<td align="left" valign="top">Tree ensemble with <italic>th</italic> =&#x2009;2</td>
<td align="center" valign="middle">0.889</td>
<td align="center" valign="middle">0.838</td>
<td align="center" valign="middle">0.829</td>
<td align="center" valign="middle">0.927</td>
<td align="center" valign="middle">0.766</td>
<td align="center" valign="middle">0.846</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As in the Spanish database, the most efficient threshold was <italic>th</italic>&#x2009;=&#x2009;2. As expected, and as in the Spanish database, the tree ensemble outperforms the individual tree average. In comparison with the other ML systems, the tree ensemble performance lies in the middle of the range. The best <italic>F2</italic> was obtained by the BAG (<italic>F2</italic>&#x2009;=&#x2009;0.900), and the tree ensemble is the third method when <italic>F2</italic> and <italic>recall</italic> are considered jointly (<italic>F2<sub>TE2</sub></italic>&#x2009;=&#x2009;0.889, <italic>recall<sub>TE2</sub></italic>&#x2009;=&#x2009;0.927), being surpassed on both metrics only by BAG (<italic>F2<sub>BAG</sub></italic>&#x2009;=&#x2009;0.900, <italic>recall<sub>BAG</sub></italic>&#x2009;=&#x2009;0.954) and MLP (<italic>F2<sub>MLP</sub></italic>&#x2009;=&#x2009;0.896, <italic>recall<sub>MLP</sub></italic>&#x2009;=&#x2009;0.937).</p>
</sec>
<sec id="sec16">
<label>4.4.3</label>
<title>Relationship between different domains modeled in the ontology</title>
<p>NIO is an ontology designed to model four different interrelated domains: neuropsychological tests, cognitive domains, brain areas, and neurodegenerative diseases. In this use case, we took advantage of it to relate low performance in verbal fluency tests with potential alterations in cognitive functions and brain areas associated with these tests. First, we searched the literature for which cognitive functions and brain areas are related to each neuropsychological test (<xref ref-type="bibr" rid="ref1">Baldo et al., 2006</xref>; <xref ref-type="bibr" rid="ref40">Prescott et al., 2006</xref>), as well as the thresholds above which impairment is considered (<xref ref-type="bibr" rid="ref8">Garc&#x00ED;a-Herranz et al., 2019</xref>). Next, the necessary relationships between the <italic>Classes</italic> and <italic>Data Properties</italic> involved were established through SWRL rules. Finally, the reasoner was activated to obtain the inference of those relationships, along with the &#x201C;Healthy/MCI&#x201D; classification.</p>
<p>The results can be seen in <xref ref-type="fig" rid="fig11">Figure 11</xref>, where 71 cases were classified with possible temporal lobe damage, 165 with frontal lobe damage and 71 with possible semantic memory problems.</p>
<fig position="float" id="fig11">
<label>Figure 11</label>
<caption>
<p>Inference of possible cognitive or brain alterations from the set of cases without diagnosis.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g011.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig12">Figure 12</xref> shows as an example two cases of <italic>Individuals</italic> who present potential alterations in semantic memory, one classified as &#x201C;Healthy&#x201D; and another as &#x201C;MCI.&#x201D; The system assigns the status of &#x201C;impaired&#x201D; to the associated <italic>Data Property</italic> of &#x201C;has_semantic_memory_state,&#x201D; and the status of &#x201C;possible Damage&#x201D; to the properties of &#x201C;has_temporal_lobe_state&#x201D; (both cases) and &#x201C;has_frontal_lobe_state&#x201D; in one of them (case 920).</p>
<fig position="float" id="fig12">
<label>Figure 12</label>
<caption>
<p>Example of two cases marked as having possible damage in the cognitive domain of semantic memory, with one of the cases evaluated as Healthy, and the other as MCI.</p>
</caption>
<graphic xlink:href="fninf-18-1378281-g012.tif"/>
</fig>
<p>These complementary relationships would allow a deeper understanding of the cognitive and physical status of the subjects, allowing the refinement of both follow-up and diagnosis. This would lead to a more accurate identification of the type of MCI of each subject and which disease it is most likely to lead to. This use case also shows that the relationship established between performance and cognitive functions/brain areas is independent of diagnosis, allowing a complementary analysis using the semantic relationship between different domains given by the ontology.</p>
</sec>
</sec>
</sec>
<sec sec-type="discussion" id="sec17">
<label>5</label>
<title>Discussion</title>
<p>Owing to the operational mechanisms of the SWRL rules, the system meticulously records all intermediary decisions involved in deducing the final diagnosis from data. Its integration in an ontology allows the recovery of the inference process followed by the reasoner to reach a specific diagnosis. Using a decision tree ensemble instead of a single tree increases the system power, making it more accurate at identifying cases with MCI and less prone to overfitting the training set. Furthermore, the integrated system allows cases with missing data to be treated directly, without the need for prior data preprocessing. This avoids the possible inclusion of artifacts in the system.</p>
<p><italic>Recall</italic> measures the model&#x2019;s ability to retrieve positive samples (<xref ref-type="bibr" rid="ref11">Gupta et al., 2021</xref>). Therefore, for our study, it is of great interest to increase <italic>recall</italic> to detect as many positive cases as possible. However, using <italic>recall</italic> alone without considering <italic>precision</italic> may result in a trivial model that would classify all subjects as MCI by default, while the goal of screenings is to reduce the number of subjects to focus on. <italic>F2</italic> was selected as the most appropriate metric to evaluate the performance of a screening method as it gives more weight to <italic>recall</italic> while keeping a balance between <italic>FP</italic> and <italic>FN</italic>. <italic>F-scores</italic> that weight <italic>recall</italic> more heavily than <italic>F2</italic> were discarded, as they could turn the screening system into a trivial one.</p>
<p>Two conclusions were drawn from the analysis of the tree ensemble thresholds. First, the most efficient threshold for a scenario in which both <italic>recall</italic> and <italic>precision</italic> would be optimized with 11 trees corresponds to <italic>th</italic>&#x2009;=&#x2009;5. This means that a minimum of 5 decision trees would be necessary to issue a diagnosis of MCI and classify a case as such. Second, the tree ensemble with <italic>th</italic>&#x2009;=&#x2009;2 is the most efficient at detecting MCI cases, with <italic>F2<sub>TE2</sub></italic>&#x2009;=&#x2009;0.830 just after the BAG (<italic>F2<sub>BAG</sub></italic>&#x2009;=&#x2009;0.832), as shown in <xref ref-type="table" rid="tab3">Table 3</xref>. Therefore, the performance of the system is suitable to identify MCI subjects. It is worth noting that, in general, the thresholds in all machine learning models were low (from <italic>th</italic>&#x2009;=&#x2009;0.2 to <italic>th</italic>&#x2009;=&#x2009;0.4). This could indicate that when MCI symptoms are still mild, a low threshold would help detect a higher number of MCI cases that otherwise would have been classified as healthy.</p>
<p>The following can be inferred from the three use cases shown. In the first one, the integrated system can issue a diagnosis of new cases fast and without the need for preprocessing that could alter the data. The system also allows physicians and neuropsychologists to review each case and the inference followed if necessary. It is also possible to establish a diagnosis in those cases with missing data, even though the final diagnosis could not be obtained, using the &#x201C;partial diagnosis&#x201D; of the activated trees. The use of the original records, without the need for prior preprocessing to remove or impute missing values before analysis, allows experts to focus on the evaluation and diagnosis of the subjects. This shows the system&#x2019;s ability to be used as a population screening tool, saving time in the diagnostic evaluation and, therefore, allowing more people to be reached.</p>
<p>In the second use case, the use of another database revealed the great heterogeneity present in the field of early detection of Alzheimer&#x2019;s disease through neuropsychological tests. Because almost none of the tests present in the Spanish and Norwegian databases matched, it was necessary to first check that all tests were modeled in the ontology. It was also necessary to generate a new tree ensemble model able to establish a classification from the new database. However, the process of creating and evaluating the new tree ensemble was faster as the entire system methodology was already defined. The performance analysis showed that the system with threshold <italic>th</italic>&#x2009;=&#x2009;2 was among the best models when considering both <italic>F2</italic> and <italic>recall</italic> for detecting MCI cases. Also, all methods present a clear improvement compared to the results with the Spanish database. Future analyses are needed to determine the reasons for the discrepancies in performance between the two databases.</p>
<p>The last use case showed how the ontology&#x2019;s semantic relationship capabilities can be used to relate data from different domains, specifically to identify cognitive problems and potentially affected brain areas from the subjects&#x2019; test performance. This complementary analysis can help to highlight the main cognitive problems of a subject regardless of his or her diagnosis. However, the information that links neuropsychological tests with both cognitive processes and brain areas is scarce, scattered, and often contradictory (<xref ref-type="bibr" rid="ref9">Gomez-Valades et al., 2021</xref>), even in widely used and studied tests such as verbal fluency tests. Therefore, although an increase in this type of relationship may help to refine the MCI diagnosis, identifying the type of MCI and what kind of neurodegenerative disease it is more likely to lead to based on the test performance, in-depth modeling of this type of relationship is a project in its own right, and it is left as future work.</p>
<sec id="sec18">
<label>5.1</label>
<title>System limitations</title>
<p>The system can make inferences on cases with missing data and issue diagnoses from the decision trees not affected by the missing data. However, because both OWL and SWRL rules assume an open world, the system is not able to establish an automatic final diagnosis in case missing values cause one or more of the rule sets corresponding to a decision tree to fail to activate. In such cases, the diagnosis is obtained semi-automatically, where partial classifications are obtained automatically (the results given by the SWRL rules of the activated decision trees), and the final diagnosis must be obtained manually by summing them and comparing the result with the established threshold. Of course, this classification should be treated with caution because it is based on partial information and it is only conclusive when the classification threshold is exceeded.</p>
<p>Finally, we are aware that the ML system&#x2019;s performance is not optimal because most of the subjects with MCI were at a very early stage, presenting very mild symptoms that could be mistaken for normal aging, and the small size of the sample does not allow for the training of a robust and reliable ML model.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec19">
<label>6</label>
<title>Conclusion</title>
<p>This paper presents the design of a decision support system that integrates an ontology with a tree ensemble written under SWRL. The system allows the explainability of the generated diagnosis while maintaining performance on par with other well-established ML systems. Its ontological base allows the system to operate within the ontological framework: integrating the data in the ontology allows the standardization and univocal interpretation of the stored data, and defining value limits for each test minimizes the inclusion of tests with erroneous values. The use of a small tree ensemble to obtain the diagnosis allows us to combine the explainability and translation capacity of the decision tree with the power of a bagging method. Integrating it within the ontology allows a reasoner to explain the reasoning process. The use cases show its practical utility in three additional contexts: direct cognitive screening from a dataset without requiring previous preprocessing, such as the one obtained during real population screenings which can have missing values; integrating the necessary rules so that the system can generate diagnoses from information in a different database; and establishing relationships between different domains based on the performance of subjects in the tests.</p>
<p>This work demonstrates the ability of the system to be used to perform a preliminary automatic diagnosis of subjects, using the available results obtained in the neuropsychological tests. The system is designed to filter out as many suspected cases of MCI as possible, allowing its use as an initial screening method in primary care units for older adults. We also showed its ability to be extended with new knowledge, and to employ semantic capabilities for inference of new knowledge.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec20">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: The data used in this study is available for research purposes on reasonable request to M.R. Requests to access these datasets should be directed to Mariano Rinc&#x00F3;n, <email>mrincon@dia.uned.es</email>.</p>
</sec>
<sec sec-type="ethics-statement" id="sec21">
<title>Ethics statement</title>
<p>Ethical approval was not required for the study involving humans in accordance with the local legislation and institutional requirements. Written informed consent to participate in this study was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and the institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="sec22">
<title>Author contributions</title>
<p>AG-V: Conceptualization, Formal analysis, Funding acquisition, Investigation, Methodology, Software, Validation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. RM-T: Conceptualization, Funding acquisition, Methodology, Supervision, Validation, Writing &#x2013; review &#x0026; editing. SG-H: Data curation, Writing &#x2013; review &#x0026; editing. AB: Data curation, Supervision, Writing &#x2013; review &#x0026; editing. MR: Conceptualization, Methodology, Supervision, Validation, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="funding-information" id="sec23">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by a grant &#x201C;<italic>Ayuda de la UNED para contrato predoctoral para la formaci&#x00F3;n de personal investigador</italic>,&#x201D; a grant from the &#x201C;<italic>Ayudas de movilidad internacional del Banco Santander para doctorandos matriculados en la EIDUNED</italic>&#x201D; to A.G.-V. as part of the research project presented in this paper, and a grant: CPP 2021-009109 of the Spanish Public&#x2013;Private R&#x0026;D program, Spain.</p>
</sec>
<ack>
<p>We would like to thank Maria del Carmen Mardomingo and Tormod Fladby for giving permission to use their databases in our project.</p>
</ack>
<sec sec-type="COI-statement" id="sec24">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="sec25">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baldo</surname> <given-names>J. V.</given-names></name> <name><surname>Schwartz</surname> <given-names>S.</given-names></name> <name><surname>Wilkins</surname> <given-names>D.</given-names></name> <name><surname>Dronkers</surname> <given-names>N. F.</given-names></name></person-group> (<year>2006</year>). <article-title>Role of frontal versus temporal cortex in verbal fluency as revealed by voxel-based lesion symptom mapping</article-title>. <source>J. Int. Neuropsychol. Soc.</source> <volume>12</volume>, <fpage>896</fpage>&#x2013;<lpage>900</lpage>. doi: <pub-id pub-id-type="doi">10.1017/S1355617706061078</pub-id>, PMID: <pub-id pub-id-type="pmid">17064451</pub-id></citation>
</ref>
<ref id="ref2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Clark</surname> <given-names>D. G.</given-names></name> <name><surname>McLaughlin</surname> <given-names>P. M.</given-names></name> <name><surname>Woo</surname> <given-names>E.</given-names></name> <name><surname>Hwang</surname> <given-names>K.</given-names></name> <name><surname>Hurtz</surname> <given-names>S.</given-names></name> <name><surname>Ramirez</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Novel verbal fluency scores and structural brain imaging for prediction of cognitive outcome in mild cognitive impairment</article-title>. <source>Alzheimers Dement. Diagn. Assess. Dis. Monit.</source> <volume>2</volume>, <fpage>113</fpage>&#x2013;<lpage>122</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.dadm.2016.02.001</pub-id>, PMID: <pub-id pub-id-type="pmid">27239542</pub-id></citation>
</ref>
<ref id="ref3">
<citation citation-type="journal"><person-group person-group-type="author">
<name><surname>Costa</surname> <given-names>F. F.</given-names></name>
</person-group> (<year>2014</year>). <article-title>Big data in biomedicine</article-title>. <source>Drug Discov. Today</source> <volume>19</volume>, <fpage>433</fpage>&#x2013;<lpage>440</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.drudis.2013.10.012</pub-id>, PMID: <pub-id pub-id-type="pmid">24183925</pub-id></citation>
</ref>
<ref id="ref4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x00ED;az-Mardomingo</surname> <given-names>M.</given-names></name> <name><surname>Garc&#x00ED;a-Herranz</surname> <given-names>S.</given-names></name> <name><surname>Rodr&#x00ED;guez-Fern&#x00E1;ndez</surname> <given-names>R.</given-names></name> <name><surname>Venero</surname> <given-names>C.</given-names></name> <name><surname>Peraita</surname> <given-names>H.</given-names></name></person-group> (<year>2017</year>). <article-title>Problems in classifying mild cognitive impairment (MCI): one or multiple syndromes?</article-title> <source>Brain Sci.</source> <volume>7</volume>:<fpage>111</fpage>. doi: <pub-id pub-id-type="doi">10.3390/brainsci7090111</pub-id>, PMID: <pub-id pub-id-type="pmid">28862676</pub-id></citation>
</ref>
<ref id="ref5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x00ED;az-Mardomingo</surname> <given-names>M. C.</given-names></name> <name><surname>Peraita</surname> <given-names>H.</given-names></name></person-group> (<year>2008</year>). <article-title>Detecci&#x00F3;n precoz del deterioro cognitivo ligero de la tercera edad</article-title>. <source>Psicothema</source> <volume>20</volume>, <fpage>438</fpage>&#x2013;<lpage>444</lpage>, PMID: <pub-id pub-id-type="pmid">18674440</pub-id></citation>
</ref>
<ref id="ref6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fladby</surname> <given-names>T.</given-names></name> <name><surname>P&#x00E5;lhaugen</surname> <given-names>L.</given-names></name> <name><surname>Selnes</surname> <given-names>P.</given-names></name> <name><surname>Waterloo</surname> <given-names>K.</given-names></name> <name><surname>Br&#x00E5;then</surname> <given-names>G.</given-names></name> <name><surname>Hessen</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Detecting at-risk Alzheimer&#x2019;s disease cases</article-title>. <source>J. Alzheimers Dis.</source> <volume>60</volume>, <fpage>97</fpage>&#x2013;<lpage>105</lpage>. doi: <pub-id pub-id-type="doi">10.3233/JAD-170231</pub-id>, PMID: <pub-id pub-id-type="pmid">28826181</pub-id></citation>
</ref>
<ref id="ref17">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>F&#x00FC;rnkranz</surname> <given-names>J.</given-names></name> <name><surname>Widmer</surname> <given-names>G.</given-names></name></person-group>, (<year>1994</year>). <source>Incremental reduced error pruning</source>. In: Machine Learning Proceedings 1994. <publisher-name>Elsevier</publisher-name>, pp. <fpage>70</fpage>&#x2013;<lpage>77</lpage>.</citation>
</ref>
<ref id="ref7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garc&#x00ED;a-Herranz</surname> <given-names>S.</given-names></name> <name><surname>D&#x00ED;az-Mardomingo</surname> <given-names>M. C.</given-names></name> <name><surname>Peraita</surname> <given-names>H.</given-names></name></person-group> (<year>2016</year>). <article-title>Neuropsychological predictors of conversion to probable Alzheimer disease in elderly with mild cognitive impairment</article-title>. <source>J. Neuropsychol.</source> <volume>10</volume>, <fpage>239</fpage>&#x2013;<lpage>255</lpage>. doi: <pub-id pub-id-type="doi">10.1111/jnp.12067</pub-id>, PMID: <pub-id pub-id-type="pmid">25809316</pub-id></citation>
</ref>
<ref id="ref8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garc&#x00ED;a-Herranz</surname> <given-names>S.</given-names></name> <name><surname>D&#x00ED;az-Mardomingo</surname> <given-names>M. C.</given-names></name> <name><surname>Venero</surname> <given-names>C.</given-names></name> <name><surname>Peraita</surname> <given-names>H.</given-names></name></person-group> (<year>2019</year>). <article-title>Accuracy of verbal fluency tests in the discrimination of mild cognitive impairment and probable Alzheimer&#x2019;s disease in older Spanish monolingual individuals</article-title>. <source>Neuropsychol. Dev. Cogn. B Aging Neuropsychol.</source> <volume>27</volume>, <fpage>826</fpage>&#x2013;<lpage>840</lpage>. doi: <pub-id pub-id-type="doi">10.1080/13825585.2019.1698710</pub-id>, PMID: <pub-id pub-id-type="pmid">31822214</pub-id></citation>
</ref>
<ref id="ref9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gomez-Valades</surname> <given-names>A.</given-names></name> <name><surname>Martinez-Tomas</surname> <given-names>R.</given-names></name> <name><surname>Rincon</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Integrative Base ontology for the research analysis of Alzheimer&#x2019;s disease-related mild cognitive impairment</article-title>. <source>Front. Neuroinform.</source> <volume>15</volume>:<fpage>561691</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fninf.2021.561691</pub-id>, PMID: <pub-id pub-id-type="pmid">33613222</pub-id></citation>
</ref>
<ref id="ref10">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gomez-Valad&#x00E9;s</surname> <given-names>A.</given-names></name> <name><surname>Mart&#x00ED;nez-Tom&#x00E1;s</surname> <given-names>R.</given-names></name> <name><surname>Rinc&#x00F3;n-Zamorano</surname> <given-names>M.</given-names></name></person-group>, (<year>2019</year>). <article-title>Ontologies for early detection of the Alzheimer disease and other neurodegenerative diseases</article-title>, in: <person-group person-group-type="editor"><name><surname>Ferr&#x00E1;ndez Vicente</surname> <given-names>J.M.</given-names></name> <name><surname>&#x00C1;lvarez-S&#x00E1;nchez</surname> <given-names>J.R.</given-names></name> <name><surname>Paz L&#x00F3;pez</surname> <given-names>F.</given-names><prefix>de la</prefix></name> <name><surname>Toledo Moreo</surname> <given-names>J.</given-names></name> <name><surname>Adeli</surname> <given-names>H.</given-names></name></person-group> (Eds.), <source>Understanding the brain function and emotions, lecture notes in computer science</source>. Vol. <volume>11486</volume>. <publisher-loc>Switzerland</publisher-loc>: <publisher-name>Springer Nature Switzerland AG</publisher-name>, pp. <fpage>42</fpage>&#x2013;<lpage>50</lpage>.</citation>
</ref>
<ref id="ref11">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Gupta</surname> <given-names>A.</given-names></name> <name><surname>Anand</surname> <given-names>A.</given-names></name> <name><surname>Hasija</surname> <given-names>Y.</given-names></name></person-group>, (<year>2021</year>). &#x201C;Recall-based machine learning approach for early detection of cervical cancer,&#x201D; in <italic>2021 6th international conference for convergence in technology (I2CT)</italic>. pp. 1&#x2013;5.</citation>
</ref>
<ref id="ref12">
<citation citation-type="other"><person-group person-group-type="author">
<name><surname>Ho</surname> <given-names>T.K.</given-names></name>
</person-group>, (<year>1995</year>). &#x201C;Random decision forests.&#x201D; in <italic>Proceedings of 3rd International Conference on Document Analysis and Recognition</italic>. pp. 278&#x2013;282, vol. 1.</citation>
</ref>
<ref id="ref13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoehndorf</surname> <given-names>R.</given-names></name> <name><surname>Schofield</surname> <given-names>P. N.</given-names></name> <name><surname>Gkoutos</surname> <given-names>G. V.</given-names></name></person-group> (<year>2015</year>). <article-title>The role of ontologies in biological and biomedical research: a functional perspective</article-title>. <source>Brief. Bioinform.</source> <volume>16</volume>, <fpage>1069</fpage>&#x2013;<lpage>1080</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbv011</pub-id>, PMID: <pub-id pub-id-type="pmid">25863278</pub-id></citation>
</ref>
<ref id="ref14">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Ivascu</surname> <given-names>T.</given-names></name> <name><surname>Manate</surname> <given-names>B.</given-names></name> <name><surname>Negru</surname> <given-names>V.</given-names></name></person-group>, (<year>2015</year>). &#x201C;A multi-agent architecture for ontology-based diagnosis of mental disorders.&#x201D; in <italic>2015 17th International Symposium on Symbolic and Numeric Algorithms for Scientific Computing (SYNASC)</italic>. IEEE. pp. 423&#x2013;430.</citation>
</ref>
<ref id="ref15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jensen</surname> <given-names>M.</given-names></name> <name><surname>Cox</surname> <given-names>A. P.</given-names></name> <name><surname>Chaudhry</surname> <given-names>N.</given-names></name> <name><surname>Ng</surname> <given-names>M.</given-names></name> <name><surname>Sule</surname> <given-names>D.</given-names></name> <name><surname>Duncan</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>The neurological disease ontology</article-title>. <source>J. Biomed. Semant.</source> <volume>4</volume>:<fpage>42</fpage>. doi: <pub-id pub-id-type="doi">10.1186/2041-1480-4-42</pub-id>, PMID: <pub-id pub-id-type="pmid">24314207</pub-id></citation>
</ref>
<ref id="ref16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jitsuishi</surname> <given-names>T.</given-names></name> <name><surname>Yamaguchi</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Searching for optimal machine learning model to classify mild cognitive impairment (MCI) subtypes using multimodal MRI data</article-title>. <source>Sci. Rep.</source> <volume>12</volume>:<fpage>4284</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-022-08231-y</pub-id>, PMID: <pub-id pub-id-type="pmid">35277565</pub-id></citation>
</ref>
<ref id="ref18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kang</surname> <given-names>M. J.</given-names></name> <name><surname>Kim</surname> <given-names>S. Y.</given-names></name> <name><surname>Na</surname> <given-names>D. L.</given-names></name> <name><surname>Kim</surname> <given-names>B. C.</given-names></name> <name><surname>Yang</surname> <given-names>D. W.</given-names></name> <name><surname>Kim</surname> <given-names>E.-J.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Prediction of cognitive impairment via deep learning trained with multi-center neuropsychological test data</article-title>. <source>BMC Med. Inform. Decis. Mak.</source> <volume>19</volume>:<fpage>231</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12911-019-0974-x</pub-id>, PMID: <pub-id pub-id-type="pmid">31752864</pub-id></citation>
</ref>
<ref id="ref19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>K&#x00F6;nig</surname> <given-names>A.</given-names></name> <name><surname>Linz</surname> <given-names>N.</given-names></name> <name><surname>Tr&#x00F6;ger</surname> <given-names>J.</given-names></name> <name><surname>Wolters</surname> <given-names>M.</given-names></name> <name><surname>Alexandersson</surname> <given-names>J.</given-names></name> <name><surname>Robert</surname> <given-names>P.</given-names></name></person-group> (<year>2018</year>). <article-title>Fully automatic speech-based analysis of the semantic verbal fluency task</article-title>. <source>Dement. Geriatr. Cogn. Disord.</source> <volume>45</volume>, <fpage>198</fpage>&#x2013;<lpage>209</lpage>. doi: <pub-id pub-id-type="doi">10.1159/000487852</pub-id>, PMID: <pub-id pub-id-type="pmid">29886493</pub-id></citation>
</ref>
<ref id="ref20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kotelnikov</surname> <given-names>E. V.</given-names></name> <name><surname>Milov</surname> <given-names>V. R.</given-names></name></person-group> (<year>2018</year>). <article-title>Comparison of rule induction, decision trees and formal concept analysis approaches for classification</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>1015</volume>:<fpage>032068</fpage>. doi: <pub-id pub-id-type="doi">10.1088/1742-6596/1015/3/032068</pub-id></citation>
</ref>
<ref id="ref21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kulmanov</surname> <given-names>M.</given-names></name> <name><surname>Smaili</surname> <given-names>F. Z.</given-names></name> <name><surname>Gao</surname> <given-names>X.</given-names></name> <name><surname>Hoehndorf</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <article-title>Machine learning with biomedical ontologies</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>422</fpage>&#x2013;<lpage>429</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btz595</pub-id>, PMID: <pub-id pub-id-type="pmid">31350877</pub-id></citation>
</ref>
<ref id="ref22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kulmanov</surname> <given-names>M.</given-names></name> <name><surname>Smaili</surname> <given-names>F. Z.</given-names></name> <name><surname>Gao</surname> <given-names>X.</given-names></name> <name><surname>Hoehndorf</surname> <given-names>R.</given-names></name></person-group> (<year>2021</year>). <article-title>Semantic similarity and machine learning with ontologies</article-title>. <source>Brief. Bioinform.</source> <volume>22</volume>:<fpage>bbaa199</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbaa199</pub-id>, PMID: <pub-id pub-id-type="pmid">33049044</pub-id></citation>
</ref>
<ref id="ref23">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Lakshmi</surname> <given-names>V.S.</given-names></name> <name><surname>Nithya</surname> <given-names>V.</given-names></name> <name><surname>Sripriya</surname> <given-names>K.</given-names></name> <name><surname>Preethi</surname> <given-names>C.</given-names></name> <name><surname>Logeshwari</surname> <given-names>K.</given-names></name></person-group>, (<year>2019</year>). &#x201C;Prediction of diabetes patient stage using ontology based machine learning system.&#x201D; in <italic>2019 IEEE International Conference on System, Computation, Automation and Networking (ICSCAN)</italic>. pp. 1&#x2013;4.</citation>
</ref>
<ref id="ref24">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Linz</surname> <given-names>N.</given-names></name> <name><surname>Troger</surname> <given-names>J.</given-names></name> <name><surname>Alexandersson</surname> <given-names>J.</given-names></name> <name><surname>Konig</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <source>Using neural word embeddings in the analysis of the clinical semantic verbal fluency task</source>, International Conference on Computational Semantics, Computer Science. <fpage>7</fpage>.</citation>
</ref>
<ref id="ref25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lobo</surname> <given-names>A.</given-names></name> <name><surname>Ezquerra</surname> <given-names>J.</given-names></name> <name><surname>G&#x00F3;mez Burgada</surname> <given-names>F.</given-names></name> <name><surname>Sala</surname> <given-names>J. M.</given-names></name> <name><surname>Seva D&#x00ED;az</surname> <given-names>A.</given-names></name></person-group> (<year>1979</year>). <article-title>Cognocitive mini-test (a simple practical test to detect intellectual changes in medical patients)</article-title>. <source>Actas Luso Esp. Neurol. Psiquiatr. Cienc. Afines</source> <volume>7</volume>, <fpage>189</fpage>&#x2013;<lpage>202</lpage>, PMID: <pub-id pub-id-type="pmid">474231</pub-id></citation>
</ref>
<ref id="ref26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>L&#x00F3;pez-de-Ipi&#x00F1;a</surname> <given-names>K.</given-names></name> <name><surname>Martinez-de-Lizarduy</surname> <given-names>U.</given-names></name> <name><surname>Calvo</surname> <given-names>P. M.</given-names></name> <name><surname>Beitia</surname> <given-names>B.</given-names></name> <name><surname>Garc&#x00ED;a-Melero</surname> <given-names>J.</given-names></name> <name><surname>Fern&#x00E1;ndez</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>On the analysis of speech and disfluencies for automatic detection of mild cognitive impairment</article-title>. <source>Neural Comput. &#x0026; Applic.</source> <volume>32</volume>, <fpage>15761</fpage>&#x2013;<lpage>15769</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00521-018-3494-1</pub-id></citation>
</ref>
<ref id="ref27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Massari</surname> <given-names>H. E.</given-names></name> <name><surname>Gherabi</surname> <given-names>N.</given-names></name> <name><surname>Mhammedi</surname> <given-names>S.</given-names></name> <name><surname>Ghandi</surname> <given-names>H.</given-names></name> <name><surname>Qanouni</surname> <given-names>F.</given-names></name> <name><surname>Bahaj</surname> <given-names>M.</given-names></name></person-group> (<year>2022a</year>). <article-title>An ontological model based on machine learning for predicting breast cancer</article-title>. <source>Int. J. Adv. Comput. Sci. Appl.</source> <volume>13</volume>:<fpage>715</fpage>. doi: <pub-id pub-id-type="doi">10.14569/IJACSA.2022.0130715</pub-id></citation>
</ref>
<ref id="ref28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Massari</surname> <given-names>H. E.</given-names></name> <name><surname>Gherabi</surname> <given-names>N.</given-names></name> <name><surname>Mhammedi</surname> <given-names>S.</given-names></name> <name><surname>Ghandi</surname> <given-names>H.</given-names></name> <name><surname>Qanouni</surname> <given-names>F.</given-names></name> <name><surname>Bahaj</surname> <given-names>M.</given-names></name></person-group> (<year>2022b</year>). <article-title>Integration of ontology with machine learning to predict the presence of covid-19 based on symptoms</article-title>. <source>Bull. Electr. Eng. Inform.</source> <volume>11</volume>, <fpage>2805</fpage>&#x2013;<lpage>2816</lpage>. doi: <pub-id pub-id-type="doi">10.11591/eei.v11i5.4392</pub-id></citation>
</ref>
<ref id="ref29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Massari</surname> <given-names>H. E.</given-names></name> <name><surname>Sabouri</surname> <given-names>Z.</given-names></name> <name><surname>Mhammedi</surname> <given-names>S.</given-names></name> <name><surname>Gherabi</surname> <given-names>N.</given-names></name></person-group> (<year>2022c</year>). <article-title>Diabetes prediction using machine learning algorithms and ontology</article-title>. <source>J. ICT Stand.</source> <volume>10</volume>, <fpage>319</fpage>&#x2013;<lpage>338</lpage>. doi: <pub-id pub-id-type="doi">10.13052/jicts2245-800X.10212</pub-id></citation>
</ref>
<ref id="ref30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Me&#x017E;nar</surname> <given-names>S.</given-names></name> <name><surname>Bevec</surname> <given-names>M.</given-names></name> <name><surname>Lavra&#x010D;</surname> <given-names>N.</given-names></name> <name><surname>&#x0160;krlj</surname> <given-names>B.</given-names></name></person-group> (<year>2022</year>). <article-title>Ontology completion with graph-based machine learning: a comprehensive evaluation</article-title>. <source>Mach. Learn. Knowl. Extr.</source> <volume>4</volume>, <fpage>1107</fpage>&#x2013;<lpage>1123</lpage>. doi: <pub-id pub-id-type="doi">10.3390/make4040056</pub-id></citation>
</ref>
<ref id="ref31">
<citation citation-type="journal"><person-group person-group-type="author">
<name><surname>Musen</surname> <given-names>M. A.</given-names></name>
</person-group> (<year>2015</year>). <article-title>The prot&#x00E9;g&#x00E9; project: a look back and a look forward</article-title>. <source>AI Matters</source> <volume>1</volume>, <fpage>4</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1145/2757001.2757003</pub-id>, PMID: <pub-id pub-id-type="pmid">27239556</pub-id></citation>
</ref>
<ref id="ref32">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>O&#x2019;Connor</surname> <given-names>M.</given-names></name> <name><surname>Knublauch</surname> <given-names>H.</given-names></name> <name><surname>Tu</surname> <given-names>S.</given-names></name> <name><surname>Musen</surname> <given-names>M.</given-names></name></person-group>, (<year>2005</year>). <source>Writing rules for the semantic web using SWRL and Jess</source>. Computer Science.</citation>
</ref>
<ref id="ref33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sherimon</surname> <given-names>P. C.</given-names></name> <name><surname>Sherimon</surname> <given-names>V.</given-names></name> <name><surname>Preethii</surname> <given-names>S. P.</given-names></name> <name><surname>Nair</surname> <given-names>R.</given-names></name> <name><surname>Mathew</surname> <given-names>R.</given-names></name></person-group> (<year>2021</year>). <article-title>A systematic review of clinical decision support systems in Alzheimer&#x2019;s disease domain</article-title>. <source>Int. J. Onl. Eng.</source> <volume>17</volume>, <fpage>74</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.3991/ijoe.v17i08.23643</pub-id>, PMID: <pub-id pub-id-type="pmid">33091740</pub-id></citation>
</ref>
<ref id="ref34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Panza</surname> <given-names>F.</given-names></name> <name><surname>D&#x2019;Introno</surname> <given-names>A.</given-names></name> <name><surname>Colacicco</surname> <given-names>A. M.</given-names></name> <name><surname>Capurso</surname> <given-names>C.</given-names></name> <name><surname>Del Parigi</surname> <given-names>A.</given-names></name> <name><surname>Caselli</surname> <given-names>R. J.</given-names></name> <etal/></person-group>. (<year>2005</year>). <article-title>Current epidemiology of mild cognitive impairment and other predementia syndromes</article-title>. <source>Am. J. Geriatr. Psychiatry</source> <volume>13</volume>, <fpage>633</fpage>&#x2013;<lpage>644</lpage>. doi: <pub-id pub-id-type="doi">10.1097/00019442-200508000-00002</pub-id>, PMID: <pub-id pub-id-type="pmid">16085779</pub-id></citation>
</ref>
<ref id="ref35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Patrick</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name></person-group> (<year>2012</year>). <article-title>An ontology for clinical questions about the contents of patient notes</article-title>. <source>J. Biomed. Inform.</source> <volume>45</volume>, <fpage>292</fpage>&#x2013;<lpage>306</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jbi.2011.11.008</pub-id>, PMID: <pub-id pub-id-type="pmid">22142949</pub-id></citation>
</ref>
<ref id="ref36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pedregosa</surname> <given-names>F.</given-names></name> <name><surname>Varoquaux</surname> <given-names>G.</given-names></name> <name><surname>Gramfort</surname> <given-names>A.</given-names></name> <name><surname>Michel</surname> <given-names>V.</given-names></name> <name><surname>Thirion</surname> <given-names>B.</given-names></name> <name><surname>Grisel</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Scikit-learn: machine learning in Python</article-title>. <source>J. Mach. Learn. Res.</source> <volume>12</volume>, <fpage>2825</fpage>&#x2013;<lpage>2830</lpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1201.0490</pub-id></citation>
</ref>
<ref id="ref37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Peraita</surname> <given-names>H.</given-names></name> <name><surname>Garc&#x00ED;a-Herranz</surname> <given-names>S.</given-names></name> <name><surname>D&#x00ED;az-Mardomingo</surname> <given-names>M. C.</given-names></name></person-group> (<year>2011</year>). <article-title>Evolution of specific cognitive subprofiles of mild cognitive impairment in a three-year longitudinal study</article-title>. <source>Curr. Aging Sci.</source> <volume>4</volume>, <fpage>171</fpage>&#x2013;<lpage>182</lpage>. doi: <pub-id pub-id-type="doi">10.2174/1874609811104020171</pub-id>, PMID: <pub-id pub-id-type="pmid">21418005</pub-id></citation>
</ref>
<ref id="ref38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Petersen</surname> <given-names>R. C.</given-names></name> <name><surname>Aisen</surname> <given-names>P. S.</given-names></name> <name><surname>Beckett</surname> <given-names>L. A.</given-names></name> <name><surname>Donohue</surname> <given-names>M. C.</given-names></name> <name><surname>Gamst</surname> <given-names>A. C.</given-names></name> <name><surname>Harvey</surname> <given-names>D. J.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Alzheimer&#x2019;s disease neuroimaging initiative (ADNI): clinical characterization</article-title>. <source>Neurology</source> <volume>74</volume>, <fpage>201</fpage>&#x2013;<lpage>209</lpage>. doi: <pub-id pub-id-type="doi">10.1212/WNL.0b013e3181cb3e25</pub-id>, PMID: <pub-id pub-id-type="pmid">20042704</pub-id></citation>
</ref>
<ref id="ref39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Petersen</surname> <given-names>R. C.</given-names></name> <name><surname>Caracciolo</surname> <given-names>B.</given-names></name> <name><surname>Brayne</surname> <given-names>C.</given-names></name> <name><surname>Gauthier</surname> <given-names>S.</given-names></name> <name><surname>Jelic</surname> <given-names>V.</given-names></name> <name><surname>Fratiglioni</surname> <given-names>L.</given-names></name></person-group> (<year>2014</year>). <article-title>Mild cognitive impairment: a concept in evolution</article-title>. <source>J. Intern. Med.</source> <volume>275</volume>, <fpage>214</fpage>&#x2013;<lpage>228</lpage>. doi: <pub-id pub-id-type="doi">10.1111/joim.12190</pub-id>, PMID: <pub-id pub-id-type="pmid">24605806</pub-id></citation>
</ref>
<ref id="ref40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Prescott</surname> <given-names>T. J.</given-names></name> <name><surname>Newton</surname> <given-names>L. D.</given-names></name> <name><surname>Mir</surname> <given-names>N. U.</given-names></name> <name><surname>Woodruff</surname> <given-names>P. W. R.</given-names></name> <name><surname>Parks</surname> <given-names>R. W.</given-names></name></person-group> (<year>2006</year>). <article-title>A new dissimilarity measure for finding semantic structure in category fluency data with implications for understanding memory organization in schizophrenia</article-title>. <source>Neuropsychology</source> <volume>20</volume>, <fpage>685</fpage>&#x2013;<lpage>699</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0894-4105.20.6.685</pub-id>, PMID: <pub-id pub-id-type="pmid">17100513</pub-id></citation>
</ref>
<ref id="ref41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname> <given-names>P. N.</given-names></name> <name><surname>Haendel</surname> <given-names>M. A.</given-names></name></person-group> (<year>2020</year>). <article-title>Ontologies, knowledge representation, and machine learning for translational research: recent contributions</article-title>. <source>Yearb. Med. Inform.</source> <volume>29</volume>, <fpage>159</fpage>&#x2013;<lpage>162</lpage>. doi: <pub-id pub-id-type="doi">10.1055/s-0040-1701991</pub-id>, PMID: <pub-id pub-id-type="pmid">32823310</pub-id></citation>
</ref>
<ref id="ref42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sahoo</surname> <given-names>S. S.</given-names></name> <name><surname>Kobow</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Buchhalter</surname> <given-names>J.</given-names></name> <name><surname>Dayyani</surname> <given-names>M.</given-names></name> <name><surname>Upadhyaya</surname> <given-names>D. P.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Ontology-based feature engineering in machine learning workflows for heterogeneous epilepsy patient records</article-title>. <source>Sci. Rep.</source> <volume>12</volume>:<fpage>19430</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-022-23101-3</pub-id>, PMID: <pub-id pub-id-type="pmid">36371527</pub-id></citation>
</ref>
<ref id="ref43">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Shoaip</surname> <given-names>N.</given-names></name> <name><surname>Barakat</surname> <given-names>S.</given-names></name> <name><surname>Elmogy</surname> <given-names>M.</given-names></name></person-group>, (<year>2019</year>). &#x201C;Alzheimer&#x2019;s disease integrated ontology (ADIO).&#x201D; in <italic>2019 14th International Conference on Computer Engineering and Systems (ICCES)</italic>. pp. 374&#x2013;379.</citation>
</ref>
<ref id="ref44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shoaip</surname> <given-names>N.</given-names></name> <name><surname>Rezk</surname> <given-names>A.</given-names></name> <name><surname>El-Sappagh</surname> <given-names>S.</given-names></name> <name><surname>Abuhmed</surname> <given-names>T.</given-names></name> <name><surname>Barakat</surname> <given-names>S.</given-names></name> <name><surname>Elmogy</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Alzheimer&#x2019;s disease diagnosis based on a semantic rule-based modeling and reasoning approach</article-title>. <source>Comput. Mater. Contin.</source> <volume>69</volume>, <fpage>3531</fpage>&#x2013;<lpage>3548</lpage>. doi: <pub-id pub-id-type="doi">10.32604/cmc.2021.019069</pub-id></citation>
</ref>
<ref id="ref45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shoaip</surname> <given-names>N.</given-names></name> <name><surname>Rezk</surname> <given-names>A.</given-names></name> <name><surname>El-Sappagh</surname> <given-names>S.</given-names></name> <name><surname>Alarabi</surname> <given-names>L.</given-names></name> <name><surname>Barakat</surname> <given-names>S.</given-names></name> <name><surname>Elmogy</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>A comprehensive fuzzy ontology-based decision support system for Alzheimer&#x2019;s disease diagnosis</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>31350</fpage>&#x2013;<lpage>31372</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2020.3048435</pub-id></citation>
</ref>
<ref id="ref46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sirin</surname> <given-names>E.</given-names></name> <name><surname>Parsia</surname> <given-names>B.</given-names></name> <name><surname>Grau</surname> <given-names>B. C.</given-names></name> <name><surname>Kalyanpur</surname> <given-names>A.</given-names></name> <name><surname>Katz</surname> <given-names>Y.</given-names></name></person-group> (<year>2007</year>). <article-title>Pellet: a practical OWL-DL reasoner</article-title>. <source>Web Semant. Sci. Serv. Agents World Wide Web</source> <volume>5</volume>, <fpage>51</fpage>&#x2013;<lpage>53</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.websem.2007.03.004</pub-id></citation>
</ref>
<ref id="ref47">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Tsymbal</surname> <given-names>A.</given-names></name> <name><surname>Zillner</surname> <given-names>S.</given-names></name> <name><surname>Huber</surname> <given-names>M.</given-names></name></person-group> (<year>2007</year>). &#x201C;<article-title>Ontology &#x2013; supported machine learning and decision support in biomedicine</article-title>&#x201D; in <source>Data integration in the life sciences, lecture notes in computer science</source>. eds. <person-group person-group-type="editor"><name><surname>Cohen-Boulakia</surname> <given-names>S.</given-names></name> <name><surname>Tannen</surname> <given-names>V.</given-names></name></person-group> (<publisher-loc>Berlin Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>156</fpage>&#x2013;<lpage>171</lpage>.</citation>
</ref>
<ref id="ref48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weakley</surname> <given-names>A.</given-names></name> <name><surname>Williams</surname> <given-names>J. A.</given-names></name> <name><surname>Schmitter-Edgecombe</surname> <given-names>M.</given-names></name> <name><surname>Cook</surname> <given-names>D. J.</given-names></name></person-group> (<year>2015</year>). <article-title>Neuropsychological test selection for cognitive impairment classification: a machine learning approach</article-title>. <source>J. Clin. Exp. Neuropsychol.</source> <volume>37</volume>, <fpage>899</fpage>&#x2013;<lpage>916</lpage>. doi: <pub-id pub-id-type="doi">10.1080/13803395.2015.1067290</pub-id>, PMID: <pub-id pub-id-type="pmid">26332171</pub-id></citation>
</ref>
<ref id="ref49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yesavage</surname> <given-names>J. A.</given-names></name> <name><surname>Brink</surname> <given-names>T. L.</given-names></name> <name><surname>Rose</surname> <given-names>T. L.</given-names></name> <name><surname>Lum</surname> <given-names>O.</given-names></name> <name><surname>Huang</surname> <given-names>V.</given-names></name> <name><surname>Adey</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>1982</year>). <article-title>Development and validation of a geriatric depression screening scale: a preliminary report</article-title>. <source>J. Psychiatr. Res.</source> <volume>17</volume>, <fpage>37</fpage>&#x2013;<lpage>49</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0022-3956(82)90033-4</pub-id>, PMID: <pub-id pub-id-type="pmid">7183759</pub-id></citation>
</ref>
<ref id="ref50">
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Zekri</surname> <given-names>F.</given-names></name> <name><surname>Bouaziz</surname> <given-names>R.</given-names></name> <name><surname>Turki</surname> <given-names>E.</given-names></name></person-group>, (<year>2015</year>). &#x201C;A fuzzy-based ontology for Alzheimer&#x2019;s disease decision support.&#x201D; in <italic>2015 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)</italic>. IEEE, pp. 1&#x2013;6.</citation>
</ref>
<ref id="ref51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Hu</surname> <given-names>B.</given-names></name> <name><surname>Ma</surname> <given-names>X.</given-names></name> <name><surname>Moore</surname> <given-names>P.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Ontology driven decision support for the diagnosis of mild cognitive impairment</article-title>. <source>Comput. Methods Prog. Biomed.</source> <volume>113</volume>, <fpage>781</fpage>&#x2013;<lpage>791</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cmpb.2013.12.023</pub-id>, PMID: <pub-id pub-id-type="pmid">24468160</pub-id></citation>
</ref>
</ref-list>
</back>
</article>