<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med. Eng.</journal-id>
<journal-title>Frontiers in Medical Engineering</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med. Eng.</abbrev-journal-title>
<issn pub-type="epub">2813-687X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1369265</article-id>
<article-id pub-id-type="doi">10.3389/fmede.2024.1369265</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Medical Engineering</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Artificial intelligence in routine blood tests</article-title>
<alt-title alt-title-type="left-running-head">Santos-Silva et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmede.2024.1369265">10.3389/fmede.2024.1369265</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Santos-Silva</surname>
<given-names>Miguel A.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2401751/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sousa</surname>
<given-names>Nuno</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2465/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Sousa</surname>
<given-names>Jo&#xe3;o Carlos</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/6211/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Life and Health Sciences Research Institute (ICVS)</institution>, <institution>School of Medicine</institution>, <institution>University of Minho</institution>, <addr-line>Braga</addr-line>, <country>Portugal</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>ICVS/3B&#x2019;s&#x2013;PT Government Associate Laboratory</institution>, <addr-line>Braga</addr-line>, <country>Portugal</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Clinical Academic Center-Braga (2CA)</institution>, <addr-line>Braga</addr-line>, <country>Portugal</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Association P5 Digital Medical Center (ACMP5)</institution>, <addr-line>Braga</addr-line>, <country>Portugal</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2014937/overview">Lia Morra</ext-link>, Polytechnic University of Turin, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/788412/overview">Satya Ranjan Dash</ext-link>, KIIT University, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1572964/overview">Massimo La Rosa</ext-link>, National Research Council (CNR), Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Jo&#xe3;o Carlos Sousa, <email>jcsousa@med.uminho.pt</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>03</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>2</volume>
<elocation-id>1369265</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>03</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Santos-Silva, Sousa and Sousa.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Santos-Silva, Sousa and Sousa</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Routine blood tests drive diagnosis, prognosis, and monitoring in traditional clinical decision support systems. As a routine diagnostic tool with standardized laboratory workflows, clinical blood analysis offers superior accessibility to a comprehensive assessment of physiological parameters. These parameters can be integrated and automated at scale, allowing for in-depth clinical inference and cost-effectiveness compared to other modalities such as imaging, genetic testing, or histopathology. Herein, we extensively review the analytical value of routine blood tests leveraged by artificial intelligence (AI), using the ICD-10 classification as a reference. A significant gap exists between standard disease-associated features and those selected by machine learning models. This suggests an amount of non-perceived information in traditional decision support systems that AI could leverage with improved performance metrics. Nonetheless, AI-derived support for clinical decisions must still be harmonized regarding external validation studies, regulatory approvals, and clinical deployment strategies. Still, as we discuss, the path is drawn for the future application of scalable artificial intelligence (AI) to enhance, extract, and classify patterns potentially correlated with pathological states with restricted limitations in terms of bias and representativeness.</p>
</abstract>
<kwd-group>
<kwd>blood analyses</kwd>
<kwd>blood</kwd>
<kwd>artificial intelligence (AI)</kwd>
<kwd>machine learning (ML)</kwd>
<kwd>diagnosis</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Medicine</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Artificial intelligence (AI) stems from the data generated mainly since the beginning of the fourth industrial revolution, which has progressively changed how people live, interact, and work (<xref ref-type="bibr" rid="B84">Sarker, 2021</xref>). Automated systems, meant to emulate human cognitive capabilities, deploy supervised applications to perform repetitive tasks more accurately and efficiently, saving time and effort for high-volume workloads. In medicine, AI has become a valuable tool for improving patient outcomes, particularly in diagnostics, where image and text-based systems supported by machine learning (ML) and deep learning (DL) technologies are reaching remarkable clinical results (<xref ref-type="bibr" rid="B80">Reardon, 2019</xref>). The COVID-19 pandemic is the paramount example of how AI applications enable new screening tools and achieve early diagnosis by measuring disease severity (<xref ref-type="bibr" rid="B60">Luo et al., 2021</xref>), progression (<xref ref-type="bibr" rid="B32">Demichev et al., 2021</xref>), and mortality prediction (<xref ref-type="bibr" rid="B59">Lin et al., 2021</xref>) through the interpretation of routine blood tests. For instance, a recent meta-analysis from Li et al. demonstrated that computational methods based on multi-center clinical datasets could generate more accurate COVID-19 diagnosis, stratify patients into clusters of severity and discriminate them from Influenza with 97.9% specificity (<xref ref-type="bibr" rid="B58">Li et al., 2020</xref>). Applications such as the previous example become even more relevant when applied to low-income underdeveloped countries where access to diagnostic workflow is limited and the need for real-time point-of-care systems for disease screening is imperative.</p>
<p>John McCarthy first outlined the concept of AI in 1956 during the Dartmouth conference, at which several scientists discussed the concept of &#x201c;thinking machines&#x201d; in different areas such as abstraction, creativity, computational theory, natural language processing, and neural networks (<xref ref-type="bibr" rid="B53">Kline, 2011</xref>). Since then, progress slowed and remained stationary until 2012, when an ImageNet-DL-Algorithm triggered significant attention for the technology, with high-accuracy performance classification metrics that disrupted the current state-of-the-art (<xref ref-type="bibr" rid="B56">Krizhevsky et al., 2012</xref>). AI is defined as a computer science subdivision that aims to automatically understand and create intelligent systems based on high amounts of data (<xref ref-type="bibr" rid="B87">Shukla Shubhendu and Vijay, 2013</xref>). In medicine, the inequities and deficiencies that arose from the global COVID-19 pandemic catalyzed a boost in AI applications. Therefore, AI aims to deliver effective, high-quality care, leveraging increasing clinical world data to democratize and decentralize health into patient care. The transformation of a patient&#x2019;s blood analysis into a probability state to epitomize a likely diagnosis is already a reality (<xref ref-type="bibr" rid="B40">Gun&#x10d;ar et al., 2018</xref>).</p>
<p>The purpose of improving population health and patient care with parallel reduction of healthcare costs supports the implementation of AI strategies in the medium and long-term periods (<xref ref-type="bibr" rid="B10">Bajwa et al., 2021</xref>). Concepts such as precision medicine, ranging from diagnostics to prognostics and therapeutics with connected care, are under development (<xref ref-type="bibr" rid="B94">The Medical Futurist, 2022</xref>). In parallel, AI strategies disrupt the classical paradigm of scientific knowledge construction. Instead of collecting small datasets that try to answer sequential questions (classical approach), the new paradigm settles on collecting high amounts of data where scientists try to find multiple answers directly (<xref ref-type="bibr" rid="B2">Ahmad et al., 2021</xref>). However, significant challenges arise during this new paradigm: the black-box nature of AI algorithms endorses the need to generate explanatory, comprehensive systems able to dialogue with the physician to justify each clinical prediction or outcome (<xref ref-type="bibr" rid="B19">Bruckert et al., 2020</xref>). Also, legal and regulatory matters are under development, which will be crucial to regulate how AI algorithms are built and how continuous learning is evaluated.</p>
<p>Here, we focus on routine blood analysis as a proxy for determining pathological states supported by AI algorithms. We offer a comprehensive description of the ML pipeline with contextualization on the learning strategies (machine, reinforcement, deep, and federated learning), model development (application, preprocessing, modelling, and validation), and clinic deployment. We summarize the pathologies based on general health parameters (summarized according to their function and associated causes of variation), their inherent classification performance, and principal findings associated with model development and selected blood parameters. Finally, we discuss challenges related to clinic deployment and suggest future research directions for the development of models.</p>
<p>Overall, this review provides guidance for future research by summarizing reports combining AI and routine blood tests for disease diagnosis or prognosis. Finally, it describes the methodologies used and contributes to the continued use of this technique in providing deeper insights into the potential of non-appraised blood metabolites in traditional clinical decision support systems.</p>
<sec id="s1-1">
<title>How AI learns</title>
<p>Currently, AI drives innovation processes involving analytical (data-driven decision-making), functional (operating according to analytical AI), interactive (communication), textual (natural language processing), and visual (augmented reality) technologies. AI enables the development of models to solve real-world problems based on different learning strategies, such as machine learning, deep learning, data mining, rule-based modelling, fuzzy logic, knowledge representation, case-based reasoning, text mining, visual analytics, and optimization, among others (<xref ref-type="bibr" rid="B85">Sarker, 2022</xref>). Next, we will briefly explain these learning strategies.</p>
<p>ML is a pattern recognition method that automatically detects regularities in large amounts of data. Based on statistical methods, this process evaluates interactions between variables and finds the most effective way of using them to reach a predetermined goal without requiring human intervention to define a strict set of rules or programming hypotheses (<xref ref-type="bibr" rid="B51">Kerr et al., 2012</xref>). ML has become the preferred framework for deploying AI applications, supported and leveraged by the continuous increase of data availability (big data). Although these concepts are similar and closely related, they are distinct: pattern recognition is one possible approach to artificial intelligence, and machine learning is one way to achieve pattern recognition (<xref ref-type="bibr" rid="B3">Alsuliman et al., 2020</xref>). Data is considered mandatory for the model&#x2019;s development, and it is commonly available in different forms such as structured (highly organized on relational databases), unstructured (without pre-defined format), semi-structured (organized but not on relational databases), and metadata (data properties about data) (<xref ref-type="bibr" rid="B84">Sarker, 2021</xref>).</p>
<p>ML algorithms such as gaussian na&#xef;ve Bayes (GNB), k-nearest neighbors (KNN), support vector machines (SVM), decision trees (DT), linear regression (LR), or ensemble methods (<xref ref-type="table" rid="T13">Box 1</xref>) are the most common techniques generally applied for supervised learning strategies (<xref ref-type="table" rid="T1">Table 1</xref> provides a comprehensive list). These algorithms use sample inputs for model development and subsequent data for model prediction. Apart from predicting specific diseases, other methods such as K-means, principal component analysis (PCA), or Pearson correlation (&#x3c1;) allow data exploration for clustering and dimensionality reduction through maximizing variance between samples. Thus, they enable an in-depth exploration of biomedical data with significant importance in medical diagnosis.</p>
<table-wrap id="T13" position="float">
<label>Box 1</label>
<caption>
<p>Glossary of key terms.</p>
</caption>
<table>
<tbody valign="top">
<tr>
<td align="left">AUROC</td>
<td align="left">The area under the receiver operating characteristic curve computed with the true positive <italic>versus</italic> false positive rates. It provides an aggregate measure of performance across all possible classification thresholds</td>
</tr>
<tr>
<td align="left">Bootstrap</td>
<td align="left">A statistical technique for sample extraction with replacement, allowing repeated training and fixed test</td>
</tr>
<tr>
<td align="left">Cross-validation</td>
<td align="left">The re-sampling method used to test and train different portions of data in several iterations of the model development</td>
</tr>
<tr>
<td align="left">Ensemble</td>
<td align="left">Combination of base estimators&#x2019; predictions to improve robustness and generalizability over a single estimator</td>
</tr>
<tr>
<td align="left">Feature</td>
<td align="left">Information input into the model during training and evaluation</td>
</tr>
<tr>
<td align="left">Kernel</td>
<td align="left">The function applied to the original non-linear data to create higher-dimensional spaces in which data will become separable</td>
</tr>
<tr>
<td align="left">Overfitting</td>
<td align="left">Process in which the statistical model adapts perfectly to the training data but does not generalize well on new data</td>
</tr>
<tr>
<td align="left">Training set</td>
<td align="left">The subset of data used for the model&#x2019;s learning and optimization</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>A comprehensive list of supervised ML algorithms for classification according to the desired learning strategy (<xref ref-type="bibr" rid="B73">Pedregosa et al., 2011</xref>).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Learning</th>
<th align="left">Algorithm</th>
<th align="left">Description</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="6" align="left">Linear</td>
<td align="left">Linear regression (LR)</td>
<td align="left">The target value is a linear combination of the features</td>
</tr>
<tr>
<td align="left">Lasso</td>
<td align="left">Uses variable selection and regularization to estimate sparse coefficients, improving the accuracy and interpretability of the model</td>
</tr>
<tr>
<td align="left">Linear discriminant analysis (LDA)</td>
<td align="left">Transforms input data into a linear subspace that maximizes separation between classes and predicts targets in closed-form solutions. Can also be used to provide dimensionality reduction in high-dimensional data sets</td>
</tr>
<tr>
<td align="left">Generalized linear models (GLM)</td>
<td align="left">Statistical model that could be used to model dependent variables that represent non-normally distributed data, e.g., glmnet</td>
</tr>
<tr>
<td align="left">Logistic regression (LR)</td>
<td align="left">Statistical approach that seeks to generate a model from a collection of data that allows the prediction of values taken by a categorical variable (usually binary), using a series of continuous explanatory variables and/or binaries</td>
</tr>
<tr>
<td align="left">Multivariate logistic regression (MLR)</td>
<td align="left">Extension of logistic regression to problems with more than two discrete outcomes</td>
</tr>
<tr>
<td rowspan="3" align="left">Bayesian</td>
<td align="left">Na&#xef;ve Bayes (NB)</td>
<td align="left">This algorithm uses the Bayes&#x2019; theorem with conditional independence between every pair of features to the value of the class variable</td>
</tr>
<tr>
<td align="left">Bayesian networks (BN)</td>
<td align="left">These networks are adaptable since they fit joint probability distributions and allow knowledge extraction, reflecting how the occurrence of one variable is affected by the state of another</td>
</tr>
<tr>
<td align="left">Average two dependence estimators (A2DE)</td>
<td align="left">A2DE achieves high accuracy by averaging among a small number of plausible Na&#xef;ve-Bayes-like models that have fewer (and hence less detrimental) independence assumptions than Na&#xef;ve Bayes</td>
</tr>
<tr>
<td align="left">Nearest neighbors</td>
<td align="left">K-Nearest Neighbors (KNN)</td>
<td align="left">Uses the location of training samples that are closest in distance to the new point to estimate its label, based on the initial k data points</td>
</tr>
<tr>
<td align="left">Support vector machines</td>
<td align="left">Support Vector Machines (SVM)</td>
<td align="left">Effective in smaller, but high-dimensional data sets, that assigns training examples to points in space in order to maximize the distance between the two categories. Different kernels could be used to evaluate new instances, which are mapped into that same space and classified according to which side of the gap they fall</td>
</tr>
<tr>
<td rowspan="2" align="left">Decision trees</td>
<td align="left">Chi-square automatic interaction detection (CHAID)</td>
<td align="left">A type of decision tree that predicts the target value by learning simple decision rules inferred from data features. Specifically, it selects the most important feature using a chi-square measurement and iterates the procedure until all sub-informational data have a single choice</td>
</tr>
<tr>
<td align="left">Classification and regression trees (CART)</td>
<td align="left">CART builds binary trees by selecting the feature and threshold that provides the most information gain at each node</td>
</tr>
<tr>
<td rowspan="6" align="left">Neural Networks</td>
<td align="left">Artificial Neural Network (ANN)</td>
<td align="left">This algorithm uses non-linear functions made from one or two hidden layers between the input and the output dimensions</td>
</tr>
<tr>
<td align="left">Deep Neural Network (DNN)</td>
<td align="left">DNN is an ANN with multiple layers between the input and output dimensions, and it is designed to emulate the principles and structure of a human neural network</td>
</tr>
<tr>
<td align="left">Multiple Layer Perceptron (MLP)</td>
<td align="left">Non-linear function different from logistic regression since it can employ one or more non-linear layers between the input and output dimensions</td>
</tr>
<tr>
<td align="left">Shallow neural network</td>
<td align="left">Employs a linear function in the second hidden layer of the two-layer network</td>
</tr>
<tr>
<td align="left">Recurrent Neural Networks (RNN)</td>
<td align="left">It is a cyclic DNN that loops outputs from specific nodes to affect the subsequent ones, and it is mainly applied to text-recognition and natural language processing</td>
</tr>
<tr>
<td align="left">Long short-term memory (LSTM)</td>
<td align="left">It refers to a type of recurrent neural networks (RNNs) that learn long-term dependencies with feedback connections, and it is especially used in time-series data</td>
</tr>
<tr>
<td rowspan="8" align="left">Ensemble</td>
<td align="left">Random forests</td>
<td align="left">Random forests employ averaging to increase prediction accuracy and control overfitting by merging various trees, using different sub-samples of the training set, which decreases variances and results in a superior overall model</td>
</tr>
<tr>
<td align="left">Adaboost</td>
<td align="left">Adaboost fits weak learners (models that are just slightly better than random guessing) in sub-samples of the training set and gets a final forecast by merging the guesses by majority vote</td>
</tr>
<tr>
<td align="left">Extra-trees</td>
<td align="left">Extra-trees differ from traditional decision trees since they apply random splits across randomly selected features, picking the best split for creating the tree. In ensemble, it averages a meta estimator that fits randomized extra trees to improve accuracy and prevent overfitting</td>
</tr>
<tr>
<td align="left">Dynamic ensemble selection</td>
<td align="left">Uses the most locally accurate decision classifier by calculating the accuracy of each individual classifier in specific local parts of the feature space surrounding a test sample</td>
</tr>
<tr>
<td align="left">Gradient boosting (GBM)</td>
<td align="left">Gradient boosting trains a large number of weak learners in a gradient descent function, where each learner minimizes the loss function of the previous model, resulting in an ensemble of learners that is improved incrementally until a stopping condition is achieved</td>
</tr>
<tr>
<td align="left">Extreme gradient boosting (XGBoost)</td>
<td align="left">XGBoost is a regularized variation of GBM that controls overfitting and enhances performance by using linear and tree-based models that improve its capacity to execute parallel computation on a single computer, making it faster and more efficient</td>
</tr>
<tr>
<td align="left">Light gradient boosting (LightGBM)</td>
<td align="left">LightGBM learns from data more effectively than standard GBMs because it employs histogram-based binning, which converts continuous feature values into discrete bins, reducing training time and memory consumption</td>
</tr>
<tr>
<td align="left">Category boosting (Catboost)</td>
<td align="left">In opposition to GBM, lightGBM or XGBoost, Catboost uses symmetric and balanced trees, keeping the decision criteria consistent across all nodes, which makes this algorithm less prone to overfitting</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Supervised ML algorithms differ from unsupervised ones because they comprise training data wherein the real state of the data is known, for instance, which subjects have anemia and which are healthy. Based on training data, the algorithm generates a model that is applied to predict the state of a set of subjects for which the true state is unknown. These predictions settle in the form of a classification problem that identifies discrete states, such as different stages of anemia. Alternatively, they are established as a regression problem that evaluates continuous variables and predicts, for example, the numeric value of hemoglobin (<xref ref-type="bibr" rid="B92">Svensson et al., 2015</xref>). However, suppose the true state of the data is unknown. In that case, learning might be conducted unsupervised, where algorithms infer underlying patterns in unlabeled data to find sub-clusters of the original data, identify outliers, or produce low-dimensional representations. This way, it could be possible to recognize new associations that were not perceptible. In the example above, the algorithm could separate clusters of patients between anemic and non-anemic, even without knowing the true value of hemoglobin.</p>
<p>Reinforcement learning (RL) is a feedback-based approach where algorithms learn through trial and error by balancing the management of input knowledge with exploring unknown data. The model executes the task by understanding some basic rules and learns by weighting certain variables to find the correct solution. The supervisor should only indicate whether the algorithm&#x2019;s answer is correct; it is like supervised learning but delegates decision-making (weighting) to the algorithm&#x2019;s trial-error. An up-to-date application is the continuous management of oxygen flow rate for critically ill COVID-19 patients (<xref ref-type="bibr" rid="B103">Zheng et al., 2021</xref>), where the algorithm learned the appropriate flow rate for each patient, reducing the mortality rate and increasing the savings of oxygen-scarce resources in the pandemic. However, these algorithms are known as &#x2018;data-hungry&#x2019; since they need large amounts of data to train different paths to achieve sustainable performance, which is a limitation when applied to non-structured clinical information.</p>
<p>Deep learning (DL) is another class of machine learning, conceptually similar to the human brain since it mimics the inner mechanisms of brain neurons to transport and process data, create patterns, and enlighten decision-making. These algorithms extract high-level interactions between hidden layers of features from the input and learn complex interactions to develop accurate models from raw data. In medical diagnosis, this method has an application in image analysis, namely, on X-ray risk fracture diagnosis, breast density mammography analysis, or cardiovascular and pulmonary image reconstruction, all with 510(k) premarket notification clearance from the FDA (<xref ref-type="bibr" rid="B14">Benjamens et al., 2020</xref>). Deep learning requires minimal human intervention (except for sample labelling) but large amounts of curated data sets. Additionally, computational power is also relevant to conducting these tasks. Some researchers estimated that a life cycle to train several large AI models could emit nearly five times the CO2 of an average American car (<xref ref-type="bibr" rid="B43">Hao, 2023</xref>). These challenges led to the study and development of a new learning approach based on sparse modeling technology. The key differences compared to DL are the ability to provide comparable or even better accuracy results, working with small datasets, and performing feature extraction with much less computational power (1% of the energy required for DL). It also provides an explainable &#x2018;white box&#x2019; the user can perceive (<xref ref-type="bibr" rid="B39">Fujiwara, 2021</xref>). Although this technology opens new routes in medical AI, the applications are still at a proof-of-principle and feasibility stage for cerebral infarction diagnosis assistance, liver cancer classification with a diagnosis support system, or anomaly detection in ECG signals. Therefore, it is still far from being approved for clinical deployment.</p>
<p>Federated learning (FL) is a new learning paradigm aiming to correct limitations in the current state-of-the-art model development for data governance, privacy, update, and sharing. FL moves the model to the data instead of moving the data to the model. This approach enables training common AI models from multiple independent data sources (with proprietary data governance, privacy, and access policies) to deploy unbiased, generalizable, and appropriate-fitting models. The most established workflow of FL was proposed by <xref ref-type="bibr" rid="B17">Brendan McMahan et al. (2017)</xref>. This workflow includes the distribution of the global model on independent &#x2018;clients&#x2019; that train the model on their own data and send the adjusted local model to the global server to perform the trained models&#x2019; aggregation; this cycle repeats until the global model converges. <xref ref-type="bibr" rid="B31">Dayan et al. (2021)</xref> implemented an FL approach to predict the oxygen requirements of symptomatic COVID-19 patients using the inputs of vital signs, laboratory data, and chest X-rays from 20 institutes. The federated model improved prediction metrics across all participating sites (trained locally) by an average increase of 16% and 38% in AUC and generalizability, respectively.</p>
<p>While learning remains a matter of study with newer approaches being developed (<xref ref-type="bibr" rid="B49">Kairouz et al., 2021</xref>), the pipeline of AI-based prediction models is still under standardization. The increasing number of reports in this field compels the establishment of guidelines not to gauge the quality of the prediction models but to provide indications for transparent and unified reporting of this matter.</p>
<p>Since this review focuses on AI application to blood parameters to extract clinical value, we next provide a brief overview of the most common routine blood tests before going deep into how AI has been used to extract clinical value from them.</p>
</sec>
<sec id="s1-2">
<title>Blood and routine blood tests</title>
<p>Blood is the only fluid tissue present in the human body. Typically, an average adult has 6&#x2013;7&#xa0;L of blood in total. Cell elements compose approximately 45% of the blood; the remaining 55% is the fluid portion, designated plasma or serum. Many diseases cause changes in blood composition; therefore, blood analysis is important in clinical diagnosis (<xref ref-type="bibr" rid="B9">Badrick, 2013</xref>). Routine blood tests (RBT) typically merge the hematology and biochemistry analysis to explore changes in the cellular and molecular parts of the blood (<xref ref-type="table" rid="T2">Table 2</xref>). Depending on the type of blood analysis, laboratory workflows require at least two independent blood-collecting tubes for the separate study of hematology and biochemistry, which often forces the patient to provide 4&#x2013;10&#xa0;mL of venous blood.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>
<bold>General health panel required in routine blood examinations</bold> (<xref ref-type="bibr" rid="B66">Matthew and Pincus, 2011</xref>).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="left">Parameter</th>
<th align="left">Description</th>
<th align="left">Below normality, related causes</th>
<th align="left">Above normality, related causes</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="12" align="center">CBC with differential</td>
<td align="left">RBC</td>
<td align="left">Hemoglobin transport</td>
<td align="left">Anemia; Blood loss; Bone marrow disorders; Cancer (certain types)</td>
<td align="left">Low oxygen related to heart disease, pulmonary fibrosis, smoking, or high-altitude living; High consumption of anabolic steroids such as erythropoietin; Myeloproliferative diseases such as polycythemia vera; kidney diseases</td>
</tr>
<tr>
<td align="left">Hemoglobin</td>
<td align="left">Oxygen and carbon dioxide exchange from the lungs to the tissues</td>
<td align="left">Anemia; Blood loss; Thalassemia</td>
<td align="left">
<italic>Same as RBC causes</italic>
</td>
</tr>
<tr>
<td align="left">Hematocrit</td>
<td align="left">The proportion of red blood cells in the whole blood</td>
<td align="left">Anemia; Blood loss; Cancer (certain types)</td>
<td align="left">Dehydration, smoking or high-altitude living; Heart, lung, or kidney diseases; Polycythemia vera</td>
</tr>
<tr>
<td align="left">MCV</td>
<td align="left">The average volume of red blood cells in the whole blood</td>
<td align="left">Iron-deficiency anemia; Thalassemia; Lead-poisoning; Chronic disease</td>
<td align="left">Folic acid or B12 deficiency; Preleukemia; Immune hemolytic anemia; Liver disease</td>
</tr>
<tr>
<td align="left">RDW</td>
<td align="left">Variation in size (anisocytosis) of red blood cells</td>
<td align="left">-</td>
<td align="left">Heart, kidney, or liver disease; Diabetes; Cancer</td>
</tr>
<tr>
<td align="left">WBC</td>
<td rowspan="6" align="left">Immunity</td>
<td align="left">Chemotherapy; Myelodysplastic syndrome; Autoimmune disorders; Leukemia; HIV</td>
<td align="left">Viral or bacterial infection; Inflammation; Rheumatoid arthritis; Pregnancy; Allergies, smoking, or stress</td>
</tr>
<tr>
<td align="left">Neutrophils</td>
<td align="left">Viral infection; Hepatitis; Aplastic anemia; Lupus</td>
<td align="left">Bacterial infection</td>
</tr>
<tr>
<td align="left">Eosinophils</td>
<td align="left">Bacterial infection</td>
<td align="left">Allergies; Parasitic infection</td>
</tr>
<tr>
<td align="left">Basophils</td>
<td align="left">Hyperthyroidism; Allergies; Infections</td>
<td align="left">Chronic inflammation; Hypothyroidism; Myeloproliferative disorders</td>
</tr>
<tr>
<td align="left">Lymphocytes</td>
<td align="left">Infections; Tuberculosis; Drug reactions; Stress</td>
<td align="left">Viral infections (i.e., Epstein-Barr virus)</td>
</tr>
<tr>
<td align="left">Monocytes</td>
<td align="left">Bone marrow disorders; infections; Systemic lupus erythematosus</td>
<td align="left">Infections; rheumatoid arthritis; chronic myelomonocytic leukemia</td>
</tr>
<tr>
<td align="left">Platelets</td>
<td align="left">Blood coagulation</td>
<td align="left">Cancer (leukemia, lymphoma); Viral infections; Anemia (certain types); Chemo and radiotherapy</td>
<td align="left">Genes mutation (essential thrombocythemia); Infection; Cancer; Inflammation; Iron deficiency</td>
</tr>
<tr>
<td rowspan="11" align="center">Metabolic</td>
<td align="left">Glucose</td>
<td align="left">Energy regulation</td>
<td align="left">Diabetes treatment; Drug reactions</td>
<td align="left">Diabetes mellitus; Infection (severe)</td>
</tr>
<tr>
<td align="left">Urea</td>
<td align="left">A waste product of protein digestion</td>
<td align="left">A low-protein diet (malnutrition); Severe liver damage</td>
<td align="left">Dehydration; Urinary tract obstruction; Congestive heart failure or recent heart attack; Kidney malfunction</td>
</tr>
<tr>
<td align="left">Creatinine</td>
<td align="left">A waste product of muscles</td>
<td align="left">Muscle diseases; Excess water loss; Liver diseases</td>
<td align="left">Dehydration; High-intensity exercise; Kidney malfunction (stones, infection, failure)</td>
</tr>
<tr>
<td align="left">Potassium</td>
<td align="left">Electrolyte on body fluid regulation and nerve function</td>
<td align="left">Vomiting or diarrhea; Kidney damage</td>
<td align="left">Diabetes mellitus; Advanced renal failure; Alcohol, burns</td>
</tr>
<tr>
<td align="left">Sodium</td>
<td align="left">Electrolyte on body fluid regulation and nerve function</td>
<td align="left">Vomiting, diarrhea, or burns; Nephritis or diabetic acidosis; Kidney or heart failure</td>
<td align="left">Severe vomiting, diarrhea, or burns; Dehydration, excessive sweating, or adrenal glands disorders</td>
</tr>
<tr>
<td align="left">Chloride</td>
<td align="left">Electrolyte on blood volume and osmotic pressure regulation</td>
<td align="left">Severe vomiting, diarrhea, or excessive sweating; Congestive heart failure, lung disease</td>
<td align="left">Dehydration; Kidney disease or Cushing&#x2019;s syndrome</td>
</tr>
<tr>
<td align="left">Albumin</td>
<td align="left">A protein carrier for hormones, vitamins, and enzymes and prevents leaking on blood vessel</td>
<td align="left">Kidney, liver, digestive, or thyroid diseases; Malnutrition or infection</td>
<td align="left">Dehydration, severe diarrhea; Steroids, insulin, and hormones intake</td>
</tr>
<tr>
<td align="left">ALP</td>
<td align="left">An enzyme that removes the phosphate group of several proteins</td>
<td align="left">Malnutrition, vitamin deficiency; Hypothyroidism</td>
<td align="left">Liver or bone disorders</td>
</tr>
<tr>
<td align="left">ALT</td>
<td align="left">An enzyme that converts alanine for energy production</td>
<td align="left">Chronic kidney disease; B6 vitamin deficiency</td>
<td align="left">Liver disease; Hemochromatosis; Mononucleosis</td>
</tr>
<tr>
<td align="left">AST</td>
<td align="left">An enzyme that catalyzes aspartate conversion</td>
<td align="left">Kidney, liver, or cancer disease; B6 vitamin deficiency; Autoimmune or genetic conditions</td>
<td align="left">Bruising, trauma, necrosis; Infection; Neoplasia of liver or muscle</td>
</tr>
<tr>
<td align="left">Calcium</td>
<td align="left">Mineral with a vital role in muscle tone and excitability</td>
<td align="left">Acute pancreatitis; renal disease; D vitamin deficiency</td>
<td align="left">Excess secretion of PTH; Cancer</td>
</tr>
<tr>
<td rowspan="4" align="center">Lipidic</td>
<td align="left">Triglycerides</td>
<td align="left">Lipid</td>
<td align="left">Low-fat diet; Hyperthyroidism; Malabsorption syndrome</td>
<td align="left">Liver, kidney, or thyroid disease; Alcohol, obesity, smoking; Uncontrolled diabetes</td>
</tr>
<tr>
<td align="left">HDL-cholesterol, direct</td>
<td align="left">High-density lipoprotein</td>
<td align="left">Unhealthy lifestyle; Smoking</td>
<td align="left">Unhealthy diet; Genetics; Hypothyroidism</td>
</tr>
<tr>
<td align="left">Total cholesterol</td>
<td align="left">Lipoprotein</td>
<td align="left">Malnutrition or malabsorption; Anemia; Thyroid or liver disease</td>
<td align="left">Unhealthy diet and lifestyle; Obesity</td>
</tr>
<tr>
<td align="left">LDL-cholesterol, calculated</td>
<td align="left">Low-density lipoprotein</td>
<td align="left">Hemorrhagic stroke; Cancer; Anxiety or depression</td>
<td align="left">Unhealthy lifestyle; Genetics; Age</td>
</tr>
<tr>
<td rowspan="4" align="center">Others</td>
<td align="left">Uric acid</td>
<td align="left">A waste product of purines metabolization</td>
<td align="left">Wilson&#x2019;s disease; Fanconi syndrome; Alcoholism</td>
<td align="left">Unhealthy lifestyle; Diabetes mellitus; Alcoholism</td>
</tr>
<tr>
<td align="left">GGT</td>
<td align="left">Enzyme</td>
<td align="left">Unhealthy diet; B6 or magnesium deficiency</td>
<td align="left">Liver or bile ducts disease</td>
</tr>
<tr>
<td align="left">TSH</td>
<td align="left">Hormone</td>
<td align="left">Hyperthyroidism</td>
<td align="left">Hypothyroidism</td>
</tr>
<tr>
<td align="left">C-reactive protein</td>
<td align="left">Acute phase reactant protein</td>
<td align="left">-</td>
<td align="left">Inflammation; Bacterial or viral infections; Autoimmune disorders; Heart attack; Sepsis</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p><italic>RBC</italic>, red blood cells; <italic>MCV</italic>, mean corpuscular volume; <italic>RDW</italic>, red cell distribution width; <italic>WBC</italic>, white blood cells; <italic>ALP</italic>, alkaline phosphatase; <italic>AST</italic>, aspartate aminotransferase; <italic>ALT</italic>, alanine transaminase; <italic>GGT</italic>, gamma-glutamyl transferase; <italic>TSH</italic>, thyroid stimulating hormone; <italic>HIV</italic>, human immunodeficiency virus; <italic>PTH</italic>, parathyroid hormone.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>In hematology, cell blood count (CBC) is the most frequently performed exam. It includes not only the analysis of the three most important types of cells, erythrocytes (red blood cell, RBC), leukocytes (white blood cell, WBC), and thrombocytes (platelet) but also comprises differential information on WBC subgroups (lymphocytes, segmented neutrophils, monocytes, eosinophils, basophils). Hematology also provides information on hemoglobin concentration (Hb), hematocrit percentage (HTC), mean corpuscular volume (MCV), mean cell hemoglobin concentration (MCHC), and the red cell distribution width (RDW) (<xref ref-type="bibr" rid="B23">Celkan, 2020</xref>).</p>
<p>Biochemistry panels examine other chemical substances, such as electrolytes, hormones, and proteins. The portion of the blood that remains after the removal of all blood cells is composed mainly of water (90%), proteins (9%) that regulate plasma osmotic pressure and are important in the transport of fatty acids, thyroid and steroid hormones, and other chemical substances (1%) such as gases, nutrients, and vitamins (<xref ref-type="bibr" rid="B64">Marieb and Hoehn, 2012</xref>). The serum refers to plasma without clotting factors, i.e., fibrinogen, and is commonly used for chemistry testing and coagulation studies (<xref ref-type="bibr" rid="B26">Chatburn and Hematology, 2010</xref>). A general health blood parameters panel typically includes CBC with differential, comprehensive metabolic and lipid panels, uric acid, GGT, and TSH (<xref ref-type="bibr" rid="B81">Richard et al., 2011</xref>).</p>
</sec>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<p>We conducted a literature review of studies published between 2012 and 2022 that used artificial intelligence methodologies, namely, machine learning algorithms, to extrapolate clinical outcomes from routine blood tests. Using the query &#x2018;artificial intelligence OR machine learning AND routine blood tests&#x2019; in the PubMed<sup>&#xae;</sup> electronic database, we found 164 articles that proceeded to the screening stage. Rayyan Management Software was used to import discovered reports, conduct study selection, and apply eligibility criteria.</p>
<p>Original English-language studies that reported diagnosis or prognosis of ICD-10 diseases based only on predefined blood parameters, namely, RBC, Hemoglobin, Hematocrit, MCV, RDW, WBC, Neutrophils, Eosinophils, Basophils, Lymphocytes, Platelets, Glucose, Urea, Creatinine, Potassium, Sodium, Chloride, Albumin, ALP, ALT, AST, Calcium, Triglycerides, HDL-cholesterol, Total cholesterol, LDL-cholesterol, Uric acid, GGT, TSH and C-reactive protein, were eligible for inclusion. The analysis did not include studies that included other biofluids parameters or reviews, systematic reviews, meta-analyses, protocols, commentaries, or book chapters.</p>
<p>The International Statistical Classification of Diseases and Related Health Problems 10th revision (ICD-10) was used to categorize the 54 studies, which were divided into 10 disease classes: infections (or parasitic diseases) (9), neoplasms (6), blood (3), endocrine (nutritional or metabolic) (5), mental (behavioral or neurodevelopmental) (2), circulatory (3), respiratory (2), digestive (5), genitourinary (1), and particular diseases (COVID-19) (18).</p>
</sec>
<sec id="s3">
<title>An overview of machine-learning studies based on routine blood tests for diagnosis or prognosis of ICD-10 pathologies</title>
<p>Most of the studies covered in this review fall into the diagnosis category; we identify the exceptions in the outcome column of each study (see tables below). We next describe how machine learning has been applied to extract clinical value from routine blood tests for specific diseases (using the ICD-10 classification as reference); <xref ref-type="table" rid="T3">Table 3</xref> to 13 summarize information (outcome, sample, selected features, methods, and most relevant findings) for each study.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of certain infections or parasitic diseases.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B83">Sarbaz et al. (2013)</xref>
</td>
<td align="left">HTLV-I</td>
<td align="left">101 (normal) 94 (leukemia) 107 (HTLV-I)</td>
<td align="left">WBC, PLT, EO%</td>
<td align="left">L: supervised, classification; FS: Pearson correlation; C: CHAID (accuracy 91%); V: NA</td>
<td align="left">HTLV-I distinguished from leukemia or normal patients with clinical data based on differential CBC. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B78">Ratzinger et al. (2014)</xref>
</td>
<td align="left">Bacteremia</td>
<td align="left">15,985 (1,286)</td>
<td align="left">NE/WBC</td>
<td align="left">L: supervised, classification; FS: wrapper approach; C: A2DE-20 variables (0.76) and A2DE-10 variables (0.75); V: 0.80 and 0.78</td>
<td align="left">Low-risk group: NPV &#x3e;98.8%. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B89">Soguero-Ruiz et al. (2015)</xref>
</td>
<td align="left">Surgical-site infection (post-operative)</td>
<td align="left">1,005 (101)</td>
<td align="left">Thrombocytes, ALP, CRP, Albumin, Creatinine, WBC</td>
<td align="left">L: supervised, classification FS: RBF-RFE C: non-linear SVM (0.87) V: leave-one-out cv</td>
<td align="left">Adjusting the temporal structure of blood tests improves the system&#x2019;s accuracy. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B79">Rawson et al. (2019)</xref>
</td>
<td align="left">Bacterial infection (hospital admission)</td>
<td align="left">104 (35%)</td>
<td align="left">CRP, WBC, Creatinine, ALT, Bilirubin, ALP</td>
<td align="left">L: supervised, classification FS: NA C: SVM (0.84) V: 10-fold cv</td>
<td align="left">Infection predicted in a timeframe of 72&#xa0;h after admission. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B54">Kocbek et al. (2019)</xref>
</td>
<td align="left">Surgical site infection</td>
<td align="left">1,137 (233)</td>
<td align="left">CRP, WBC, Sodium, Hb, Thrombocytes, Albumin</td>
<td align="left">L: supervised, classification FS: NA C: Full Lasso Model (0.95) V: repeated hold-out cv</td>
<td align="left">Infection was predicted based on three timeframes of 60, 30, and 15 days before surgery. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B70">Moranga et al. (2020)</xref>
</td>
<td align="left">Malaria</td>
<td align="left">2,207 (UM&#x3d;703) (SM&#x3d;526) (nMI&#x3d;978)</td>
<td align="left">UM &#x2260; nMI: PLT, RBC, LY; SM &#x2260; nMI: MPV, MCV; SM: RBC, PLT</td>
<td align="left">L: supervised, classification FS: NA C: ANN [UM &#x2260; nMI (0.866), SM &#x2260; nMI (0.983)], V: 10-fold cv</td>
<td align="left">Models are classified based on the combination of PLT, RBC, LY, LY%, and MPV. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B45">Ho et al. (2020)</xref>
</td>
<td align="left">Dengue</td>
<td align="left">4,894 (2,942)</td>
<td align="left">Age, Temperature, WBC</td>
<td align="left">L: supervised, classification FS: NA C: DNN (0.858) V: 10-fold cv</td>
<td align="left">For all three models, pre-peak sensitivities (&#x3c;35 weeks) were higher than 90%. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B69">Mooney et al. (2021)</xref>
</td>
<td align="left">Bacteremia (pregnant and post-partum)</td>
<td align="left">255 (129)</td>
<td align="left">NLR, MPV, BA</td>
<td align="left">L: supervised, classification FS: NA C: RF (0.98) V: 10-fold cv</td>
<td align="left">NLR &#x3e;20 achieved a negative predictive value of 97.4% for a 3% prevalence cohort. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B105">Zoabi et al. (2021)</xref>
</td>
<td align="left">Bloodstream infection</td>
<td align="left">7,889 (2,590)</td>
<td align="left">Albumin, RDW, Creatinine</td>
<td align="left">L: supervised, classification FS: NA C: Decision-Tree (gradient boosting): inclusive (0.82), compact (0.81) V: cross-validation</td>
<td align="left">ML showed substantial improvement in the AUC score compared to traditional methods (0.83 vs. 0.62 on the inclusive model) and (0.81 vs. 0.62 on the compact model). External validation (proxy): available at github.com/nshomron/infecpred Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, Area under the ROC (receiver-operating characteristic) curve; <italic>HTLV-I</italic>, Human T-lymphotropic virus type-I; <italic>UM</italic>, uncomplicated malaria; <italic>nMI</italic>, Non-Malarial Infections; <italic>SM</italic>, severe malaria; <italic>WBC</italic>, white blood cells; <italic>PLT</italic>, platelets; <italic>EO%</italic>, eosinophil percentage; <italic>NE</italic>, neutrophils; <italic>ALP</italic>, alkaline phosphatase; <italic>CRP</italic>, C-Reactive Protein; <italic>Hb</italic>, Hemoglobin; <italic>LY</italic>, lymphocytes; <italic>LY%</italic>, lymphocyte percentage; <italic>MPV</italic>, mean platelet volume; <italic>MCV</italic>, mean corpuscular volume; <italic>RBC</italic>, red blood cells; <italic>NLR</italic>, Neutrophil-to-Lymphocyte ratio; <italic>BA</italic>, basophils; <italic>RDW</italic>, red cell distribution width; <italic>L</italic>, Learning; <italic>FS</italic>, feature selection; <italic>C</italic>, Classification; <italic>V</italic>, Validation; <italic>CHAID</italic>, Chi-Squared Automatic Interaction Detection; <italic>NA</italic>, not available; <italic>A2DE</italic>, Averaged 2-Dependence Estimator; <italic>RBF-RFE</italic>, Radial Basis Function - Recursive Feature Elimination; <italic>SVM</italic>, support vector machines; <italic>CV</italic>, Cross-Validation; <italic>ANN</italic>, artificial neural networks; <italic>DNN</italic>, deep neural networks; <italic>RF</italic>, Random-Forests; <italic>CBC</italic>, cell blood count; <italic>NPV</italic>, negative predictive value; <italic>ML</italic>, machine learning.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<sec id="s3-1">
<title>Infectious or parasitic diseases (ICD-10 class I)</title>
<p>The infections or parasitic diseases studied include the human T-lymphotropic virus, bacteremia, bloodstream infection, general bacterial infection (in the surgical room and at hospital admission), malaria, and dengue. The studies used the traditional blood-based indicators of infection stated in <xref ref-type="table" rid="T2">Table 2</xref>, namely, white blood cells, platelets, glucose, creatinine, albumin, AST, and CRP. However, &#x2018;exceptions&#x2019; associated with feature selection were verified in surgical-site infection (ALP and sodium), malaria (RBC, MPV, MCV), bacteremia (MPV), and bloodstream infection (RDW). The studies on surgical-site infection were concerned with the relationship between time of blood analysis and the prediction of a diagnosis. Significant progression was accomplished by Kocbek et al., with the prediction of the full lasso model (AUC&#x3d;0.95) at different timeframes (60, 30, and 15 days), benefiting from the findings of Soguero-Ruiz et al. (AUC&#x3d;0.87), which stated the importance of adjusting the temporal structure of blood analysis to increase classification performance (<xref ref-type="bibr" rid="B89">Soguero-Ruiz et al., 2015</xref>; <xref ref-type="bibr" rid="B54">Kocbek et al., 2019</xref>). While ALP was selected for post-operative surgical infection, sodium was featured for surgical site infection. Malaria was studied for supervised classification with discrimination between uncomplicated malaria (UM), severe malaria (SM), and non-malaria infections (nMI). Distinctions were modeled by an artificial neural network with three different layers using distinctive features for each discrimination. Interestingly, SM separated from nMI based on the unique combination between MPV and MCV as classifiers of SM (AUC&#x3d;0.98). UM and nMI were also distinguished based on PLT, RBC, and LY (AUC&#x3d;0.86). The approach to Dengue differed from the approach to malaria. Ho et al. 
evaluated the probability of the condition in a predefined timespan of 35 weeks (<xref ref-type="bibr" rid="B45">Ho et al., 2020</xref>). Clinical data was fed into a deep neural network in competition with other learners, reaching an internal validation that surpassed sensitivities of 90% in a 3% prevalence cohort. Surprisingly, the weak learners performed similarly, indicating the clinical data value based only on age, temperature, and WBC. The approach from Sarbaz et al. outlined an infection by the human T-lymphotropic virus type I, a retrovirus known to be asymptomatic in most cases and evolve to malignancy and neural diseases in a few patients (<xref ref-type="bibr" rid="B83">Sarbaz et al., 2013</xref>). Even so, the dataset used has a relative balance between three outcomes: normal (n&#x3d;101), leukemia (n&#x3d;94), and HTLV-I (n&#x3d;107). The supervised classification model is based on a decision-trees algorithm&#x2013;CHAID, which is the chi-squared automatic interaction detection, evaluating the association between input features exploring the levels of the three to maximize the classification performance. The internal validation achieved excellent performance (AUC&#x3e;0.90) with a sensitivity of 95.8% in recognition of patients based on leukocytes, platelets, and percentage of eosinophils information. Bacteremia was initially studied in 2014 by Ratzinger et al. in a cross-sectional study with the largest cohort associated with infectious diseases (n&#x3d;15,985) with 1,286 presenting a positive blood culture result: <italic>E. coli</italic> (n&#x3d;406), <italic>S. aureus</italic> (n&#x3d;297), <italic>K. pneumoniae</italic> (n&#x3d;83) and others (n&#x3d;500) (<xref ref-type="bibr" rid="B78">Ratzinger et al., 2014</xref>). The dataset split kept the 8% prevalence of bacteremia in training and validation sets, and the statistical analysis identified NE/WBC as the most important individual predictor (AUC&#x3d;0.694). 
The A2DE algorithm (na&#xef;ve-Bayes-based) produced two models with similar performance: model 1 (20 variables, NPV&#x3d;0.966) and model 2 (10 variables, NPV&#x3d;0.966). Results of internal validation kept the classification performance constant and selected age, creatinine, CRP, eosinophil, bilirubin, lymphocytes, monocytes, monocytes (%), neutrophils (%), and sodium as important predictors of bacteremia. Mooney et al. focused on a pregnant or post-partum cohort, where the bacteremia prevalence was lower (nearly 3% in 255 patients) (<xref ref-type="bibr" rid="B69">Mooney et al., 2021</xref>). The random forests classifier achieved an NPV of 97.4%, supported by the NLR, MPV, and BA indexes. Finally, Zoabi et al. evaluated bloodstream infection with a gradient-boosting decision tree and compared the results of the full (AUC&#x3d;0.83) and compact (AUC&#x3d;0.81) models with the standard conventional scores (AUC&#x3d;0.62) (<xref ref-type="bibr" rid="B105">Zoabi et al., 2021</xref>). The evaluation of the model was made available, differentiating this study from the previous and enabling a prospective assessment of the method.</p>
</sec>
<sec id="s3-2">
<title>Neoplasms (ICD-10 class II)</title>
<p>The applications of AI to blood parameters for neoplasia reviewed herein include colorectal cancer, leukemia (pediatric acute lymphoblastic, leukemias differentiation and lymphocytic prognosis), and non-specified cancer diagnosis in a primary care center. Erythrocytes, hemoglobin, hematocrit, RDW, leukocytes, platelets, aspartate aminotransferase (AST), calcium, and LDL-cholesterol were the cancer-related blood parameters identified (<xref ref-type="table" rid="T2">Table 2</xref>). Studies related to colorectal cancer (<xref ref-type="table" rid="T4">Table 4</xref>) were highly consistent since they built on a large sample size (&#x3e;10k patients), employed a supervised decision tree classification algorithm with similar internal validation (AUC&#x3d;0.82 and AUC&#x3d;0.81), and were externally validated with equal or higher performance (AUC&#x3d;0.81, AUC&#x3d;0.87, and AUC&#x3d;0.85). This consistency endorses the relevance of their findings. Notably, different outcomes were verified: while Kinar et al. demonstrated sensitivity stability between 480 and 240 days before diagnosis (AUC&#x2248;0.76) with a posterior increase in the last 240 days (AUC&#x3e;0.80) (<xref ref-type="bibr" rid="B52">Kinar et al., 2016</xref>), Hornbrook et al. identified sub-regions of colorectal cancer that were better diagnosed, namely, the cecum and the ascending colon (<xref ref-type="bibr" rid="B47">Hornbrook et al., 2017</xref>).</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of neoplasms.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="left">
<xref ref-type="bibr" rid="B52">Kinar et al. (2016)</xref>
</td>
<td rowspan="4" align="left">CRC</td>
<td rowspan="4" align="left">Israel: 606,403 (3,135) UK: 25,613 (5,061)</td>
<td rowspan="4" align="left">Hb, MCH, MCHC, HTC, MCV, RDW</td>
<td rowspan="4" align="left">L: supervised, classification FS: NA C: Decision trees (0.82) V: cross-validation</td>
<td align="left">The model&#x2019;s performance on a 10&#x2013;12-month time window achieved AUC&#x3d;0.79</td>
</tr>
<tr>
<td align="left">Sensitivities at a 6-month time window were 10% higher compared to anemia guidelines</td>
</tr>
<tr>
<td align="left">External validation: UK (0.81)</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B47">Hornbrook et al. (2017)</xref>
</td>
<td rowspan="3" align="left">CRC</td>
<td rowspan="3" align="left">17,095 (900)</td>
<td rowspan="3" align="left">Gender, Birth year, CBC</td>
<td rowspan="3" align="left">L: supervised, classification FS: NA C: Decision trees (0.81) V: cross-validation</td>
<td align="left">The CRC detection model performed best in detecting cecum and ascending colon tumors rather than in transverse and sigmoid colon and rectum</td>
</tr>
<tr>
<td align="left">External validation: MHS (Israel) (0.87), NHS (0.85)</td>
</tr>
<tr>
<td align="left">Clinical deployment: CRC program in Israel</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B62">Mahmood et al. (2020)</xref>
</td>
<td rowspan="3" align="left">Pediatric ALL</td>
<td rowspan="3" align="left">94 (50)</td>
<td rowspan="3" align="left">PLT, Hb, WBC, Gender</td>
<td rowspan="3" align="left">L: supervised, classification FS: CART C: CART (0.87) V: 10-fold cv</td>
<td align="left">Platelet abnormality significant predictor in pediatric ALL.</td>
</tr>
<tr>
<td align="left">External validation: NA</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B88">Soerensen et al. (2022)</xref>
</td>
<td rowspan="3" align="left">Cancer diagnosis within 90 days on primary care</td>
<td rowspan="3" align="left">Cohort I: 5,224 (1,042) Cohort II: 1,712 (1,368)</td>
<td rowspan="3" align="left">ALB, PLT</td>
<td rowspan="3" align="left">L: supervised, classification FS: NA C: LR (0.80), ANN (0.91) on cohort I and LR (0.79), ANN (0.79) on cohort II V: NA</td>
<td align="left">Reduced albumin and increased platelet levels increase cancer risk in a concentration-dependent way</td>
</tr>
<tr>
<td align="left">External validation: NA</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B42">Haider et al. (2022)</xref>
</td>
<td rowspan="3" align="left">Leukemias differentiation: AML, APML, CML, ALL, CLL, Others</td>
<td rowspan="3" align="left">1,577: (354), (96), (213), (272), (153), (489)</td>
<td rowspan="3" align="left">CBC</td>
<td rowspan="3" align="left">L: supervised, classification FS: NA C: ANN (0.83) V: NA</td>
<td align="left">CBC not only differentiates from six lineages of leukemia but also remains predictive for the type (acute, chronic, or other)</td>
</tr>
<tr>
<td align="left">External validation: NA</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B67">Meiseles et al. (2022)</xref>
</td>
<td rowspan="3" align="left">Prognosis of Lymphocytic leukemia treatment within 2 years</td>
<td rowspan="3" align="left">109</td>
<td rowspan="3" align="left">Hb, Time from diagnosis, RDW, NLR</td>
<td rowspan="3" align="left">L: supervised, classification FS: NA C: GBM using inexpensive features (0.86); decision trees (0.74) V: 10-fold cv</td>
<td align="left">Low NLR and high values of RDW are relevant predictors for treatment need</td>
</tr>
<tr>
<td align="left">External validation: NA</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, area under the ROC (receiver-operating characteristic) curve; <italic>CRC</italic>, colorectal cancer; <italic>ALL</italic>, acute lymphoblastic leukemia; <italic>AML</italic>, acute myeloid leukemia; <italic>APML</italic>, acute promyelocytic leukemia; <italic>CML</italic>, chronic myeloid leukemia; <italic>CLL</italic>, chronic lymphoid leukemia; <italic>Hb</italic>, hemoglobin; <italic>MCH</italic>, mean corpuscular hemoglobin; <italic>MCHC</italic>, mean corpuscular hemoglobin concentration; <italic>HTC</italic>, hematocrit; <italic>MCV</italic>, mean corpuscular volume; <italic>RDW</italic>, red cell distribution width; <italic>CBC</italic>, cell blood count; <italic>PLT</italic>, platelets; <italic>WBC</italic>, white blood cells; <italic>ALB</italic>, albumin; <italic>NLR</italic>, neutrophil-to-lymphocyte ratio; <italic>L</italic>, learning; <italic>FS</italic>, feature selection; <italic>C</italic>, classification; <italic>V</italic>, validation; <italic>CART</italic>, classification and regression trees algorithm; <italic>NA</italic>, not available; <italic>GBM</italic>, gradient boosting model; <italic>CV</italic>, cross-validation; <italic>UK</italic>, United Kingdom; <italic>MHS</italic>, Maccabi Healthcare Services; <italic>NHS</italic>, National Health Service.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Regarding leukemia, Mahmood et al. began to evaluate the ability to diagnose pediatric acute lymphoblastic leukemia (ALL) in a small cohort (n&#x3d;94), where fifty patients had the disease (<xref ref-type="bibr" rid="B62">Mahmood et al., 2020</xref>). The study findings were achieved through the comparison of four classifiers wherein the classification and regression trees (CART) performed better (accuracy&#x3d;0.87) with a decision tree that included low platelet (43%) and hemoglobin (24%) levels and high levels of white blood cells (4%). The disease was further distinguished by Haider et al. from a set of other pathologic conditions: acute myeloid leukemia (AML, n&#x3d;354), acute promyelocytic leukemia (APML, n&#x3d;96), chronic myeloid leukemia (CML, n&#x3d;213), and chronic lymphoid leukemia (CLL, n&#x3d;153) (<xref ref-type="bibr" rid="B42">Haider et al., 2022</xref>). The authors based the approach on a conventional cell blood count analysis and developed an artificial neural network to classify the six lineages of the disease: AML (AUC&#x3d;0.905), APML (AUC&#x3d;0.805), CML (AUC&#x3d;0.937), CLL (AUC&#x3d;0.870) and ALL (AUC&#x3d;0.829). Internal validation sets increased overall accuracy from 83.1% to 84.7%, which denotes a non-overfitted model. Meiseles et al. evaluated the prognosis of treatment needed in 2&#xa0;years for patients with lymphocytic leukemia with a dataset of 109 patients (<xref ref-type="bibr" rid="B67">Meiseles et al., 2022</xref>). The outcome was predicted with a gradient boosting model (GBM, AUC&#x3d;0.768) and compared with a general linear model (GLM, AUC&#x3d;0.753), both with higher performance than the current scoring system for prognostic evaluation of patients with CLL (CLL-IPI, AUC&#x3d;0.52). 
Despite predicting the general progress of the disease, the CLL-IPI does not evaluate the necessity of the treatment, and even a simple decision tree based on inexpensive features&#x2013;Hb, time since diagnosis, NLR, and RDW&#x2013;achieved higher performance (AUC&#x3d;0.74).</p>
<p>Finally, a non-specified cancer diagnosis was approached by Soerensen et al., through the modeling (training and internal validation) on cohort I (n&#x3d;6,266 from 2011 to 2018) and the evaluation on cohort II (n&#x3d;3,080 from 2019 to 2020). The primary outcome was &#x201c;cancer within 90 days,&#x201d; and the proposed methodology compared artificial neural network (ANN) <italic>versus</italic> logistic regression (LR) approaches (<xref ref-type="bibr" rid="B88">Soerensen et al., 2022</xref>). The results were slightly different since ANN predicted better in the modeling cohort (AUC&#x3d;0.91) but decreased its performance in the evaluation cohort (AUC&#x3d;0.79); in opposition, LR demonstrated higher stability in both cohorts (n1, AUC&#x3d;0.80, and n2, AUC&#x3d;0.79). The concentration decrease in albumin with a dependent increase in platelet levels was related to an increased risk of cancer, even for patients whose metabolite relationship was verified in &#x2018;normal&#x2019; ranges.</p>
</sec>
<sec id="s3-3">
<title>Diseases of the blood or blood-forming organs (ICD-10 class III)</title>
<p>The approach to blood diseases fundamentally combined standard CBC parameters with artificial neural networks. The outcomes included the diagnosis of iron deficiency anemia in women, thalassemia minor (TM) in the general adult population, and the distinction between iron-deficiency anemia and &#x3b2;-thalassemia in three scenarios (males, females, and both). The approach from Yilmaz et al. involved studying several ANN strategies to verify which one had the best accuracy without performing feature selection in the dataset (<xref ref-type="bibr" rid="B101">Y&#x131;lmaz and Bozkurt, 2012</xref>). The accuracy results obtained were highly similar between the studied strategies (accuracies&#x2265;0.98), and comparison with previous studies (<xref ref-type="bibr" rid="B6">Azarkhish et al., 2012</xref>) showed a slight increase in sensitivity from 0.968 to 0.976, conferring an excellent opportunity to perform an external validation of the model with consequent valid clinical deployment. In thalassemia minor, Barnhart-Magen et al. studied a cohort of 185 verified alpha and beta TM patients with a control group that included IDA, myelodysplastic (MDS), and healthy subjects (<xref ref-type="bibr" rid="B12">Barnhart-Magen et al., 2013</xref>). Despite feeding the ANN with six CBC metabolites, only RBC, RDW, and MCV values achieved higher metrics (<xref ref-type="table" rid="T5">Table 5</xref>). However, the specificity of 1.00 (TM vs. healthy and MDS) decreased to 0.90 (TM vs. healthy, MDS, and IDA). &#xc7;il et al. reported improvement with the distinction of &#x3b2;-thalassemia and IDA studied in gender groups with different algorithms (weak learners and neural networks) (<xref ref-type="bibr" rid="B27">&#xc7;il et al., 2020</xref>). The principal findings include a common gender RELM algorithm (specificity&#x3d;0.966), ELM and RELM models for females (specificity&#x3d;0.952), and an SVM model for males (specificity&#x3d;0.938). 
While these scores surpassed the previous studies, the sample size was small, limiting the study findings.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of diseases of the blood or blood-forming organs.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B101">Y&#x131;lmaz and Bozkurt (2012)</xref>
</td>
<td align="left">Women&#x2019;s IDA</td>
<td align="left">Training: 2000(NA) Test: 600 (122)</td>
<td align="left">RBC, Hb, HCT, MCV, MCH, MCHC</td>
<td align="left">L: supervised, classification FS: NA C: ANN (0.99) V: test-set</td>
<td align="left">ANN and medical diagnosis achieved comparable results. ANN training with several strategies (FFN, CFN, DDN, TDN, LVQ, PNN) produced similar results (accuracy &#x2265;0.98). External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B12">Barnhart-Magen et al. (2013)</xref>
</td>
<td align="left">TM</td>
<td align="left">526 (185)</td>
<td align="left">RBC, Hb, MCV, RDW, MCH, PLT</td>
<td align="left">L: supervised, classification FS: NA C: ANN (specificity&#x3d;0.967, sensitivity&#x3d;1) V: test-set</td>
<td align="left">ANN only differentiates TM from the control group based on MCV, RDW, and RBC. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B27">&#xc7;il et al. (2020)</xref>
</td>
<td align="left">&#x3b2;-thalassemia and IDA distinction</td>
<td align="left">342 (152)</td>
<td align="left">RBC, HCT, MCV, MCH, MCHC, RDW</td>
<td align="left">L: supervised, classification FS: NA C: several weak learners and ANN (&#x3e;0.90) V: test-set</td>
<td align="left">Different models were best according to gender: SVM for males, RELM for both, and ELM and RELM for females. External validation: NA Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, area under the ROC (receiver-operating characteristic) curve; <italic>IDA</italic>, iron deficiency anemia; <italic>TM</italic>, thalassemia minor; <italic>RBC</italic>, red blood cells; <italic>Hb</italic>, hemoglobin; <italic>HCT</italic>, hematocrit; <italic>MCV</italic>, mean corpuscular volume; <italic>MCH</italic>, mean corpuscular hemoglobin; <italic>MCHC</italic>, mean corpuscular hemoglobin concentration; <italic>RDW</italic>, red cell distribution width; <italic>PLT</italic>, platelets; <italic>L</italic>, learning; <italic>FS</italic>, feature selection; <italic>C</italic>, classification; <italic>V</italic>, validation; <italic>NA</italic>, not available; <italic>ANN</italic>, artificial neural networks; <italic>FFN</italic>, feedforward networks; <italic>CFN</italic>, cascade forward networks; <italic>DDN</italic>, distributed delay networks; <italic>TDN</italic>, time delay networks; <italic>LVQ</italic>, learning vector quantization; <italic>PNN</italic>, probabilistic neural network; <italic>SVM</italic>, support vector machines; <italic>RELM</italic>, regularized extreme learning machine; <italic>ELM</italic>, extreme learning machine.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-4">
<title>Endocrine, nutritional, or metabolic diseases (ICD-10 class IV)</title>
<p>This review reports endocrine, nutritional, or metabolic diseases mainly applied to the diagnosis of type 2 diabetes mellitus (T2DM) and the prediction of blood glucose in type 1 diabetes mellitus (T1DM) and ensuing disease complications (insulin resistance, polyneuropathy, and iatrogenic hypoglycemia) (<xref ref-type="table" rid="T6">Table 6</xref>). Regarding diagnosis, Kopitar et al. approached early T2DM in a cohort of 3,723 individuals employing different ML algorithms without significant improvements related to diagnosis accuracy or newly relevant features (<xref ref-type="bibr" rid="B55">Kopitar et al., 2020</xref>). Indeed, the authors concluded that the model&#x2019;s stability in linear regression was preferred over other learning algorithms, and the increased data available in electronic health records was useful to update prediction models and stabilize important features: hyperglycemia, age, HDL-cholesterol, and triglycerides.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of endocrine, nutritional, or metabolic diseases.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B15">Bernardini et al. (2019)</xref>
</td>
<td align="left">Clinical factors related to insulin resistance</td>
<td align="left">968</td>
<td align="left">HDL cholesterol, Total cholesterol, Age, Uricemia, WBC, GGT</td>
<td align="left">L: supervised, regression FS: permutation out-of-bag C: Ensemble RF &#x2b; data imputation (MSE &#x3c;0.17) V: 10-fold cv</td>
<td align="left">The ensemble approach correlated with insulin resistance based on non-glycemic blood data. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B55">Kopitar et al. (2020)</xref>
</td>
<td align="left">Early T2DM diagnosis</td>
<td align="left">3,723</td>
<td align="left">Hyperglycemia, Age, HDL cholesterol, Triglycerides</td>
<td align="left">L: supervised, regression FS: NA C: lm (0.747), glmnet (0.740), lightgbm (0.723), xgboost (0.715), RF (0.723) V: 10-fold cv</td>
<td align="left">No clinically relevant improvement with more sophisticated ML algorithms. Higher variables&#x2019; stability is preferred for model calibration and clinic interpretation. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B68">Metsker et al. (2020)</xref>
</td>
<td align="left">Risk of diabetes polyneuropathy</td>
<td align="left">5,846 (2,342)</td>
<td align="left">Retinopathy, Nephropathy, Hb, Neutrophils, ALT, AST, Glucose</td>
<td align="left">L: supervised, classification FS: NA C: ANN (0.892), SVM (0.864), decision tree (0.898), lm (0.892), logistic regression (0.894) V: 5-fold cv</td>
<td align="left">Different models showed different results in terms of the feature&#x2019;s importance and significance: lm (glucose), rf (neutrophils), and ANN (co-morbidities). Depending on the needs, the choice of the algorithm should vary. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B65">Mathioudakis et al. (2021)</xref>
</td>
<td align="left">Risk of iatrogenic hypoglycemia</td>
<td align="left">1,612,425 (50,354)</td>
<td align="left">Basal insulin dose, BG coefficient of variation, Previous hypoglycemic episodes</td>
<td align="left">L: supervised, classification FS: NA C: MLR, RF, NB, SGB (0.90) V: 10-fold cv</td>
<td align="left">Iatrogenic hypoglycemia predicted after short-term blood glucose measurement in-hospital based on EHR data. External validation: Hospital 2 (0.88), Hospital 3 (0.87), Hospital 4 (0.86), Hospital 5 (0.86) Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B57">Kushner et al. (2020)</xref>
</td>
<td align="left">Blood glucose prediction in T1DM</td>
<td align="left">24</td>
<td align="left">Historic continuous glucose monitoring</td>
<td align="left">L: supervised, regression FS: NA C: shallow neural network (RMSE): t&#x3d;60 (28 &#xb1; 4), t&#x3d;90 (33 &#xb1; 4), t&#x3d;120 (38 &#xb1; 6), t&#x3d;180 (40 &#xb1; 8), t&#x3d;240 (43 &#xb1; 12) mg/dL V: test-set</td>
<td align="left">93% of predictions were clinically acceptable, according to the Clarke error grid. External validation: NA Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, area under the ROC (receiver-operating characteristic) curve; <italic>T2DM</italic>, type 2 diabetes mellitus; <italic>T1DM</italic>, type 1 diabetes mellitus; <italic>HDL</italic>, high-density lipoprotein; <italic>WBC</italic>, white blood cells; <italic>GGT</italic>, gamma-glutamyl transferase; <italic>Hb</italic>, hemoglobin; <italic>ALT</italic>, alanine transaminase; <italic>AST</italic>, aspartate transaminase; <italic>BG</italic>, blood glucose; <italic>L</italic>, learning; <italic>FS</italic>, feature selection; <italic>C</italic>, classification; <italic>V</italic>, validation; <italic>NA</italic>, not available; <italic>RF</italic>, random forests; <italic>MSE</italic>, mean squared error; <italic>RMSE</italic>, root mean squared error; <italic>CV</italic>, cross-validation; <italic>lm</italic>, linear regression model; <italic>glmnet</italic>, regularized generalized linear model with lasso (least absolute shrinkage and selection operator) regression; <italic>LightGBM</italic>, light gradient-boosting machine; <italic>XGBoost</italic>, extreme gradient boosting; <italic>ANN</italic>, artificial neural networks; <italic>SVM</italic>, support vector machines; <italic>MLR</italic>, multivariable logistic regression; <italic>NB</italic>, Na&#xef;ve-Bayes; <italic>SGB</italic>, stochastic gradient boosting; <italic>EHR</italic>, electronic health records.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Kushner et al. studied T1DM blood glucose prediction using a shallow neural network based on historical continuous blood glucose monitoring (<xref ref-type="bibr" rid="B57">Kushner et al., 2020</xref>). The results improved the current condition through a more extended prediction (t&#x3d;240&#xa0;min vs. 120&#xa0;min) with lower error (RMSE, 60&#xa0;min&#x3d;28&#xa0;mg/dL vs. 43&#xa0;mg/dL). Bernardini et al. initially featured disease complications by evaluating clinical factors associated with insulin resistance (<xref ref-type="bibr" rid="B15">Bernardini et al., 2019</xref>). The ensemble regression forest allowed the identification of non-glycemic blood parameters (HDL and total cholesterol, age, uricemia, WBC, and GGT) as clinical factors that could provide early detection of glucose deterioration. These findings agree with previous literature that individually associated uricemia and WBC with insulin-resistant conditions and GGT in high-risk T2DM individuals. Higher sample studies employed supervised classification algorithms for risk prediction of polyneuropathy (n&#x3d;5,846) and iatrogenic hypoglycemia. Regarding polyneuropathy, Metsker et al. found that different ML models produced different feature selections and consequent classification metrics (<xref ref-type="bibr" rid="B68">Metsker et al., 2020</xref>), relating co-morbidities (nephropathy or retinopathy) to a rise in ANN (AUC&#x3d;0.892), increased neutrophil levels in random forests boosting (AUC&#x3d;0.898) or blood glucose levels in linear regression (AUC&#x3d;0.892). Of note, the principal finding suggests that the choice of the ML algorithm should consider not only the performance metrics but also the kind of clinical information to assess: the identification of early (i.e., ANN) or late biomarkers (i.e., linear regression) of polyneuropathy, or the identification of pathophysiological mechanisms (i.e., decision trees). The risk of developing iatrogenic hypoglycemia (glucose&#x2264;70&#xa0;mg/dL) was approached by Mathioudakis et al. 
using a stochastic gradient boosting ML model in an extensive data study (n&#x3d;1,612,425) (<xref ref-type="bibr" rid="B65">Mathioudakis et al., 2021</xref>). Performance metrics of the developed model (43 predictors) were slightly lower (c-statistic&#x3d;0.86:0.90) than previous reports (c-statistic&#x3d;0.80:0.99), but the model was the first to be externally validated in 4 different hospitals with stability in model predictions, working 24&#xa0;h after each blood glucose measurement.</p>
</sec>
<sec id="s3-5">
<title>Mental, behavioral, or neurodevelopmental disorders (ICD-10 class V)</title>
<p>Using AI, depression was the only condition studied in the context of mental, behavioral, or neurodevelopmental diseases (<xref ref-type="table" rid="T3">Table 3</xref>). Despite the association between depression and routine blood biomarkers still being under clarification, low HDL-cholesterol values were previously associated with the condition. The studies reviewed approached depression under the NHANES database differently: while Dipnall et al. used data mining, machine learning, and traditional statistics to identify related biomarkers (<xref ref-type="bibr" rid="B34">Dipnall et al., 2016</xref>), Hochman et al. aimed to build a low-cost diagnostic tool to perform diagnosis based on blood data (<xref ref-type="bibr" rid="B46">Hochman et al., 2021</xref>). The methodology described in the first study explains the feature selection process in three sequential hybrid processes: multiple imputations, ML regression, and traditional statistical regression. From 67 laboratory parameters, the workflow selected 21 after ML regression and only six after univariate analysis. The final multiple logistic regression model suggested two related effects (hemoglobin from bilirubin and cotinine from cadmium), which resulted in the exclusion of Hb and cotinine. The posterior cadmium elimination occurred since only RDW, glucose, and total bilirubin remained significant to several confounder covariates, namely, age (<italic>p</italic>&#x3c;0.05). The authors explained related literature associations between the selected biomarkers and depression, yet all with indirect relationships. The subsequent study from Hochman et al. configured a supervised approach for predicting depression using a random forests classifier in four subgroups (<xref ref-type="bibr" rid="B46">Hochman et al., 2021</xref>). 
Feature selection was made using the stepwise backward method, which starts modeling with all features and successively eliminates the least important feature in iterative steps until all features are removed from the model. Results were similar across the four groups: full dataset [ratio of income to poverty (RIP), GGT, glucose, triglyceride and RDW, AUC&#x3d;0.83], overweight and obesity (GGT, RIP, creatinine, RDW and glucose, AUC&#x3d;0.80), diabetes (GGT, eosinophils, RIP, basophils and eosinophils, AUC&#x3d;0.82) and patients with metabolic syndrome (RIP, GGT, eosinophils, bilirubin and basophils, AUC&#x3d;0.82) (<xref ref-type="table" rid="T7">Table 7</xref>). Despite the developed models accounting for the features selected in the first study, namely, glucose and RDW (full dataset and overweight and obesity), and bilirubin (patients with metabolic syndrome), the performance achieved in internal validation was not maintained in the external validation dataset (AUC, average&#x3d;0.66); this fact compromises the predictive ability of the developed models.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of mental, behavioral, or neurodevelopmental disorders.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B34">Dipnall et al. (2016)</xref>
</td>
<td align="left">Depression associated biomarkers</td>
<td align="left">5,227</td>
<td align="left">RDW, Glucose, Total bilirubin</td>
<td align="left">L: supervised, regression FS: multiple imputation, boosted regression, imputed weighted logistic regression C: multivariate weighted logistic regression V: cross-validation</td>
<td align="left">The hybrid approach provided a variable selection of three biomarkers for the prediction of depression. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B46">Hochman et al. (2021)</xref>
</td>
<td align="left">Depression</td>
<td align="left">Training: 7,702 (522) Validation: 1,752 (117)</td>
<td align="left">Family income, GGT, Glucose, Triglycerides, RDW, Creatinine, BA%, EO%, Bilirubin</td>
<td align="left">L: supervised, classification FS: backward feature selection C: random forests: full (0.83), overweight (0.80), diabetes (0.82), metabolic syndrome (0.82) V: cross-validation</td>
<td align="left">Selected features demonstrated good predictive value in distinguishing depression cases on the four studied datasets. External validation: full (0.69), overweight (0.63), diabetes (0.66), metabolic syndrome (0.64) Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, area under the ROC (receiver-operating characteristic) curve; <italic>RDW</italic>, red cell distribution width; <italic>GGT</italic>, gamma-glutamyl transferase; <italic>BA%</italic>, basophils count; <italic>EO%</italic>, eosinophils count; <italic>L</italic>, learning; <italic>FS</italic>, feature selection; <italic>C</italic>, classification; <italic>V</italic>, validation; <italic>NA</italic>, not available.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-6">
<title>Diseases of the circulatory system (ICD-10 class IX)</title>
<p>Outcomes related to the reviewed circulatory system diseases include the prognosis of postoperative blood coagulation in children with congenital heart disease and the diagnosis of cardiac workload and ischemic stroke. Numerous studies refer to associations between blood analysis and diseases of the circulatory system. However, known routine blood tests associated with heart disease are the low levels of sodium and chloride and the elevated levels of erythrocytes, hematocrit, RDW, urea, and c-reactive protein. The prognosis of postoperative blood coagulation in children was assessed by comparing three different classifiers (decision trees, na&#xef;ve-Bayes, and support vector machines). Applying recursive feature elimination resulted in seven features, age being the most relevant (<xref ref-type="table" rid="T8">Table 8</xref>). Traditional statistical tests also evaluated relevant features, which confirmed the significance among the compared groups (abnormal vs. normal blood coagulation). This statistical verification also supports the model&#x2019;s reliability, which achieved accuracy values of 75% in internal validation based on a typical CBC. The cardiac workload is generally measured by the rate pressure product (RRP), which is the product between systolic blood pressure and heart rate. The study from Shou et al. evaluated how blood parameters predicted the biochemical profile related to the resting RRP through the analysis of 55,730 individuals (<xref ref-type="bibr" rid="B86">Shou et al., 2021</xref>). The supervised regression task was accomplished by comparing a linear regression model (r&#x3d;0.352) and a tree-based model, XGBoost (r&#x3d;0.377). The authors found that glucose alone predicted rRRP with a Pearson correlation of 0.247 in the linear model and 0.245 in the non-linear model; total protein and neutrophils count were responsible for the additional variance, exhibiting the recognition ability of ML-based approaches to find new biomarkers. 
Indeed, Zheng et al. followed a similar strategy for ischemic stroke (<xref ref-type="bibr" rid="B104">Zheng et al., 2022</xref>). Ischemic stroke is still a major burden due to the high number of misdiagnosed (or late-diagnosed) cases owing to challenges related to the triaging process. Four feature selection techniques (univariate logistic regression, least absolute shrinkage and selection operator regression, recursive feature elimination, and the Spearman correlation) were applied to the training set, reducing 41 to 15 features. Model development was assessed by comparing six algorithms (XGBoost, RF, NN, LR, Gaussian NB, KNN); XGBoost showed the best performance with an accuracy of 0.84, 0.83, and 0.86 in training, internal validation, and external validation, respectively. The model was further analyzed by explaining techniques (permutation feature importance, local interpretable model-agnostic explanations, and Shapley additive explanations) endorsing the importance of neutrophils count, total protein, HDL-cholesterol, and hemoglobin. Aiming for a future clinical deployment, the model was also made available online for prospective validation.</p>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of circulatory system diseases.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B41">Guo et al. (2021)</xref>
</td>
<td align="left">Postoperative blood coagulation in children with congenital heart disease</td>
<td align="left">1,690</td>
<td align="left">Age, Sex, MCV, MCH, MCHC, WBC, PLT</td>
<td align="left">L: supervised, classification FS: recursive feature elimination C: DT (0.81), NB (0.82), SVM (0.82) V: 5-fold cv</td>
<td align="left">The accuracy rate of the overall forecast was higher than 75%; Age was the most important feature for the decision-tree model. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B86">Shou et al. (2021)</xref>
</td>
<td align="left">Cardiac workload</td>
<td align="left">55,730</td>
<td align="left">Glucose, Total protein, Neutrophil</td>
<td align="left">L: supervised, regression FS: NA C: LR (r&#x3d;0.352), XGBoost (r&#x3d;0.377) V: NA</td>
<td align="left">Positive correlation between the measured resting rate pressure (rRRP) with the predicted rRRP based on blood biomarkers. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B104">Zheng et al. (2022)</xref>
</td>
<td align="left">Ischemic stroke</td>
<td align="left">15,475 (4,999)</td>
<td align="left">Age, NE%, NE, MO%, MCHC, LY%, RDW-CV, MCV, Hb, Total cholesterol, HDL-cholesterol, uric acid, total protein</td>
<td align="left">L: supervised, classification FS: permutation feature importance C: XGBoost (0.91) V: 5-fold cv</td>
<td align="left">The model was developed based on 15 routine blood tests and externally validated with excellent accuracy. External validation: 5,011 (1,076), XGBoost (0.92) Clinical deployment: available online at <ext-link ext-link-type="uri" xlink:href="http://istriage.com">istriage.com</ext-link>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, Area under the ROC (receiver-operating characteristic) curve, <italic>MCV</italic>, mean corpuscular volume; <italic>MCH</italic>, mean corpuscular hemoglobin; <italic>MCHC</italic>, mean corpuscular hemoglobin concentration; <italic>WBC</italic>, white blood cells; <italic>PLT</italic>, platelets, <italic>NE%</italic> neutrophils count, <italic>NE</italic>, neutrophils, <italic>MO%</italic> monocytes count, <italic>MCHC</italic>, mean corpuscular hemoglobin concentration, <italic>LY%</italic> lymphocytes count, <italic>RDW-CV</italic>, Red Cell Distribution Width-Coefficient of Variation; <italic>MCV</italic>, mean corpuscular volume, <italic>Hb</italic> Hemoglobin; <italic>DT</italic>, decision trees; <italic>NB</italic>, Na&#xef;ve-Bayes; <italic>SVM</italic>, support vector machines; <italic>CV</italic>, Cross-Validation, <italic>L</italic> Learning, <italic>FS</italic>, feature selection, <italic>C</italic> Classification, <italic>V</italic> Validation, <italic>NA</italic>, not available; <italic>LR</italic>, linear regression, <italic>XGBoost</italic> Extreme Gradient Boosting, <italic>rRRP</italic>, resting rate pressure.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-7">
<title>Diseases of the respiratory system (ICD-10 class X)</title>
<p>Regarding respiratory system diseases, we present here one study related to asthma. Given the impact of smoking on respiratory function, we included a study with AI and smoking-related disorders in the review. Routine blood metabolites associated with smoking were the high levels of erythrocytes, hematocrit, leukocytes, triglycerides, and the low levels of HDL-cholesterol, none related to asthma. Indeed, the study of Mamoshina et al. found that HDL-cholesterol was the principal feature for the classification of the smoking status, along with hemoglobin, RDW, and mean cell volume (<xref ref-type="bibr" rid="B63">Mamoshina et al., 2019</xref>). These findings were accomplished after an iterative analysis that started with the prediction (regression) of biological age based on routine blood tests. The feature importance shows HbA1C, urea, glucose, and ferritin as the most important (training). The 24 features selected were used to predict age in smokers (r2&#x3d;0.55) and non-smokers (r2&#x3d;0.57), showing a potential impact of smoking in the prediction. The addition of the feature &#x2018;smoking status&#x2019; improved the three tested regression models from 0.56 to 0.57 (23&#x2013;24 features), 0.54 to 0.58 (20&#x2013;21 features), and 0.55 to 0.60 (18&#x2013;19 features) in the prediction of biological age. Authors also found, based on the same models of 23, 20, and 18 features, the ability to predict the &#x2018;smoking status&#x2019; with an accuracy of 0.82 (equivalent for the three models), with HDL-cholesterol, hemoglobin, RDW, and MCV the most relevant features for the prediction. The study from Zhan et al. employed a Mahalanobis-Taguchi system (MTS) to classify asthma patients (<xref ref-type="bibr" rid="B102">Zhan et al., 2020</xref>). 
The algorithm was approached by constructing the Mahalanobis space (collection and distance calculation of the standardized normal and abnormal data), with further identification of useful variables (orthogonal arrays and signal-to-noise ratios for threshold definition and ROC curve analysis). Results achieved with the proposed algorithm were compared with an SVM model, where the same features (selected by Pearson correlation) predicted asthma patients with similar accuracy (<xref ref-type="table" rid="T9">Table 9</xref>). The authors claim a more straightforward interpretability of the model by calculating the Mahalanobis distance (MD) with the values of PDW, MPV, WBC, eosinophils count, lymphocytes count, and MCHC data.</p>
<table-wrap id="T9" position="float">
<label>TABLE 9</label>
<caption>
<p>Machine-learning-based routine blood tests for respiratory system disease diagnosis (or prognosis).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B63">Mamoshina et al. (2019)</xref>
</td>
<td align="left">Smoking status and aging in smokers</td>
<td align="left">149,000 (49,000)</td>
<td align="left">Smoking: HDL-cholesterol, Hb, RDW, MCV Age: HbA1C, urea, glucose, ferritin</td>
<td align="left">L: supervised, classification (smoking), regression (aging) FS: permutation feature importance C: Feed-forward deep neural networks: Age (r&#x3e;0.74), smoking (accuracy&#x3e;0.81) V: 5-fold cv</td>
<td align="left">Blood tests could quantify aging caused by smoking; still, this method was less accurate than DNA methylation. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B102">Zhan et al. (2020)</xref>
</td>
<td align="left">Asthma</td>
<td align="left">1,835 (355)</td>
<td align="left">PDW, MPV, WBC, EO%, LY%, LY, MCHC</td>
<td align="left">L: supervised, classification FS: Pearson correlation C: MTS, 7 var (sensitivity&#x3d;0.941); SVM, 7 var (sensitivity&#x3d;0.935) V: 10-fold cv</td>
<td align="left">MTS showed high classification accuracy on asthma patients (94.15%) and healthy volunteers (97.20%) based on 7 routine blood parameters; SVM achieved similar performance. External validation: NA Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, Area under the ROC (receiver-operating characteristic) curve, <italic>HDL</italic>, High-Density Lipoprotein, <italic>Hb</italic> Hemoglobin; <italic>RDW</italic>, red cell distribution width; <italic>MCV</italic>, mean corpuscular volume, <italic>HbA1C</italic> Glycated Hemoglobin, <italic>PDW</italic>, platelet distribution width; <italic>MPV</italic>, mean platelet volume; <italic>WBC</italic>, white blood cells, <italic>EO%</italic> eosinophils count, <italic>LY%</italic> lymphocytes count, <italic>MCHC</italic>, mean corpuscular hemoglobin concentration; <italic>MTS</italic>, Mahalanobis-Taguchi system; <italic>DNA</italic>, deoxyribonucleic acid, <italic>L</italic> Learning, <italic>FS</italic>, feature selection, <italic>C</italic> Classification, <italic>V</italic> Validation, <italic>NA</italic>, not available; <italic>CV</italic>, Cross-Validation; <italic>SVM</italic>, support vector machines.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-8">
<title>Diseases of the digestive system (ICD-10 class XI)</title>
<p>For digestive system diseases, we focused on studies applying supervised classification methods to diagnose conditions related to liver disease. In general, pathologies related to the liver split into four stages: the inflammation stage [induced by hepatitis B virus (HBV), hepatitis C virus (HCV), alcoholic liver disease (ALD), and nonalcoholic fatty liver disease (NAFL)], the fibrosis stage, the cirrhosis stage and the worst stage related to liver cancer or failure (<xref ref-type="bibr" rid="B95">Tian et al., 2022</xref>). Regarding inflammation, Fialoke et al. studied the discrimination between non-alcoholic steatohepatitis (NASH) and simple steatosis in NAFL (<xref ref-type="bibr" rid="B37">Fialoke et al., 2018</xref>). Since NASH is underdiagnosed due to the lack of patient symptoms and relevant biomarkers (high values of AST and ALT), the authors trained 4&#xa0;ML algorithms with the available data: demographics, the maximum, minimum, and mean values of AST, ALT, AST/ALT, PLT, and the binary diabetes condition. 5-fold cross-validation showed the models&#x2019; AUC to be higher than 0.83, with XGBoost being the top classifier (AUC&#x3d;0.876) with the potential to perform external discrimination with promising results. Ma et al. also approached the inflammation stage by diagnosing NAFL in a cross-sectional study of 10,030 individuals with a prevalence of 24% (<xref ref-type="bibr" rid="B61">Ma et al., 2018</xref>). Four techniques were used for feature selection, and 11&#xa0;ML algorithms were trained. The five selected biomarkers (<xref ref-type="table" rid="T10">Table 10</xref>) resulted in different performance metrics across the tested traditional (KNN, SVM, LR, NB, BN, DT), ensemble (AdaBoost, bagging, RF), and extension algorithms (hidden na&#xef;ve-Bayes, aggregating one-dependence). Since F-measure (harmonic mean between precision and recall) was considered the most important metric, the Bayesian network was the best model (F-measure&#x3d;0.655). 
Comparisons with current diagnostic scores such as the FLI [calculated with triglycerides, BMI, GGT, waist circumference (F-measure&#x3d;0.318)], and HIS [estimated with the values of AST, ALT, BMI, diabetic condition, and gender (F-measure&#x3d;0.524)] demonstrated a superior diagnostic ability of the developed Bayesian network. Cao et al. evaluated HBV-induced liver cirrhosis (inflammation and cirrhosis stages) by studying seven routine blood tests enhanced by a multilayered perceptron and a na&#xef;ve-Bayes algorithm (<xref ref-type="bibr" rid="B22">Cao et al., 2013</xref>). Both classifiers exhibited higher AUC in the internal validation (MLP, AUC&#x3d;0.942, and NB, AUC&#x3d;0.899) than in the training, with better performance for the MLP (MLP, AUC&#x3d;0.900, and NB, AUC&#x3d;0.831). This study also compared the ML metrics with the currently used scores APRI (AUC&#x3d;0.726), gauged with the AST to PLT index, and the FIB-4 (AUC&#x3d;0.817), calculated with the age, PLT, AST, and ALT levels, with the MLP classifier showing superior performance, enabling a potential reduction in the number of biopsies to perform diagnosis. The worst stage of liver disease &#x2013; liver failure &#x2013; was studied by Peng et al. to create a forecast model to predict patient deterioration after hospitalization (<xref ref-type="bibr" rid="B74">Peng et al., 2020</xref>). This type of prediction is routinely assessed through the model for end-stage liver disease (MELD) calculated using the values of creatinine, total bilirubin, standardized prothrombin ratio (INR), and the etiology of the disease. In opposition, the authors used a database of 15 clinical metabolites kept for modeling determined by hepatologists; only variables with high missing values were discarded. Except for the GLM model, all other models (AUC&#x3e;0.794) outperformed the classification performance of the MELD (AUC&#x3d;0.699). However, the limitation of the sample size (n&#x3d;348) reinforces the need to perform validation in a high number of subjects. Finally, Yao et al. 
approached non-specific liver disease by deep learning on the largest dataset (n&#x3d;76,914), which comprised 12,688 patients with different stages of liver disease (<xref ref-type="bibr" rid="B100">Yao et al., 2020</xref>). The application of a dense deep neural network (DNN) was compared with standard logistic regression and random forests. The network was explored based on the network width (number of neurons per hidden layer) and dropout rate. Widths of 512 (AUC&#x3d;0.8919) and 1,024 (AUC&#x3d;0.8922) were compared along with dropouts of 0.3 (AUC&#x3d;0.8812), 0.4 (AUC&#x3d;0.8891), 0.5 (0.8919), 0.6 (0.8904), and 0.7 (0.8856). Feature importance was assessed with random forests (for reference) since DNN and DenseDNN are black-box algorithms with poor explainability. Global results achieved excellent internal validation (AUC&#x3e;0.87) except for logistic regression (AUC&#x3d;0.79). Indeed, a significant improvement was not verifiable between an explainable random forest and the deep learning approaches for diagnosing non-specific liver disease. The fibrosis stage and liver cancer have not been approached yet.</p>
<table-wrap id="T10" position="float">
<label>TABLE 10</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of digestive system diseases.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B22">Cao et al. (2013)</xref>
</td>
<td align="left">HBV-induced liver cirrhosis</td>
<td align="left">239 (124)</td>
<td align="left">Age, ALT, AST, PT, PLT, Hb, RDW</td>
<td align="left">L: supervised, classification FS: genetic search C: MLP (0.942), NB (0.899) V: 10-fold cv</td>
<td align="left">Compared to currently used scores for liver cirrhosis prediction (APRI (AUC&#x3d;0.726) and FIB-4 (AUC&#x3d;0.817)), the developed MLP achieved excellent performance in the test set. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B37">Fialoke et al. (2018)</xref>
</td>
<td rowspan="3" align="left">NASH in NAFL</td>
<td rowspan="3" align="left">34,949 (17,359)</td>
<td rowspan="3" align="left">ALT_mean, ALT_max, AST_max, AST_mean</td>
<td rowspan="3" align="left">L: supervised, classification FS: genetic search C: LR (0.835), DT (0.842), RF (0.870), XGBoost (0.876) V: 5-fold cv</td>
<td align="left">The model improved by adding longitudinal (temporal) data rather than only using recent values</td>
</tr>
<tr>
<td align="left">External validation: NA</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B61">Ma et al. (2018)</xref>
</td>
<td align="left">NAFL</td>
<td align="left">13,030 (2,522)</td>
<td align="left">BMI, Triglycerides, GGT, ALT, Uric acid</td>
<td align="left">L: supervised, classification FS: correlation, redundancy analysis, out-of-bag estimation, Scott-Knot test C: BN (F&#x3d;0.655) V: 10-fold cv</td>
<td align="left">Tested ML algorithms improved the prediction accuracies from nearly 52% in FLI and HIS rules to &#x3e;80% for NAFL diagnosis. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B100">Yao et al. (2020)</xref>
</td>
<td align="left">Liver disease</td>
<td align="left">76,914 (12,688)</td>
<td align="left">AST, Total bilirubin, Direct bilirubin, Age</td>
<td align="left">L: supervised, classification FS: RF C: LR (0.797), RF (0.879), DNN (0.886), DenseDNN (0.891) V: 5-fold cv</td>
<td align="left">AUC was slightly higher in deep learning than in weak learners; Selected features were achieved by random forests since DNNs are black-box algorithms. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B74">Peng et al. (2020)</xref>
</td>
<td align="left">Exacerbation risk in patients with liver dysfunction</td>
<td align="left">348 (174)</td>
<td align="left">AST, NE, LY, Creatinine, ALT, ALB, Total protein, Total bilirubin</td>
<td align="left">L: supervised, classification FS: manual C: ANN (0.912), CART (0.794), GLM (0.554), SVM (0.853) V: 10-fold cv</td>
<td align="left">While the MELD achieved an AUC of 0.669, ML algorithms enhanced the prediction to nearly 80% (except GLM). External validation: NA Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, Area under the ROC (receiver-operating characteristic) curve, <italic>NASH</italic>, Non-Alcoholic Steatohepatitis; <italic>NAFL</italic>, Non-Alcoholic Fatty Liver Disease; <italic>ALT</italic>, alanine transaminase; <italic>AST</italic>, aspartate aminotransferase; <italic>PT</italic>, prothrombin time; <italic>PLT</italic>, platelets, <italic>Hb</italic> Hemoglobin; <italic>RDW</italic>, red cell distribution width; <italic>BMI</italic>, body mass index; <italic>GGT</italic>, Gamma-glutamyl Transferase; <italic>NE</italic>, neutrophils; <italic>LY</italic>, lymphocytes; <italic>ALB</italic>, albumin, <italic>L</italic> Learning, <italic>FS</italic>, feature selection, <italic>C</italic> Classification, <italic>V</italic> Validation, <italic>NA</italic>, not available; <italic>MLP</italic>, multilayer perceptron; <italic>NB</italic>, Na&#xef;ve-Bayes; <italic>CV</italic>, Cross-Validation, <italic>XGBoost</italic> Extreme Gradient Boosting, <italic>LR</italic>, logistic regression; <italic>DT</italic>, decision trees; <italic>RF</italic>, random forests; <italic>BN</italic>, bayesian network; <italic>DNN</italic>, deep neural networks; <italic>CART</italic>, classification and regression trees algorithm; <italic>GLM</italic>, generalized linear models; <italic>SVM</italic>, support vector machines; <italic>APRI</italic>, aspartate aminotransferase to platelet ratio index; <italic>FIB-4</italic>, Fibrosis Index Based on 4 factors; <italic>ML</italic>, machine learning; <italic>MELD</italic>, Model for End-Stage Liver Disease.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-9">
<title>Diseases of the genitourinary system (ICD-10 class XIV)</title>
<p>Concerning disorders of the genitourinary system, we focus on chronic kidney disease (CKD). CKD lacks early diagnosis since obvious symptoms only appear in an advanced stage of the disease wherein the patient&#x2019;s renal function declines with a glomerular filtration rate (GFR) of 60&#xa0;mL/min/1.73&#xa0;m<sup>2</sup> (<xref ref-type="bibr" rid="B93">Tarwater, 2011</xref>). The need to create screening procedures that perform early diagnosis motivated several studies for routine blood and urine analysis. Indeed, the study of Rashed-Al-Mahfuz et al. evaluated 250 CKD patients in a cohort of 400 individuals with information regarding urine (specific gravity, albumin, sugar, red blood cells, pus cell, pus cell clumps, bacteria) and blood (glucose, urea, creatinine, sodium, potassium, hemoglobin, packed cell volume, white blood cell count and red blood cell count) metabolites (<xref ref-type="bibr" rid="B77">Rashed-Al-Mahfuz et al., 2021</xref>). The authors performed training on five algorithms and performed feature importance based on the SHAP technique, reducing the number of features from 24 to 13 in concordance between tested gradient boosting, random forest, and extreme gradient boosting. With the 13 selected features, the authors manually split the dataset into six different subsets: all features, blood and others, urine and others, only blood, only urine, and only others. A new train-test cycle applied to these subsets resulted in a classification accuracy ranging from 76% to 99%. Interestingly, results were very similar between all features (RF, AUC&#x3d;0.99) and only blood (RF, AUC&#x3d;0.97), with slight variations between the tested classifiers (<xref ref-type="table" rid="T11">Table 11</xref>). While this study provides an interesting interpretation approach to CKD screening based on different bundles of metabolites (SHAP explained), it lacks sample size, which limits the findings.</p>
<table-wrap id="T11" position="float">
<label>TABLE 11</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of diseases of the genitourinary system.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B77">Rashed-Al-Mahfuz et al. (2021)</xref>
</td>
<td rowspan="3" align="left">Chronic kidney disease</td>
<td rowspan="3" align="left">400 (250)</td>
<td rowspan="3" align="left">Hb, Creatinine, Glucose, Urea, RBC, Sodium</td>
<td rowspan="3" align="left">L: supervised, classification FS: SHAP C: RF (0.97), GB (0.96), XGBoost (0.95), LR (0.94), SVM (0.94) V: 10-fold cv</td>
<td align="left">Selected features (SHAP) were consistent with the literature regarding CKD diagnosis, and the performance of ML classifiers was similar for each bundle of features. Hemoglobin was the most important predictor</td>
</tr>
<tr>
<td align="left">External validation: NA</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic>, Area under the ROC (receiver-operating characteristic) curve, <italic>Hb</italic> Hemoglobin, <italic>RBC</italic>, red blood cells; <italic>SHAP</italic>, Shapley additive explanations; <italic>RF</italic>, random forests; <italic>GB</italic>, gradient boosting, <italic>XGBoost</italic> Extreme Gradient Boosting, <italic>LR</italic>, logistic regression; <italic>SVM</italic>, support vector machines, <italic>L</italic> Learning, <italic>FS</italic>, feature selection, <italic>C</italic> Classification, <italic>V</italic> Validation, <italic>CV</italic>, Cross-Validation; <italic>NA</italic>, not available; <italic>CKD</italic>, chronic kidney disease; <italic>ML</italic>, machine learning.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-10">
<title>Codes for special purposes (ICD-10 class XXII)</title>
<p>Lastly, we included an analysis for special purposes ICD-10 codes, in which, for instance, the coronavirus disease 2019 (COVID-19) is included. COVID-19 is caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2); noticeably, it received particular interest in AI-based diagnostics. First reported in November 2019, this virus emerged as a pandemic in March 2020, accounting for 563M infections (<xref ref-type="bibr" rid="B72">Our World in data, 2023</xref>) and 6.37M deaths (<xref ref-type="bibr" rid="B30">Data, 2023</xref>). The evolution of the virus was irregular, and its spread was facilitated by the struggle to achieve a real-time diagnosis able to distinguish real positive COVID-19 infections from other viral and bacterial respiratory infections. Indeed, the symptomatology of COVID-19 remains challenging to differentiate from other infections: 40% of patients display mild disease (fever, cough), 40% show moderate disease (pneumonia), 15% have severe disease (shortness of breath), and 5% have critical illness (ICU admission) (<xref ref-type="bibr" rid="B99">Wu and McGoogan, 2020</xref>). Additionally, 9&#x2013;12&#xa0;days is the average time for ICU admission, with a median length of stay of 9&#xa0;days. The median length of mechanical ventilation is 8.4 days, and COVID-19 mortality in the ICU is nearly 30% (<xref ref-type="bibr" rid="B5">Auld et al., 2021</xref>). Regarding diagnosis, reverse transcription polymerase chain reaction (RT-PCR) and computerized tomography (CT) images are still recognized technologies for determining viral infection. However, both methods have disadvantages: CT involves radiation exposure (which inherently increases the risk of cancer development), is bulky and expensive, and hinders the possibility of performing screening. 
RT-PCR tests are less costly, available in higher volume, and offer a specificity close to 100% and, depending on the primers and strain, a remarkably high sensitivity (<xref ref-type="bibr" rid="B16">B&#xf6;ger et al., 2021</xref>). RT-PCR tests require laboratory specialists and infrastructure and produce a 15% false-positive rate on 48&#x2013;72&#xa0;h of turnaround time. Rapid diagnostic tests (RDTs) emerged as a point-of-care solution to facilitate access to diagnosis and reduce dependence on laboratory infrastructures. There are more than 400 RDTs commercially available, based on two technologies: antigen-based (immunoassays) to detect domains of the surface proteins of the virus and molecular nucleic acid amplification tests (NAATs) that reveal the presence of viral gene targets (<xref ref-type="bibr" rid="B33">Diagnostics for All, 2023</xref>). The criteria for approval and commercialization of RDTs are based on a sensitivity superior to 80% and specificity above 98% (<xref ref-type="bibr" rid="B97">World Health Organization, 2021</xref>), tested on a prospective cohort study involving less than 30 persons infected with SARS-CoV-2 and 30 persons without the infection (<xref ref-type="bibr" rid="B38">Food and Drug Administration. Emergency use authorizations for medical devices, 2021</xref>). Accepted by the Food and Drug Administration (FDA), these standards do not require independent verification of clinical validation provided by each test manufacturer. Indeed, several studies reported varying degrees of sensitivity (36%&#x2013;82%) and specificity (98%&#x2013;100%) when these RDTs are tested in asymptomatics (<xref ref-type="bibr" rid="B76">Prince-Guerra et al., 2021</xref>). Notably, most validation studies conducted for RDTs were performed before the appearance of new variants, namely, delta and omicron. 
The WHO, CDC, and European Center for Disease Prevention and Control guidelines advise using these point-of-care solutions for diagnosing symptomatic persons and screening asymptomatic individuals. Despite the growing need for these solutions, especially in underdeveloped countries, supply-chain limitations hinder the availability and consequent clinical relevance of these tests.</p>
<p>COVID-19 researchers and clinicians explored AI state-of-the-art learning techniques to find alternatives for COVID-19 forecasting, management, surveillance, and recognizing scalable and cost-effective ways to deal with the pandemic. Regarding diagnosis, several studies emerged in mid-April 2020 through the study of routine blood tests using proprietary datasets (single center), characterized by low sample size (n&#x3c;1,000) and mostly without external validation. Joshi et al. provided an interesting methodology, modeling diagnosis using an L2-regularized logistic regression trained only with levels of hematocrit, neutrophils, and lymphocytes, achieving an internal validation (AUC&#x3d;0.78) that was consistent with the evaluation performed in four different sites (average AUC&#x3d;0.77) (<xref ref-type="bibr" rid="B48">Joshi et al., 2020</xref>). Brinati et al. also used logistic regression but compared the classification performance with a random forest classifier, using 14 features (<xref ref-type="table" rid="T12">Table 12</xref>) (<xref ref-type="bibr" rid="B18">Brinati et al., 2020</xref>). Results were very similar among the internal test set, but the decision tree provided larger comprehension with AST (&#x3c;25.4) and lymphocytes (&#x3c;1.3) as major predictors of COVID-19 negativity. Alves et al. also employed a random forest classifier compared with five algorithms in which the ensemble achieved the best internal classification (AUC&#x3d;0.87) (<xref ref-type="bibr" rid="B4">Alves et al., 2021</xref>). A decision tree explained the model, and criteria graphs allowed a visual interpretation of the association between selected blood parameters. An artificial neural network was designed by Banerjee et al. in comparison with a random forest and a lasso-elastic-net regularized generalized linear model (fitting a logistic regression) (<xref ref-type="bibr" rid="B11">Banerjee et al., 2020</xref>). 
The network was tested in community individuals (n&#x3d;619) and patients in the hospital regular ward (n&#x3d;69). While ANN and RF presented the best metrics for hospitalized and non-hospitalized patients, the glmnet identified a decreasing pattern in monocytes, leukocytes, eosinophils, and platelets that was applied to a logistic regression achieving an AUC of 0.85. The ensemble designed by Abayomi-Alli et al. was built under a small dataset (n&#x3d;279), taking the input of 16 features. Comparisons were made between 15 classifiers wherein the ExtraTrees (AUC&#x3d;0.99) and the AdaBoost (AUC&#x3d;0.98) outperformed the remaining models. Wu et al. also achieved similar internal classification values using a slightly larger cohort (n&#x3d;603) and a novel dynamic ensemble selection method, first approached with data imbalance techniques and modeled with a hybrid clustering with a posterior bagging classifier (<xref ref-type="bibr" rid="B98">Wu et al., 2021</xref>). The authors achieved better results with the hybrid approach rather than by using the bagging approach, tested in divisions 70:30 and 60:40 and with 5-fold cross-validation. Contrary to previous supervised studies, Souza et al. reported an unsupervised clustering approach based on self-organizing maps that detected positive COVID-19 patients with a discrimination power of 83% (LDA model) (<xref ref-type="bibr" rid="B90">Souza et al., 2021</xref>). This clustering approach was performed on 599 registers, of which only 81 were COVID-19 positive. It identified WBC, BA, EO, and RDW as features with a strong influence on clustering performance but was ambiguous regarding the feature range in outcome prediction. While reviewed studies improved the accuracies supported with more complex ML algorithms, the studies with higher sample sizes (n&#x3e;1,000) showed a similar increase in classification metrics in concordance with the addition of blood features. In a cohort of 1,537 participants, Tschoellitsch et al. 
achieved a moderate AUC of 0.74 and a negative predictive value of 98%, which agreed with previous results using random forest (<xref ref-type="bibr" rid="B96">Tschoellitsch et al., 2021</xref>). Cabitza et al. described a novel methodology comprising cardinality and similarity as metrics of a model&#x2019;s reliability in external validation settings (<xref ref-type="bibr" rid="B20">Cabitza et al., 2021</xref>). Considering data regarding demographics and complete blood cell count, the SVM with RBF kernel was applied to eight different external datasets with AUCs of 0.66, 0.75, 0.80, 0.83, 0.87, 0.89, 0.97, and 0.98 and similarity values (according to the degree of correspondence) of 0.315, 0.341, 0.348, 0.444, 0.323, 0.447, 0.439, and 0.445, respectively. Babaei et al. compared the performance of 12&#xa0;ML algorithms in three different datasets. In the third dataset, the comparison of all algorithms showed DNN with the highest classification metrics (<xref ref-type="table" rid="T12">Table 12</xref>) (<xref ref-type="bibr" rid="B8">Babaei et al., 2022</xref>). Interestingly, the previous studies of Brinati et al. (<xref ref-type="bibr" rid="B18">Brinati et al., 2020</xref>), and Cabitza et al. (<xref ref-type="bibr" rid="B20">Cabitza et al., 2021</xref>), were also compared, with DNN surpassing them in the first dataset (AUC&#x3d;0.92 vs. AUC&#x3d;0.84, from Brinati et al. (<xref ref-type="bibr" rid="B18">Brinati et al., 2020</xref>)) and the second dataset (AUC&#x3d;0.93 vs. AUC&#x3d;0.84, from Cabitza et al. (<xref ref-type="bibr" rid="B20">Cabitza et al., 2021</xref>)), highlighting deep neural networks as a promising approach for COVID-19 diagnosis. Plante et al. used a large cohort of 66 hospitals to perform an internal and external validation of an extreme gradient boosting tree based on 15 features. 
The external validation performed in 23 different hospitals led to the validation of the methodology (AUC&#x3d;0.91) and allowed a deeper comprehension of the best cutoff score, independently of the disease prevalence (studied for 1%, 10%, and 20%). Campagner et al. validated six algorithms in two different sites (Bergamo, n&#x3d;245 and Desio, n&#x3d;337) with 42% and 48% of COVID-19 positive cases (<xref ref-type="bibr" rid="B21">Campagner et al., 2021</xref>). The models achieved an AUC always higher than 93%, with SVM achieving the best results on both external sets. Violin plots of specific key CBC parameters showed high similarity between the training and validation cohorts, namely, in white blood cells, neutrophils, lymphocytes, red blood cells, platelets count, and patient&#x2019;s age, which justifies the stability in the model&#x2019;s performance. Chadaga et al. used similar approaches in two public datasets: the Albert Einstein hospital in Brazil [n&#x3d;5,644, RF (AUC) &#x3d; 0.80] and the Dr. TMA Pai hospital in India [n&#x3d;1,169, RF (AUC) &#x3d; 0.99]. While both studies used SMOTE to resolve imbalanced data, the second study used explainable methods to describe how parameters influenced the final decision. Significant improvements in performance metrics were observed (especially through the comparison of the same RF algorithm). Nonetheless, neither study has received external validation.</p>
<table-wrap id="T12" position="float">
<label>TABLE 12</label>
<caption>
<p>Machine-learning-based routine blood tests for the diagnosis (or prognosis) of COVID-19.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study</th>
<th align="left">Outcome</th>
<th align="left">Sample</th>
<th align="left">Selected features</th>
<th align="left">Methods (AUC)</th>
<th align="left">Findings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B18">Brinati et al. (2020)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">279 (177)</td>
<td align="left">AST, LY, LDH, CRP, WBC, EO, ALT, Age, NE, GGT, MO, BA, ALP, PLT</td>
<td align="left">L: supervised, classification FS: RF C: LR (0.84), RF (0.85) V: nested-cv</td>
<td align="left">AST&#x3c;25 is predictor of COVID-19 negativity (NPV&#x3d;83%); AST&#x3e;25 is predictor of COVID-19 positivity (PPV&#x3d;76%) External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B48">Joshi et al. (2020)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">390 (33)</td>
<td align="left">NE, LY, HCT, Gender</td>
<td align="left">L: supervised, classification FS: manual C: L2-regularized LR (c-statistic 0.78) V: cross-validation</td>
<td align="left">NE and LY were negative predictors, while male and HCT were positive COVID-19 predictors. External validation: c-statistic 0.75, 0.75, 0.81 Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B11">Banerjee et al. (2020)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">786 (81)</td>
<td align="left">EO, WBC, RBC, MPV, BA, PLT</td>
<td align="left">L: supervised, classification FS: glmnet; C: RF (0.94), Flexible ANN (0.95); V: 10-fold-cv</td>
<td align="left">LR subtraction model between MO, WBC, EO, and PLT shows AUC&#x3d;85% (community). External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B75">Plante et al. (2020)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">12,183 (2,182)</td>
<td align="left">EO, CA, AST, WBC, BA, RDW, RBC, ALB, TB, MCV, MCH, SO, HCO3, UR, Chloride</td>
<td align="left">L: supervised, classification FS: recursive feature elimination; C: XGBoost (0.91); V: 5-fold-cv</td>
<td align="left">NPV for rule-out-ED &#x3e;97% for 1%, 10%, and 20% COVID-19 prevalence. External validation: XGBoost (0.91) Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B96">Tschoellitsch et al. (2021)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">1,537 (65)</td>
<td align="left">WBC, NLR, Hb, CA</td>
<td align="left">L: supervised, classification FS: RF; C: RF (0.74); V: 5-fold-cv</td>
<td align="left">Elevated WBC and NLR improved the model accuracy. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B4">Alves et al. (2021)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">608 (84)</td>
<td align="left">WBC, PLT, EO, MO, CRP</td>
<td align="left">L: supervised, classification FS: decision-tree-based; C: DTX &#x2b; RF (0.86), LR (0.85), XGBoost (0.85), SVM (0.85), MLP (0.81), Ensemble (0.87); V: nested-cv</td>
<td align="left">Explainable patterns based on selected features, according to previous literature. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B90">Souza et al. (2021)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">599 (81)</td>
<td align="left">WBC, BA, EO, RDW</td>
<td align="left">L: unsupervised, clustering FS: SOM; C: Neural Network SOM, LDA; V: NA.</td>
<td align="left">Unsupervised pattern recognition applied to routine blood tests. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B20">Cabitza et al. (2021)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">1,736 (NA)</td>
<td align="left">Age, HTC, Hb, MCH, MCHC, MCV, RBC, WBC, PLT, NE%, LY%, MO%, EO%, BA%, NE, LY, MO, EO, BA, Gender</td>
<td align="left">L: supervised, classification FS: NA; C: SVM-RBF kernel (0.76); V: 10-fold-nested-cv</td>
<td align="left">Meta-validation with robustness and cardinality implications in COVID-19&#xa0;ML models states significant model degradation when tests are performed in different settings (equipment or populations) External validation: SVM-RBF (0.84) Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B8">Babaei et al. (2022)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">279 (177) 1,624 (786) 600 (80)</td>
<td align="left">WBC, PLT, MO, EO, Age quantile, CRP, RBC, Hb, LY, BA, CREA, NE, PO, UR, SO, AST, ALT, G</td>
<td align="left">L: supervised, classification FS: SHAP; C: DNN (0.92), SVM (0.87), LR (0.85), NB (0.83), XGBoost (0.81), RNN (0.80), CNN (0.76), DT (0.72), KNN (0.72), LSTM (0.51). V: 4-fold-cv</td>
<td align="left">Deep neural networks performed better than previous studies based on the same dataset; WBC, Age, AST, and LDH were predictors on the three datasets. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B98">Wu et al. (2021)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">603 (83)</td>
<td align="left">Age, HTC, Hb, PLT, RBC, LY, MCHC, WBC, BA, MCH, EO, MCV, MO, RDW, G, CRP</td>
<td align="left">L: supervised, classification FS: recursive feature elimination; C: Dynamic Ensemble Selection (0.99); V: 70&#x2013;30 and 60&#x2013;40 training-test and 5-fold-cv</td>
<td align="left">Dynamic ensemble selection application on imbalanced data; External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B21">Campagner et al. (2021)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">1,736 (816)</td>
<td align="left">Age, HTC, Hb, MCH, MCHC, MCV, RBC, WBC, PLT, BA%, NE%, LY%, MO%, EO%, BA, NE, LY, MO, EO, BA, covid-19 specific symptoms, Gender</td>
<td align="left">L: supervised, classification FS: recursive feature-elimination; C: SVM (0.975), LR (0.965), E (0.95), RF (0.945), NB (0.935), KNN (0.93); V: 5-fold-nested-cv</td>
<td align="left">The most important predictors were RBC, MCV, NE, EO, and MO. External validation: SVM (0.98), SVM (0.97) Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B1">Abayomi-Alli et al. (2022)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">279 (177)</td>
<td align="left">Age, gender, WBC, PLT, CRP, AST, ALT, GGT, ALP, LDH, NE, LY, MO, EO, BA, swab</td>
<td align="left">L: supervised, classification FS: PCA C: Extra-Trees (0.99), Adaboost (0.98), Decision tree (0.98) V: 10-fold cv</td>
<td align="left">Strong comparison between many classifiers, with higher AUC on the proposed ensemble. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B25">Chadaga et al. (2022)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">5,644 (558)</td>
<td align="left">WBC, EO, PLT, MO</td>
<td align="left">L: supervised, classification FS: Pearson correlation C: RF (0.80), LR (0.78), KNN (0.67), XGBoost (0.79) V: NA</td>
<td align="left">Only internal validation was used to evaluate model performance on an imbalanced dataset (sampled with smote). External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B24">Chadaga et al. (2023)</xref>
</td>
<td align="left">COVID-19 diagnosis</td>
<td align="left">1,169 (270)</td>
<td align="left">ALB, TWBC, BA, SO, AST, PO, TB, DB, UR, TP, LY, NE, Hb, HTC, CREA, MO, NLR</td>
<td align="left">L: supervised, classification FS: Grey wolf optimization (GWO) C: RF (0.99), LR (0.74), DT (0.88), KNN (0.83), STACKA (0.96), Adaboost (0.95), Catboost (0.96), LightGBM (0.98), XGBoost (0.99), STACKB (0.99), STACKC (0.98) V: 5-fold-cv</td>
<td align="left">The RF model&#x2019;s results were interpreted using xAI (Explainable AI): albumin, TWBC, basophil, sodium, and AST are critical for distinguishing COVID-19 from other infections. Increased AST and decreased TWBC and basophils indicate infection with COVID-19</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B60">Luo et al. (2021)</xref>
</td>
<td align="left">COVID-19 severity</td>
<td align="left">196 (129 ICU)</td>
<td align="left">Age, WBC, LY, NE</td>
<td align="left">L: supervised, classification FS: maximum relevance and minimum redundancy C: MCDM (TOPSIS &#x2b; NB) (0.93) V: 80&#x2013;20 (train-test)</td>
<td align="left">Advanced age, low immunity, and combined bacterial infections are reasons for COVID-19 severity; The MCDM algorithm is stable on small datasets. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B13">Benito-Le&#xf3;n et al. (2021)</xref>
</td>
<td align="left">COVID-19 severity</td>
<td align="left">853 C1 (58 ICU) C2 (300&#xa0;H) C3 (495 &#x2b;)</td>
<td align="left">C1: higher levels of AST, LDH, CRP, NE, and lower levels of MO and LY; C2: intermediate levels; C3: lowest AST, LDH, CRP, NE, and higher levels of MO and LY.</td>
<td align="left">L: unsupervised, clustering FS: unsupervised; C: X-means; V: 80&#x2013;20 (train-test)</td>
<td align="left">Serum levels of AST, LDH, CRP, and NE were enough to separate patients&#x2019; severity. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B35">Famiglini et al. (2022)</xref>
</td>
<td align="left">COVID-19 severity</td>
<td align="left">1,004 (181)</td>
<td align="left">Age, LY, NE, MCHC, Gender, MCV, MO</td>
<td align="left">L: supervised, classification FS: SHAP; C: MLP (0.71), DT (0.76), SVM (0.85), XGB (0.81); V: hold-out test set</td>
<td align="left">Data consists of literature; CBC data could be used to predict ICU admission on COVID-19 patients. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B50">Karthikeyan et al. (2021)</xref>
</td>
<td align="left">COVID-19 prognosis</td>
<td align="left">370 (200 recovered) (170 death)</td>
<td align="left">Age, NE, LY, LDH, hs-CRP</td>
<td align="left">L: supervised, classification FS: NN forward feature selection; C: NN (0.99), LR (0.99), XGBoost (0.98), RF (0.98), SVM (0.99), DT (0.97); V: 80&#x2013;20 (train-test) with 5-fold-cv</td>
<td align="left">Higher levels of Age, hs-CRP, neutrophils, LDH, and lower levels of lymphocytes predicted mortality with 96% accuracy during the disease span. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B36">Fernandes et al. (2021)</xref>
</td>
<td align="left">COVID-19 prognosis</td>
<td align="left">1,040 (288 ICU) (106&#xa0;MV) (92&#xa0;M)</td>
<td align="left">Age, LymCRP, CRP, Braden scale</td>
<td align="left">L: supervised, classification FS: SHAP; C: MV: ANN, Extra Trees (0.94), RF, Catboost, Extreme Gradient Boosting M: ANN, Extra Trees (0.97), RF, Catboost, Extreme Gradient Boosting; V: 70&#x2013;30 (train-test) with 10-fold-cv</td>
<td align="left">ML algorithms could predict untrained outcomes (death) based on other outcomes (ICU &#x2b; MV), with AUROC higher than 0.91. External validation: NA Clinical deployment: NA</td>
</tr>
<tr>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B71">Murri et al. (2021)</xref>
</td>
<td rowspan="3" align="left">COVID-19 prognosis</td>
<td rowspan="3" align="left">921 (120&#xa0;M)</td>
<td rowspan="3" align="left">Age, Hb, PLT, NE, SO, UR, CRP, SpO2</td>
<td rowspan="3" align="left">L: supervised, classification FS: LR C: LR (0.87) V: 5-fold-cv</td>
<td align="left">Abnormal Hb, PLT, NE, high levels of UR, CRP, SO, and lower SpO2 were associated with an increased risk of death</td>
</tr>
<tr>
<td align="left">External validation: LR (0.82)</td>
</tr>
<tr>
<td align="left">Clinical deployment: NA</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<italic>AUC</italic> Area under the ROC (receiver-operating characteristic) curve, <italic>AST</italic> Aspartate Transaminase, <italic>ALT</italic> Alanine Transaminase, <italic>Hb</italic> Hemoglobin, <italic>MCH</italic> Mean Corpuscular Hemoglobin, <italic>MCHC</italic> Mean Corpuscular Hemoglobin Concentration, <italic>HTC</italic> Hematocrit, <italic>MCV</italic> Mean Corpuscular Volume, <italic>RDW</italic> Red Cell Distribution Width, <italic>CBC</italic> Complete Blood Count, <italic>ALB</italic> Albumin, <italic>NLR</italic> Neutrophil-to-Lymphocyte Ratio, <italic>HDL</italic> High-Density Lipoprotein, <italic>WBC</italic> White Blood Cells, <italic>GGT</italic> Gamma-glutamyl Transferase, <italic>RDW</italic> Red Cell Distribution Width, <italic>PLT</italic> Platelets, <italic>CRP</italic> C-Reactive Protein, <italic>LDH</italic> Lactate Dehydrogenase, <italic>LY</italic> Lymphocytes, <italic>LY%</italic> Lymphocytes Count, <italic>EO</italic> Eosinophils, <italic>EO%</italic> Eosinophils Count, <italic>NE</italic> Neutrophils, <italic>NE%</italic> Neutrophils Count, <italic>MO</italic> Monocytes, <italic>MO%</italic> Monocytes Count, <italic>BA</italic> Basophils, <italic>BA%</italic> Basophils Count, <italic>ALP</italic> Alkaline Phosphatase, <italic>MPV</italic> Mean Platelet Volume, <italic>CA</italic> Calcium, <italic>ALB</italic> Albumin, <italic>TB</italic> Total Bilirubin, <italic>DB</italic> Direct Bilirubin, <italic>SO</italic> Sodium, <italic>TP</italic> Total Protein, <italic>HCO3</italic> Bicarbonate, <italic>UR</italic> Urea, <italic>PO</italic> Potassium, <italic>CREA</italic> Creatinine, <italic>G</italic> Glucose, <italic>hs-CRP</italic> High-Sensitivity C-Reactive Protein, <italic>LymCRP</italic> Lymphocytes to C-Reactive Protein Ratio, <italic>SpO2</italic> Oxygen Saturation, <italic>NN</italic> Neural Network, <italic>H</italic> Hospitalized, <italic>M</italic> Mortality, <italic>ICU</italic> Intensive Care Units, <italic>MV</italic> 
Mechanical Ventilation, <italic>PPV</italic> Positive Predictive Value, <italic>NPV</italic> Negative Predictive Value, <italic>ED</italic> Emergency Department, <italic>L</italic> Learning, <italic>FS</italic> Feature selection, <italic>C</italic> Classification, <italic>V</italic> Validation, <italic>CV</italic> Cross-Validation, <italic>NA</italic> Not Available, <italic>RF</italic> Random Forests, <italic>LR</italic> Logistic Regression, <italic>glmnet</italic> Regularized Generalized Linear Model with Lasso (Least Absolute Shrinkage and Selection Operator) Regression, <italic>ANN</italic> Artificial Neural Networks, <italic>XGBoost</italic> Extreme Gradient Boosting, <italic>DTX</italic> Decision Trees Explainer, <italic>SVM</italic> Support Vector Machines, <italic>MLP</italic> Multilayer Perceptron, <italic>SOM</italic> Self-Organizing Maps, <italic>LDA</italic> Linear Discriminant Analysis, <italic>RBF</italic> Radial Basis Function, <italic>DNN</italic> Deep Neural Networks, <italic>RNN</italic> Recurrent Neural Networks, <italic>CNN</italic> Convolutional Neural Networks, <italic>KNN</italic> K Nearest Neighbors, <italic>DT</italic> Decision Trees, <italic>LSTM</italic> Long Short-term Memory, <italic>PCA</italic> Principal Component Analysis, <italic>MCDM</italic> Multi Criteria Decision Making, <italic>TOPSIS</italic> Technique for Order of Preference by Similarity to Ideal Solution, <italic>STACKA</italic> Stacked model.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Regarding the diagnosis of COVID-19 severity, Benito-Le&#xf3;n et al. used an unsupervised clustering model (X-means) to differentiate intensive-care, hospitalized, and non-hospitalized positive patients (<xref ref-type="bibr" rid="B13">Benito-Le&#xf3;n et al., 2021</xref>). According to the Davies&#x2013;Bouldin index (the lowest value refers to the best cluster distribution, with higher intercluster distance and lower intracluster distance), the algorithm defined three clusters (Manhattan distance &#x3d; 0.701). Relevant features for the differentiation among clusters, assessed by the <italic>p</italic>-values and effect size, are listed in <xref ref-type="table" rid="T12">Table 12</xref>. Famiglini et al. used a supervised classification approach for predicting ICU admission in a cohort of 1,004 COVID-19 patients, with only 18.3% admitted to the ICU (imbalanced data) (<xref ref-type="bibr" rid="B35">Famiglini et al., 2022</xref>). Data curation (imputation and bias evaluation) and model selection resulted in a better AUC score (classification&#x3d;0.85), lower Brier score (calibration&#x3d;0.144), and standardized net benefit (clinical utility&#x3d;0.69), predicting ICU admission with significant importance of the NLR levels (consistent with the literature). Luo et al. also studied this outcome (mild, n&#x3d;67 and severe, n&#x3d;129) with the application of a hybrid system built on multi-criteria decision-making (MCDM) through the combination of a technique for order of preference by similarity to ideal solution (TOPSIS) algorithm and a na&#xef;ve-Bayes (NB) classifier (<xref ref-type="bibr" rid="B60">Luo et al., 2021</xref>). TOPSIS runs preprocessing and feature ranking while NB performs feature selection. Although this method achieved a high AUC (0.93), the sample size was small and the study did not include external validation.</p>
<p>Murri et al. addressed the prognosis of COVID-19 by developing an interpretable logistic regression model constructed with data from 921 hospitalized patients, of which 120 died (prevalence of 13%) (<xref ref-type="bibr" rid="B71">Murri et al., 2021</xref>). Although the discriminatory ability assessed through the levels of hemoglobin, platelets, neutrophils, urea, c-reactive protein, and sodium was high (AUC&#x3d;0.87), it decreased in the subsequent external validation on a population with a prevalence of 22.6% (AUC&#x3d;0.81). Fernandes et al. extended the discriminative ability to fatality, invasive mechanical ventilation, and ICU admission (multipurpose algorithms) (<xref ref-type="bibr" rid="B36">Fernandes et al., 2021</xref>). Considering fewer features (age, lymphocyte-to-c-reactive-protein ratio, c-reactive protein, and results from the Braden scale), the authors concluded that each of the studied outcomes (ICU, IMV or fatality) could be predicted using data from the other outcomes, always with an AUC&#x3e;0.91. In the study of Karthikeyan et al., higher predictive performance was accomplished by applying an XGBoost for feature importance and a neural network for feature selection on a dataset comprised of deceased (n&#x3d;170) and recovered (n&#x3d;200) patients (<xref ref-type="bibr" rid="B50">Karthikeyan et al., 2021</xref>). Selected features predicted the number of days until the outcome, and accuracy results were consistently higher than 90% for models trained until 12&#xa0;days before the outcome (with data not only from the closest days&#x2013;case 2). Notably, the authors also showed blood patterns related to mortality prediction, such as high values of hs-CRP, LDH, and neutrophils and low values of lymphocytes, consistent with previous literature.</p>
</sec>
</sec>
<sec id="s4">
<title>Challenges</title>
<p>Although the rising developments in AI, reinforced by big data, computational power, and neural networks, have enhanced the quality of studies relating routine blood analysis with principal diagnosis and prognosis outcomes, the clinical deployment stage remains a foremost challenge. The studies and the pathologies we reviewed confirm the delay in implementing AI-based technology in the clinical setting. The research highlighted in this review was motivated by the available statistical information expressing significant associations between blood metabolites and numerous pathologies and by the opportunity provided by the high number of general health panels typically performed in a medical health center. As referred, these analytical panels include complete blood count, metabolic, and lipid panels that are currently consistently evaluated with gold-standard, highly stabilized techniques, not prone to systematic errors or bias. A vast amount of non-appraised clinical information cannot be 100% perceived by a single clinician acting in a consultation or emergency setting (especially in longitudinal profiles), but it can be processed, patterned, statistically evaluated, and flagged, if necessary. Since current clinical decisions are accomplished in a framework of rule-based systems, i.e., thresholds passively updated according to newer guidelines, the primary reasons that explain the resistance to ML-based solutions are the necessity to use external applications (which require manual data input and consume extra time), and the non-interpretability of ML algorithms, especially the ones related to deep learning (&#x2018;black-box&#x2019;). Indeed, a recent study by Henry et al. 
evaluated the adoption of an AI-based targeted real-time early warning system (TREWS) for sepsis; these authors found a lack of interpretability of the computation model, but this was not considered a significant barrier, especially after experiencing the system through different patients and following interactions with peers and research team members (<xref ref-type="bibr" rid="B44">Henry et al., 2022</xref>). On the other hand, the theoretical &#x2018;competing diagnosis&#x2019; may be perceived as a threat to autonomy by some physicians, making them hesitant to adopt these solutions because it may alter their decision-making process with the risk of acting solely on model recommendations, which may not be completely accurate.</p>
<p>Regarding routine blood analysis, data sources (i.e., equipment, disease incidence, patient demographics) with different reference values should also be evaluated and discussed. Each study should clearly report information regarding the data source type (cohort, randomized control trial, or other), data source quality (representativeness, bias, features, and outcome with the exact time of measurement and associated medication or treatment), and data source quantity. People&#x2019;s biochemical fingerprint varies in basal conditions for several reasons; most have little to do with their clinical condition. Re-test studies could ultimately elucidate if the AI model&#x2019;s predictions connect to features that correlate with the problem of interest or if they only capture external variabilities, such as sensor noise, ambient temperatures, user manipulation, etc. (<xref ref-type="bibr" rid="B91">Stegmann et al., 2020</xref>).</p>
<p>Considering the evaluation of the reviewed medical applications, only a few were performed in external centers, and most were conducted with retrospective data. Therefore, working with data matching the same conditions met in traditional clinical settings is essential, principally regarding the user interface (i.e., healthcare professionals or patients) and technology integration into the clinical workflow (physical conditions such as illumination, temperature, humidity, and others). Curiously, a recent prospective assessment of the performance of a deep-learning system for the detection of diabetic retinopathy demonstrated a &#x2018;larger-than-expected proportion of the retinal images as ungradable owing to blurring or darkening&#x2019;, caused by poor ambient lighting during the measurement procedure (<xref ref-type="bibr" rid="B29">Co-operation, 2021</xref>). Regarding COVID-19 prediction models, one study found that changes in the underlying data distribution, known as domain shifts, significantly impact anticipated performance and dependability, resulting in model failure in clinical applications. Domain shifts, which can be induced by changes in disease prevalence, adjustments to RT-PCR testing protocols, or viral mutations, suggest that machine learning models may lose reliability and performance over time, underlining the importance of constant monitoring and updating (<xref ref-type="bibr" rid="B82">Roland et al., 2022</xref>). These examples emphasize how training should incorporate the original conditions to generate truthful coefficients for the desired problem-solving.</p>
<p>These challenges should be revised and improved by consistently implementing the described ML pipeline to develop federated learning (training in multiple institutions) and the deployment in ETL (i.e., extract, transform and load), keeping data &#x2018;healthy&#x2019;.</p>
</sec>
<sec id="s5">
<title>Future perspectives</title>
<p>The deployment of a cost-free real-time blood augmentation diagnostic tool, based on longitudinal data and source-stable (gold-standard), should address probabilistic metrics of diagnosis and provide the clinician with a landscape view for each individual. AI can play a key role in delivering explainable decision support systems to ensure that patterns are correctly identified and biomarkers are accurately measured, directly influencing the outcome. Measures of clinical effectiveness, such as user feedback, clinical reliance, and interpretability, must improve and be better described, particularly in the upcoming guidelines for model development and reporting (TRIPOD-ML). Although this protocol is still under development, the Transparent Reporting of a multivariable prediction model for Individual Prognosis or Diagnosis (TRIPOD, 2015) standard (<xref ref-type="bibr" rid="B28">Collins et al., 2015</xref>) should be considered since it provides guidance and recommendations for reporting a multivariable prediction model for diagnosis or prognosis. Representativeness, in particular, should always be addressed because it is an essential concept in data quality, covering the necessary heterogeneity of the studied population in a balanced proportion, which is especially important when models aim to predict categorical or binary events in the context of medical problems.</p>
<p>In the future, deploying ML models will still face data shifts across time, hindering representativeness and compromising the model&#x2019;s performance. A paramount example is the performance of COVID-19 models trained during the initial alpha strain when applied to the current disease condition, led by several other variants of the virus, and the influence of the addition of vaccines that altered disease outcomes for the vast majority of infected individuals. Therefore, deployment should start with isolated pilot studies to receive feedback from healthcare experts on user experience, interface, efficiency, and real-time evaluation performance.</p>
</sec>
<sec id="s6">
<title>Outlook</title>
<p>This review summarizes the application of artificial intelligence algorithms in the diagnosis and prognosis of ICD-10 disorders using routine blood tests only. The reports analyzed herein differ in data source type, quality, and quantity and describe a multitude of ML algorithms for outcome prediction. Principal findings indicate strong performance metrics in validation studies and a clear gap between standard disease-associated metabolites and those chosen by machine learning models, resulting in higher performance metrics than traditional clinical practice scores.</p>
<p>Although there is still a sizable gap between reviewed studies and their clinical application, AI is changing the practice of medicine, and digital tools are key for helping physicians evaluate patients in a more personalized, rapid, and efficient manner. The use of routine blood parameters as exclusive input features for model development could allow the translation of high-level diagnosis from primary or secondary care to point-of-care, making these analyses more valuable in lowering time to diagnosis and overall healthcare costs.</p>
</sec>
</body>
<back>
<sec id="s7">
<title>Author contributions</title>
<p>MS-S: Data curation, Investigation, Methodology, Writing&#x2013;original draft. NS: Conceptualization, Formal Analysis, Funding acquisition, Supervision, Writing&#x2013;review and editing. JS: Conceptualization, Data curation, Formal Analysis, Methodology, Supervision, Validation, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. MS-S received a PhD fellowship from the Foundation for Science and Technology (FCT, Portugal)/FEDER.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abayomi-Alli</surname>
<given-names>O. O.</given-names>
</name>
<name>
<surname>Dama&#x161;evi&#x10d;ius</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Maskeli&#x16b;nas</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Misra</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An ensemble learning model for COVID-19 detection from blood test samples</article-title>. <source>Sensors</source> <volume>22</volume>, <fpage>2224</fpage>. <pub-id pub-id-type="doi">10.3390/s22062224</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmad</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Rahim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zubair</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Abdul-Ghafar</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Artificial intelligence (AI) in medicine, current applications and future role with special emphasis on its potential and promise in pathology: present and future impact, obstacles including costs and acceptance among pathologists, practical and philosophical considerations. A comprehensive review</article-title>. <source>Diagn. Pathol.</source> <volume>16</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1186/s13000-021-01085-4</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alsuliman</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Humaidan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sliman</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Machine learning and artificial intelligence in the service of medicine: necessity or potentiality?</article-title> <source>Curr. Res. Transl. Med.</source> <volume>68</volume>, <fpage>245</fpage>&#x2013;<lpage>251</lpage>. <pub-id pub-id-type="doi">10.1016/j.retram.2020.01.002</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alves</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Castro</surname>
<given-names>G. Z.</given-names>
</name>
<name>
<surname>Oliveira</surname>
<given-names>B. A. S.</given-names>
</name>
<name>
<surname>Ferreira</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Ram&#xed;rez</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Explaining machine learning based diagnosis of COVID-19 from routine blood tests with decision trees and criteria graphs</article-title>. <source>Comput. Biol. Med.</source> <volume>132</volume>, <fpage>104335</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2021.104335</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Auld</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Harrington</surname>
<given-names>K. R. V.</given-names>
</name>
<name>
<surname>Adelman</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Robichaux</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Overton</surname>
<given-names>E. C.</given-names>
</name>
<name>
<surname>Caridi-Scheible</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Trends in ICU mortality from coronavirus disease 2019: a tale of three surges</article-title>. <source>Crit. Care Med.</source> <volume>50</volume>, <fpage>245</fpage>&#x2013;<lpage>255</lpage>. <pub-id pub-id-type="doi">10.1097/ccm.0000000000005185</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Azarkhish</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Raoufy</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Gharibzadeh</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Artificial intelligence models for predicting iron deficiency anemia and iron serum level based on accessible laboratory data</article-title>. <source>J. Med. Syst.</source> <volume>36</volume>, <fpage>2057</fpage>&#x2013;<lpage>2061</lpage>. <pub-id pub-id-type="doi">10.1007/s10916-011-9668-3</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Babaei</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sorayaie</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ghafari</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bagherzadeh</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>COVID-19 diagnosis from routine blood tests using artificial intelligence techniques</article-title>. <source>Biomed. Signal Process Control</source>. <pub-id pub-id-type="doi">10.1016/j.bspc.2021.103263</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Badrick</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Evidence-based laboratory medicine</article-title>. <source>Clin. Biochem. Rev.</source> <volume>34</volume>, <fpage>43</fpage>&#x2013;<lpage>46</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bajwa</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Munir</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Nori</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Artificial intelligence in healthcare: transforming the practice of medicine</article-title>. <source>Future Healthc. J.</source> <volume>8</volume>, <fpage>e188</fpage>&#x2013;<lpage>e194</lpage>. <pub-id pub-id-type="doi">10.7861/fhj.2021-0095</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Banerjee</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ray</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vorselaars</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kitson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mamalakis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Weeks</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Use of machine learning and artificial intelligence to predict SARS-CoV-2 infection from full blood counts in a population</article-title>. <source>Int. Immunopharmacol.</source> <volume>86</volume>, <fpage>106705</fpage>. <pub-id pub-id-type="doi">10.1016/j.intimp.2020.106705</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barnhart-Magen</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gotlib</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Marilus</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Einav</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Differential diagnostics of thalassemia minor by artificial neural networks model</article-title>. <source>J. Clin. Lab. Anal.</source> <volume>27</volume>, <fpage>481</fpage>&#x2013;<lpage>486</lpage>. <pub-id pub-id-type="doi">10.1002/jcla.21631</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benito-Le&#xf3;n</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>del Castillo</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Estirado</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ghosh</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Dubey</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Serrano</surname>
<given-names>J. I.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Using unsupervised machine learning to identify age- and sex-independent severity subgroups among patients with COVID-19: observational longitudinal study</article-title>. <source>J. Med. Internet Res.</source> <volume>23</volume>, <fpage>e25988</fpage>&#x2013;<lpage>e26014</lpage>. <pub-id pub-id-type="doi">10.2196/25988</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benjamens</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dhunnoo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mesk&#xf3;</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The state of artificial intelligence-based FDA-approved medical devices and algorithms: an online database</article-title>. <source>npj Digit. Med.</source> <volume>3</volume>, <fpage>118</fpage>. <pub-id pub-id-type="doi">10.1038/s41746-020-00324-0</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bernardini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Morettini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Romeo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Frontoni</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Burattini</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>TyG-er: an ensemble Regression Forest approach for identification of clinical factors related to insulin resistance condition using Electronic Health Records</article-title>. <source>Comput. Biol. Med.</source> <volume>112</volume>, <fpage>103358</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2019.103358</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>B&#xf6;ger</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Fachi</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Vilhena</surname>
<given-names>R. O.</given-names>
</name>
<name>
<surname>Cobre</surname>
<given-names>A. F.</given-names>
</name>
<name>
<surname>Tonin</surname>
<given-names>F. S.</given-names>
</name>
<name>
<surname>Pontarolo</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Systematic review with meta-analysis of the accuracy of diagnostic tests for COVID-19</article-title>. <source>Am. J. Infect. Control</source> <volume>49</volume>, <fpage>21</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajic.2020.07.011</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McMahan</surname>
<given-names>H. B.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ramage</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hampson</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Communication-efficient learning of deep networks from decentralized data</article-title>. <source>Proc. Mach. Learn. Res.</source> <volume>54</volume>, <fpage>1273</fpage>&#x2013;<lpage>1282</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brinati</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Campagner</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ferrari</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Locatelli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Banfi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Cabitza</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Detection of COVID-19 infection from routine blood exams with machine learning: a feasibility study</article-title>. <source>J. Med. Syst.</source> <volume>44</volume>, <fpage>135</fpage>. <pub-id pub-id-type="doi">10.1007/s10916-020-01597-4</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bruckert</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Finzel</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Schmid</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The next generation of medical decision support: a roadmap toward transparent expert companions</article-title>. <source>Front. Artif. Intell.</source> <volume>3</volume>, <fpage>507973</fpage>&#x2013;<lpage>508013</lpage>. <pub-id pub-id-type="doi">10.3389/frai.2020.507973</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cabitza</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Campagner</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Soares</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Garc&#xed;a&#xa0;de&#xa0;Guadiana-Romualdo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Challa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sulejmani</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The importance of being external. methodological insights for the external validation of machine learning models in medicine</article-title>. <source>Comput. Methods Programs Biomed.</source> <volume>208</volume>, <fpage>106288</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2021.106288</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Campagner</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Carobene</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cabitza</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>External validation of machine learning models for COVID-19 detection based on complete blood count</article-title>. <source>Health Inf. Sci. Syst.</source> <volume>9</volume>, <fpage>37</fpage>. <pub-id pub-id-type="doi">10.1007/s13755-021-00167-3</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Z. D.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X. F.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>An MLP classifier for prediction of HBV-induced liver cirrhosis using routinely available clinical parameters</article-title>. <source>Dis. Markers</source> <volume>35</volume>, <fpage>653</fpage>&#x2013;<lpage>660</lpage>. <pub-id pub-id-type="doi">10.1155/2013/127962</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Celkan</surname>
<given-names>T. T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>What does a hemogram say to us?</article-title> <source>Turk pediatri arsivi</source> <volume>55</volume>, <fpage>103</fpage>&#x2013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.14744/TurkPediatriArs.2019.76301</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chadaga</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Prabhu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bhat</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Sampathila</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Umakanth</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chadaga</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A decision support system for diagnosis of COVID-19 from non-COVID-19 influenza-like illness using explainable artificial intelligence</article-title>. <source>Bioengineering</source> <volume>10</volume>, <fpage>439</fpage>. <pub-id pub-id-type="doi">10.3390/bioengineering10040439</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chadaga</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Prabhu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vivekananda Bhat</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Umakanth</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sampathila</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Medical diagnosis of COVID-19 using blood tests and machine learning</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>2161</volume>, <fpage>012017</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/2161/1/012017</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chatburn</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Mireles-Cabodevila</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Hematology</article-title>,&#x201d; in <source>Handbook of respiratory care</source>. <edition>Third Edition</edition>, <fpage>54</fpage>&#x2013;<lpage>63</lpage>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>&#xc7;il</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ayy&#x131;ld&#x131;z</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Tuncer</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Discrimination of &#x3b2;-thalassemia and iron deficiency anemia through extreme learning machine and regularized extreme learning machine based decision support system</article-title>. <source>Med. Hypotheses</source> <volume>138</volume>, <fpage>109611</fpage>. <pub-id pub-id-type="doi">10.1016/j.mehy.2020.109611</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Collins</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Reitsma</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Altman</surname>
<given-names>D. G.</given-names>
</name>
<name>
<surname>Moons</surname>
<given-names>K. G. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Transparent reporting of a multivariable prediction model for individual prognosis or diagnosis (TRIPOD): the TRIPOD statement</article-title>. <source>Eur. Urol.</source> <volume>67</volume>, <fpage>1142</fpage>&#x2013;<lpage>1151</lpage>. <pub-id pub-id-type="doi">10.1016/j.eururo.2014.11.025</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<collab>Nature Biomedical Engineering</collab> (<year>2021</year>). <article-title>Machine learning in translation</article-title>. <source>Nat. Biomed. Eng.</source> <volume>5</volume>, <fpage>485</fpage>&#x2013;<lpage>486</lpage>. <pub-id pub-id-type="doi">10.1038/s41551-021-00758-1</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<collab>Our World in Data</collab> (<year>2023</year>). <source>Covid-19 deaths</source>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dayan</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Roth</surname>
<given-names>H. R.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Harouni</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gentili</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abidin</surname>
<given-names>A. Z.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Federated learning for predicting clinical outcomes in patients with COVID-19</article-title>. <source>Nat. Med.</source> <volume>27</volume>, <fpage>1735</fpage>&#x2013;<lpage>1743</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-021-01506-3</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Demichev</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Tober-Lau</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Lemke</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Nazarenko</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Thibeault</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Whitwell</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A time-resolved proteomic and prognostic map of COVID-19</article-title>. <source>Cell Syst.</source> <volume>12</volume>, <fpage>780</fpage>&#x2013;<lpage>794.e7</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2021.05.005</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="book">
<collab>Diagnostics for All</collab> (<year>2023</year>). <source>Foundation for innovative new diagnostics</source>. <comment>Test Directory</comment>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dipnall</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Pasco</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Berk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Dodd</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jacka</surname>
<given-names>F. N.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Fusing data mining, machine learning and traditional statistics to detect biomarkers associated with depression</article-title>. <source>PLoS One</source> <volume>11</volume>, <fpage>e0148195</fpage>&#x2013;<lpage>e148223</lpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0148195</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Famiglini</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Campagner</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Carobene</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cabitza</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A robust and parsimonious machine learning method to predict ICU admission of COVID-19 patients</article-title>. <source>Med. Biol. Eng. Comput.</source>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1007/s11517-022-02543-x</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fernandes</surname>
<given-names>F. T.</given-names>
</name>
<name>
<surname>de Oliveira</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Teixeira</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Batista</surname>
<given-names>A. F. M.</given-names>
</name>
<name>
<surname>Dalla Costa</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chiavegatto Filho</surname>
<given-names>A. D. P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A multipurpose machine learning approach to predict COVID-19 negative prognosis in S&#xe3;o Paulo, Brazil</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>3343</fpage>&#x2013;<lpage>3347</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-82885-y</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fialoke</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Malarstig</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Dumitriu</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Application of machine learning methods to predict non-alcoholic steatohepatitis (NASH) in non-alcoholic fatty liver (NAFL) patients</article-title>. <source>AMIA Annu. Symp. Proc. AMIA Symp.</source> <volume>2018</volume>, <fpage>430</fpage>&#x2013;<lpage>439</lpage>.</citation>
</ref>
<ref id="B38">
<citation citation-type="book">
<collab>Food and Drug Administration. Emergency use authorizations for medical devices</collab> (<year>2021</year>). <source>Template for developers of molecular and antigen diagnostic COVID-19 tests for home use</source>.</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fujiwara</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Sparse Modeling delivers fast, energy efficient and explainable AI solutions for cutting-edge medical applications</article-title>. <source>Nature</source>, <fpage>50</fpage>&#x2013;<lpage>51</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gun&#x10d;ar</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kukar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Notar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brvar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>&#x10c;ernel&#x10d;</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Notar</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>An application of machine learning to haematological diagnosis</article-title>. <source>Sci. Rep.</source> <volume>8</volume>, <fpage>411</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-18564-8</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Predicting the postoperative blood coagulation state of children with congenital heart disease by machine learning based on real-world data</article-title>. <source>Transl. Pediatr.</source> <volume>10</volume>, <fpage>33</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.21037/tp-20-238</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haider</surname>
<given-names>R. Z.</given-names>
</name>
<name>
<surname>Ujjan</surname>
<given-names>I. U.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>N. A.</given-names>
</name>
<name>
<surname>Urrechaga</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Shamsi</surname>
<given-names>T. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Beyond the in-practice CBC: the research CBC parameters-driven machine learning predictive modeling for early differentiation among leukemias</article-title>. <source>Diagnostics</source> <volume>12</volume>, <fpage>138</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics12010138</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hao</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <source>Training a single AI model can emit as much carbon as five cars in their lifetimes</source>. <publisher-loc>United States</publisher-loc>: <publisher-name>MIT Technology Review</publisher-name>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Henry</surname>
<given-names>K. E.</given-names>
</name>
<name>
<surname>Kornfield</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sridharan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Linton</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Groh</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Human&#x2013;machine teaming is key to AI adoption: clinicians&#x2019; experiences with a deployed machine learning system</article-title>. <source>npj Digit. Med.</source> <volume>5</volume>, <fpage>97</fpage>&#x2013;<lpage>106</lpage>. <pub-id pub-id-type="doi">10.1038/s41746-022-00597-7</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ho</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C. C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Comparing machine learning with case-control models to identify confirmed dengue cases</article-title>. <source>PLoS Negl. Trop. Dis.</source> <volume>14</volume>, <fpage>e0008843</fpage>&#x2013;<lpage>e8921</lpage>. <pub-id pub-id-type="doi">10.1371/journal.pntd.0008843</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hochman</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Feldman</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Weizman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Krivoy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gur</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Barzilay</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Development and validation of a machine learning-based postpartum depression prediction model: a nationwide cohort study</article-title>. <source>Depress. Anxiety</source> <volume>38</volume>, <fpage>400</fpage>&#x2013;<lpage>411</lpage>. <pub-id pub-id-type="doi">10.1002/da.23123</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hornbrook</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Goshen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Choman</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>O&#x2019;Keeffe-Rosetti</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kinar</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liles</surname>
<given-names>E. G.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Early colorectal cancer detected by machine learning model using gender, age, and complete blood count data</article-title>. <source>Dig. Dis. Sci.</source> <volume>62</volume>, <fpage>2719</fpage>&#x2013;<lpage>2727</lpage>. <pub-id pub-id-type="doi">10.1007/s10620-017-4722-8</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joshi</surname>
<given-names>R. P.</given-names>
</name>
<name>
<surname>Pejaver</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hammarlund</surname>
<given-names>N. E.</given-names>
</name>
<name>
<surname>Sung</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Furmanchuk</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A predictive tool for identification of SARS-CoV-2 PCR-negative emergency department patients using routine test results</article-title>. <source>J. Clin. Virol.</source> <volume>129</volume>, <fpage>104502</fpage>. <pub-id pub-id-type="doi">10.1016/j.jcv.2020.104502</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kairouz</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>McMahan</surname>
<given-names>H. B.</given-names>
</name>
<name>
<surname>Avent</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bellet</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bennis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nitin Bhagoji</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Advances and open problems in federated learning</article-title>. <source>Found. Trends&#xae; Mach. Learn.</source> <volume>14</volume>, <fpage>1</fpage>&#x2013;<lpage>210</lpage>. <pub-id pub-id-type="doi">10.1561/2200000083</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karthikeyan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Garg</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Vinod</surname>
<given-names>P. K.</given-names>
</name>
<name>
<surname>Priyakumar</surname>
<given-names>U. D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Machine learning based clinical decision support system for early COVID-19 mortality prediction</article-title>. <source>Front. Public Health</source> <volume>9</volume>, <fpage>626697</fpage>&#x2013;<lpage>626713</lpage>. <pub-id pub-id-type="doi">10.3389/fpubh.2021.626697</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kerr</surname>
<given-names>W. T.</given-names>
</name>
<name>
<surname>Lau</surname>
<given-names>E. P.</given-names>
</name>
<name>
<surname>Owens</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Trefler</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The future of medical diagnostics: large digitized databases</article-title>. <source>Yale J. Biol. Med.</source> <volume>85</volume>, <fpage>363</fpage>&#x2013;<lpage>377</lpage>.</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kinar</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kalkstein</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Akiva</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Levin</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Half</surname>
<given-names>E. E.</given-names>
</name>
<name>
<surname>Goldshtein</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Development and validation of a predictive model for detection of colorectal cancer in primary care by analysis of complete blood counts: a binational retrospective study</article-title>. <source>J. Am. Med. Inf. Assoc.</source> <volume>23</volume>, <fpage>879</fpage>&#x2013;<lpage>890</lpage>. <pub-id pub-id-type="doi">10.1093/jamia/ocv195</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kline</surname>
<given-names>R. R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Cybernetics, automata studies, and the Dartmouth conference on artificial intelligence</article-title>. <source>IEEE Ann. Hist. Comput.</source> <volume>33</volume>, <fpage>5</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1109/mahc.2010.44</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kocbek</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Fijacko</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Soguero-Ruiz</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mikalsen</surname>
<given-names>K. &#xd8;.</given-names>
</name>
<name>
<surname>Maver</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Povalej Brzan</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Maximizing interpretability and cost-effectiveness of surgical site infection (SSI) predictive models using feature-specific regularized logistic regression on preoperative temporal data</article-title>. <source>Comput. Math. Methods Med.</source> <volume>2019</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1155/2019/2059851</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kopitar</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kocbek</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cilar</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sheikh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Stiglic</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Early detection of type 2 diabetes mellitus using machine learning-based prediction models</article-title>. <source>Sci. Rep.</source> <volume>10</volume>, <fpage>11981</fpage>&#x2013;<lpage>12012</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-68771-z</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krizhevsky</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sutskever</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>ImageNet classification with deep convolutional neural networks</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>25</volume>, <fpage>1097</fpage>&#x2013;<lpage>1105</lpage>. <pub-id pub-id-type="doi">10.1145/3383972.3383975</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kushner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Breton</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Sankaranarayanan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Multi-hour blood glucose prediction in type 1 diabetes: a patient-specific approach using shallow neural network models</article-title>. <source>Diabetes Technol. Ther.</source> <volume>22</volume>, <fpage>883</fpage>&#x2013;<lpage>891</lpage>. <pub-id pub-id-type="doi">10.1089/dia.2020.0061</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>W. T.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shende</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Castaneda</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chakladar</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tsai</surname>
<given-names>J. C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Using machine learning of clinical data to diagnose COVID-19: a systematic review and meta-analysis</article-title>. <source>BMC Med. Inf. Decis. Mak.</source> <volume>20</volume>, <fpage>247</fpage>&#x2013;<lpage>313</lpage>. <pub-id pub-id-type="doi">10.1186/s12911-020-01266-z</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Chien</surname>
<given-names>T. W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L. Y.</given-names>
</name>
<name>
<surname>Chou</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An artificial neural network model to predict the mortality of COVID-19 patients using routine blood samples at the time of hospital admission: development and validation study</article-title>. <source>Med. Baltim.</source> <volume>100</volume>, <fpage>e26532</fpage>. <pub-id pub-id-type="doi">10.1097/md.0000000000026532</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The selection of indicators from initial blood routine test results to improve the accuracy of early prediction of COVID-19 severity</article-title>. <source>PLoS One</source> <volume>16</volume>, <fpage>e0253329</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0253329</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>C. F.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C. H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y. M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Application of machine learning techniques for clinical predictive modeling: a cross-sectional study on nonalcoholic fatty liver disease in China</article-title>. <source>BioMed Res. Int.</source> <volume>2018</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1155/2018/4304376</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahmood</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Shahid</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bakhshi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Riaz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ghufran</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yaqoob</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identification of significant risks in pediatric acute lymphoblastic leukemia (ALL) through machine learning (ML) approach</article-title>. <source>Med. Biol. Eng. Comput.</source> <volume>58</volume>, <fpage>2631</fpage>&#x2013;<lpage>2640</lpage>. <pub-id pub-id-type="doi">10.1007/s11517-020-02245-2</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mamoshina</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kochetov</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Cortese</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kovalchuk</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Aliper</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Putin</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Blood biochemistry analysis to detect smoking status and quantify accelerated aging in smokers</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>142</fpage>&#x2013;<lpage>210</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-018-35704-w</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marieb</surname>
<given-names>E. N.</given-names>
</name>
<name>
<surname>Hoehn</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Blood composition and functions</article-title>. <source>Hum. Anat. Physiol.</source>, <fpage>634</fpage>&#x2013;<lpage>657</lpage>.</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mathioudakis</surname>
<given-names>N. N.</given-names>
</name>
<name>
<surname>Abusamaan</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Shakarchi</surname>
<given-names>A. F.</given-names>
</name>
<name>
<surname>Sokolinsky</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fayzullin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>McGready</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Development and validation of a machine learning model to predict near-term risk of iatrogenic hypoglycemia in hospitalized patients</article-title>. <source>JAMA Netw. Open</source> <volume>4</volume>, <fpage>e2030913</fpage>&#x2013;<lpage>e2030915</lpage>. <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.30913</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Matthew</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pincus</surname>
<given-names>N. Z. A. J.</given-names>
</name>
</person-group> (<year>2011</year>). <source>Henry&#x2019;s clinical diagnosis and management</source>. <edition>22nd Edition</edition>. <publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>Elsevier</publisher-name>.</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meiseles</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Paley</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ziv</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hadid</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Rokach</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tadmor</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Explainable machine learning for chronic lymphocytic leukemia treatment prediction using only inexpensive tests</article-title>. <source>Comput. Biol. Med.</source> <volume>145</volume>, <fpage>105490</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.105490</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Metsker</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Magoev</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yakovlev</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yanishevskiy</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kopanitsa</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kovalchuk</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Identification of risk factors for patients with diabetes: diabetic polyneuropathy case study</article-title>. <source>BMC Med. Inf. Decis. Mak.</source> <volume>20</volume>, <fpage>201</fpage>&#x2013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1186/s12911-020-01215-w</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mooney</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Eogan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>N&#xed; &#xc1;inle</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cleary</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gallagher</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>O&#x27;Loughlin</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Predicting bacteraemia in maternity patients using full blood count parameters: a supervised machine learning algorithm approach</article-title>. <source>Int. J. Laboratory Hematol.</source> <volume>43</volume>, <fpage>609</fpage>&#x2013;<lpage>615</lpage>. <pub-id pub-id-type="doi">10.1111/ijlh.13434</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moranga</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Amenga-Etego</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bah</surname>
<given-names>S. Y.</given-names>
</name>
<name>
<surname>Appiah</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Amuzu</surname>
<given-names>D. S. Y.</given-names>
</name>
<name>
<surname>Amoako</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Machine learning approaches classify clinical malaria outcomes based on haematological parameters</article-title>. <source>BMC Med.</source> <volume>18</volume>, <fpage>375</fpage>&#x2013;<lpage>416</lpage>. <pub-id pub-id-type="doi">10.1186/s12916-020-01823-3</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Murri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lenkowicz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Masciocchi</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Iacomini</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Fantoni</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Damiani</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A machine-learning parsimonious multivariable predictive model of mortality risk in patients with Covid-19</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>21136</fpage>&#x2013;<lpage>21210</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-99905-6</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="web">
<collab>Our World in Data</collab> (<year>2023</year>). <article-title>Covid-19 cases</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://ourworldindata.org/grapher/covid-cases-income">https://ourworldindata.org/grapher/covid-cases-income</ext-link>.</comment>
</citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Scikit-learn: machine learning in Python</article-title>. <source>J. Mach. Learn. Res.</source> <volume>12</volume>, <fpage>2825</fpage>.</citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>C. H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identification of exacerbation risk in patients with liver dysfunction using machine learning algorithms</article-title>. <source>PLoS One</source> <volume>15</volume>, <fpage>e0239266</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0239266</pub-id>
</citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Plante</surname>
<given-names>T. B.</given-names>
</name>
<name>
<surname>Blau</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Berg</surname>
<given-names>A. N.</given-names>
</name>
<name>
<surname>Weinberg</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Jun</surname>
<given-names>I. C.</given-names>
</name>
<name>
<surname>Tapson</surname>
<given-names>V. F.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Development and external validation of a machine learning tool to rule out COVID-19 among adults in the emergency department using routine blood tests: a large, multicenter, real-world study</article-title>. <source>J. Med. Internet Res.</source> <volume>22</volume>, <fpage>e24048</fpage>. <pub-id pub-id-type="doi">10.2196/24048</pub-id>
</citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prince-Guerra</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Almendares</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Nolen</surname>
<given-names>L. D.</given-names>
</name>
<name>
<surname>Gunn</surname>
<given-names>J. K. L.</given-names>
</name>
<name>
<surname>Dale</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Buono</surname>
<given-names>S. A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Evaluation of Abbott BinaxNOW rapid antigen test for SARS-CoV-2 infection at two community-based testing sites &#x2014; Pima County, Arizona, November 3&#x2013;17, 2020</article-title>. <source>MMWR. Morb. Mortal. Wkly. Rep.</source> <volume>70</volume>, <fpage>100</fpage>&#x2013;<lpage>105</lpage>. <pub-id pub-id-type="doi">10.15585/mmwr.mm7003e3</pub-id>
</citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rashed-Al-Mahfuz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Haque</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Azad</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Alyami</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Quinn</surname>
<given-names>J. M. W.</given-names>
</name>
<name>
<surname>Moni</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Clinically applicable machine learning approaches to identify attributes of chronic kidney disease (CKD) for use in low-cost diagnostic screening</article-title>. <source>IEEE J. Transl. Eng. Health Med.</source> <volume>9</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1109/jtehm.2021.3073629</pub-id>
</citation>
</ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ratzinger</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Dedeyan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rammerstorfer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Perkmann</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Burgmann</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Makristathis</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>A risk prediction model for screening bacteremic patients: a cross sectional study</article-title>. <source>PLoS One</source> <volume>9</volume>, <fpage>e106765</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0106765</pub-id>
</citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rawson</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>Hernandez</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>L. S. P.</given-names>
</name>
<name>
<surname>Blandy</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Herrero</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Gilchrist</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Supervised machine learning for the prediction of infection on admission to hospital: a prospective observational cohort study</article-title>. <source>J. Antimicrob. Chemother.</source> <volume>74</volume>, <fpage>1108</fpage>&#x2013;<lpage>1115</lpage>. <pub-id pub-id-type="doi">10.1093/jac/dky514</pub-id>
</citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reardon</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Rise of robot radiologists</article-title>. <source>Nature</source> <volume>576</volume>, <fpage>S54</fpage>&#x2013;<lpage>S58</lpage>. <pub-id pub-id-type="doi">10.1038/d41586-019-03847-z</pub-id>
</citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Richard</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>McPherson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pincus</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Clinical diagnosis and management by laboratory methods</article-title>. <pub-id pub-id-type="doi">10.1136/jcp.34.2.228-a</pub-id>
</citation>
</ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roland</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>B&#xf6;ck</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tschoellitsch</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Maletzky</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hochreiter</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Meier</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Domain shifts in machine learning based Covid-19 diagnosis from blood tests</article-title>. <source>J. Med. Syst.</source> <volume>46</volume>, <fpage>23</fpage>. <pub-id pub-id-type="doi">10.1007/s10916-022-01807-1</pub-id>
</citation>
</ref>
<ref id="B83">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarbaz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pournik</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Ghalichi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kimiafar</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Razavi</surname>
<given-names>A. R.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Designing a Human T-Lymphotropic Virus Type 1 (HTLV-I) diagnostic model using the complete blood count</article-title>. <source>Iran. J. Basic Med. Sci.</source> <volume>16</volume>, <fpage>247</fpage>&#x2013;<lpage>251</lpage>.</citation>
</ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarker</surname>
<given-names>I. H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Machine learning: algorithms, real-world applications and research directions</article-title>. <source>SN Comput. Sci.</source> <volume>2</volume>, <fpage>160</fpage>. <pub-id pub-id-type="doi">10.1007/s42979-021-00592-x</pub-id>
</citation>
</ref>
<ref id="B85">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarker</surname>
<given-names>I. H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>AI-based modeling: techniques, applications and research issues towards automation, intelligent and smart systems</article-title>. <source>SN Comput. Sci.</source> <volume>3</volume>, <fpage>158</fpage>&#x2013;<lpage>220</lpage>. <pub-id pub-id-type="doi">10.1007/s42979-022-01043-x</pub-id>
</citation>
</ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W. W.</given-names>
</name>
<name>
<surname>Barszczyk</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Waese-Perlman</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Blood biomarkers predict cardiac workload using machine learning</article-title>. <source>BioMed Res. Int.</source> <volume>2021</volume>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1155/2021/6172815</pub-id>
</citation>
</ref>
<ref id="B87">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shukla Shubhendu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vijay</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Applicability of artificial intelligence in different fields of life</article-title>. <source>Int. J. Sci. Eng. Res.</source> <volume>1</volume>, <fpage>28</fpage>&#x2013;<lpage>35</lpage>.</citation>
</ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soerensen</surname>
<given-names>P. D.</given-names>
</name>
<name>
<surname>Christensen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Gray Worsoe Laursen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hardahl</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Brandslund</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Madsen</surname>
<given-names>J. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Using artificial intelligence in a primary care setting to identify patients at risk for cancer: a risk prediction model based on routine laboratory tests</article-title>. <source>Clin. Chem. Laboratory Med. (CCLM)</source> <volume>60</volume>, <fpage>2005</fpage>&#x2013;<lpage>2016</lpage>. <pub-id pub-id-type="doi">10.1515/cclm-2021-1015</pub-id>
</citation>
</ref>
<ref id="B89">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soguero-Ruiz</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Fei</surname>
<given-names>W. M. E.</given-names>
</name>
<name>
<surname>Jenssen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Augestad</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>&#xc1;lvarez</surname>
<given-names>J. L. R.</given-names>
</name>
<name>
<surname>Jim&#xe9;nez</surname>
<given-names>I. M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Data-driven temporal prediction of surgical site infection</article-title>. <source>AMIA Annu. Symp. Proc. AMIA Symp.</source> <volume>2015</volume>, <fpage>1164</fpage>&#x2013;<lpage>1173</lpage>.</citation>
</ref>
<ref id="B90">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Souza</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Almeida</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Barcelos</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Bortoletto</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Munoz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Waldman</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Simple hemogram to support the decision-making of COVID-19 diagnosis using clusters analysis with self-organizing maps neural network</article-title>. <source>Soft Comput.</source> <volume>27</volume>, <fpage>3295</fpage>&#x2013;<lpage>3306</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-021-05810-5</pub-id>
</citation>
</ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stegmann</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Hahn</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liss</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shefner</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rutkove</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kawabata</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Repeatability of commonly used speech and language features for clinical applications</article-title>. <source>Digit. Biomarkers</source> <volume>4</volume>, <fpage>109</fpage>&#x2013;<lpage>122</lpage>. <pub-id pub-id-type="doi">10.1159/000511671</pub-id>
</citation>
</ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Svensson</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>H&#xfc;bler</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Figge</surname>
<given-names>M. T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Automated classification of circulating tumor cells and the impact of interobsever variability on classifier training and performance</article-title>. <source>J. Immunol. Res.</source> <volume>2015</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1155/2015/573165</pub-id>
</citation>
</ref>
<ref id="B93">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tarwater</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Estimated glomerular filtration rate explained</article-title>. <source>Mo. Med.</source> <volume>108</volume>, <fpage>29</fpage>&#x2013;<lpage>32</lpage>.</citation>
</ref>
<ref id="B94">
<citation citation-type="web">
<collab>The Medical Futurist</collab> (<year>2022</year>). <article-title>FDA approved AI-based algorithms</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://medicalfuturist.com/fda-approved-ai-based-algorithms/">https://medicalfuturist.com/fda-approved-ai-based-algorithms/</ext-link>.</comment>
</citation>
</ref>
<ref id="B95">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Molecular pathogenesis: connections between viral hepatitis-induced and non-alcoholic steatohepatitis-induced hepatocellular carcinoma</article-title>. <source>Nat. Prod. Res.</source> <volume>13</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1080/14786419.2022.2134864</pub-id>
</citation>
</ref>
<ref id="B96">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tschoellitsch</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>D&#xfc;nser</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>B&#xf6;ck</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Schwarzbauer</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Meier</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Machine learning prediction of SARS-CoV-2 polymerase chain reaction results with routine blood tests</article-title>. <source>Lab. Med.</source> <volume>52</volume>, <fpage>146</fpage>&#x2013;<lpage>149</lpage>. <pub-id pub-id-type="doi">10.1093/labmed/lmaa111</pub-id>
</citation>
</ref>
<ref id="B97">
<citation citation-type="journal">
<collab>World Health Organization</collab> (<year>2021</year>). <article-title>Antigen-detection in the diagnosis of SARS-CoV-2 infection</article-title>. <source>Interim guid.</source>
</citation>
</ref>
<ref id="B98">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A novel combined dynamic ensemble selection model for imbalanced data to detect COVID-19 from complete blood count</article-title>. <source>Comput. Methods Programs Biomed.</source> <volume>211</volume>, <fpage>106444</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2021.106444</pub-id>
</citation>
</ref>
<ref id="B99">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>McGoogan</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Characteristics of and important lessons from the coronavirus disease 2019 (COVID-19) outbreak in China: summary of a report of 72314 cases from the Chinese center for disease control and prevention</article-title>. <source>JAMA - J. Am. Med. Assoc.</source> <volume>323</volume>, <fpage>1239</fpage>&#x2013;<lpage>1242</lpage>. <pub-id pub-id-type="doi">10.1001/jama.2020.2648</pub-id>
</citation>
</ref>
<ref id="B100">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Liver disease screening based on densely connected deep neural networks</article-title>. <source>Neural Netw.</source> <volume>123</volume>, <fpage>299</fpage>&#x2013;<lpage>304</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2019.11.005</pub-id>
</citation>
</ref>
<ref id="B101">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Y&#x131;lmaz</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Bozkurt</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Determination of women iron deficiency anemia using neural networks</article-title>. <source>J. Med. Syst.</source> <volume>36</volume>, <fpage>2941</fpage>&#x2013;<lpage>2945</lpage>. <pub-id pub-id-type="doi">10.1007/s10916-011-9772-4</pub-id>
</citation>
</ref>
<ref id="B102">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Diagnosis of asthma based on routine blood biomarkers using machine learning</article-title>. <source>Comput. Intell. Neurosci.</source> <volume>2020</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1155/2020/8841002</pub-id>
</citation>
</ref>
<ref id="B103">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Reinforcement learning assisted oxygen therapy for COVID-19 patients under intensive care</article-title>. <source>BMC Med. Inf. Decis. Mak.</source> <volume>21</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1186/s12911-021-01712-6</pub-id>
</citation>
</ref>
<ref id="B104">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Rapid triage for ischemic stroke: a machine learning-driven approach in the context of predictive, preventive and personalised medicine</article-title>. <source>EPMA J.</source> <volume>13</volume>, <fpage>285</fpage>&#x2013;<lpage>298</lpage>. <pub-id pub-id-type="doi">10.1007/s13167-022-00283-4</pub-id>
</citation>
</ref>
<ref id="B105">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zoabi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kehat</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Lahav</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Weiss-Meilik</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Adler</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shomron</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Predicting bloodstream infection outcome using machine learning</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>20101</fpage>&#x2013;<lpage>20111</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-99105-2</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>