<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD with MathML3 v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2026.1746479</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Comparing machine learning and artificial neural network models in psychological research: a ROC-based analysis</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Leitner</surname>
<given-names>Marie-Luise</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3249999"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Arendasy</surname>
<given-names>Martin</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Department of Psychology, University of Graz</institution>, <city>Graz</city>, <country country="AT">Austria</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Marie-Luise Leitner, <email xlink:href="mailto:marie.leitner@uni-graz.at">marie.leitner@uni-graz.at</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-20">
<day>20</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1746479</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>27</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Leitner and Arendasy.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Leitner and Arendasy</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-20">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec id="sec1001">
<title>Introduction</title>
<p>The increasing use of data-driven methods in psychological assessment has raised the question of whether artificial neural networks provide advantages over established machine learning approaches in applied selection contexts. In particular, comparative evidence based on ROC-based evaluation using real-world psychological datasets remains limited.</p>
</sec>
<sec id="sec2001">
<title>Methods</title>
<p>Using a dataset of <italic>N</italic> = 4,155 applicants from a university entrance examination, this study compared three traditional machine learning models&#x2014;logistic regression, decision tree, and random forest&#x2014;with a feedforward artificial neural network comprising a single hidden layer. All models were implemented in Python and evaluated using accuracy and receiver operating characteristic (ROC) analysis, with the area under the curve (AUC) as the primary performance metric.</p>
</sec>
<sec id="sec3001">
<title>Results</title>
<p>Logistic regression achieved the highest predictive performance (accuracy = 0.973, AUC = 0.99), followed closely by the random forest model (accuracy = 0.961, AUC = 0.98). The artificial neural network reached competitive accuracy (0.933) but showed reduced discriminative ability (AUC = 0.87) and indications of overfitting. Feature importance analyses consistently identified biology, chemistry, and numerical reasoning as the most influential predictors of admission success.</p>
</sec>
<sec id="sec4001">
<title>Discussion</title>
<p>The results indicate that for medium-sized, structured psychological datasets, traditional machine learning models provide more stable, interpretable, and robust performance than the evaluated shallow neural network architecture. These findings highlight the importance of model choice and inductive bias in applied psychological research and support the continued use of classical machine learning approaches in selection and assessment contexts.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial neural network</kwd>
<kwd>decision tree</kwd>
<kwd>feature importance</kwd>
<kwd>logistic regression</kwd>
<kwd>machine learning</kwd>
<kwd>noise</kwd>
<kwd>overfitting</kwd>
<kwd>ROC (receiver operating characteristic)</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. Open access funding provided by the University of Graz.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="2"/>
<equation-count count="14"/>
<ref-count count="58"/>
<page-count count="11"/>
<word-count count="8989"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Quantitative Psychology and Measurement</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<title>Introduction</title>
<p>In recent years, the increasing availability of psychological data has prompted researchers to explore advanced computational models for classification, prediction, and decision support. Machine learning (ML) and artificial neural networks (ANNs) have emerged as powerful tools in this context, offering new possibilities for identifying patterns and forecasting outcomes based on complex, multidimensional datasets (<xref ref-type="bibr" rid="ref34">Jordan and Mitchell, 2015</xref>). In particular, their use in educational psychology has gained momentum, as researchers seek to improve the accuracy and interpretability of models used to predict academic success, diagnose learning difficulties, or inform admissions decisions (<xref ref-type="bibr" rid="ref7">Baker and Inventado, 2014</xref>).</p>
<p>While neural network-based methods have received substantial attention due to their remarkable success in domains such as image recognition and natural language processing (<xref ref-type="bibr" rid="ref36">LeCun et al., 2015</xref>), their advantages are less evident in domains characterized by relatively small sample sizes, structured tabular data, and high predictor-to-sample ratios - conditions commonly found in psychological research (<xref ref-type="bibr" rid="ref16">Bzdok et al., 2018</xref>). In such cases, traditional machine learning algorithms like logistic regression, decision trees, and ensemble methods (e.g., random forests) have often demonstrated superior performance, both in terms of predictive accuracy and model interpretability (<xref ref-type="bibr" rid="ref50">Shmueli, 2010</xref>; <xref ref-type="bibr" rid="ref48">Rudin, 2019</xref>).</p>
<p>This study builds on this growing body of work by systematically comparing traditional ML methods and ANN architectures for a binary classification task - predicting whether individuals are &#x201C;selected&#x201D; or &#x201C;not selected&#x201D; for university admission. The dataset includes a variety of sociodemographic, academic, and cognitive variables from 4,155 participants, reflecting the kind of high-dimensional but moderately sized data typical in applied psychology.</p>
<p>Specifically, this paper evaluates four models: logistic regression, decision tree, random forest (as representatives of traditional ML), and a feedforward artificial neural network (ANN). Model performance is assessed using both overall accuracy and Receiver Operating Characteristic (ROC) analysis, which provides a threshold-independent measure of classification performance (<xref ref-type="bibr" rid="ref22">Fawcett, 2006</xref>). Special attention is paid to issues of overfitting and generalization, particularly in neural network-based models, where high accuracy on training data does not necessarily translate to robust performance on test data.</p>
<p>Integrating ROC methodology, this research extends beyond conventional accuracy metrics and provides a robust, theory-informed framework for evaluating psychological classification models. This approach enables not only better discrimination analysis but also methodological transparency in model selection and evaluation.</p>
<p>The relevance of this methodological comparison extends beyond mere performance metrics. In applied settings such as methodological selection or psychological diagnosis, model interpretability, computational efficiency, and reliability under limited data conditions are essential. Addressing these practical and theoretical concerns, the present study contributes to evidence-based decision-making in psychology, while also offering methodological guidance for researchers selecting predictive models under real-world constraints.</p>
<p>Ultimately, this research aims to answer the following questions: How do traditional machine learning models compare to neural network-based models in terms of accuracy, generalizability, and robustness when applied to psychological datasets? What are the implications of these findings for future research and practice in psychology, education, and the social sciences?</p>
<sec id="sec2">
<title>Introduction to receiver operating characteristic (ROC) analysis</title>
<p>In the evaluation of predictive models, particularly in applied psychology, medicine, and machine learning, it is crucial to assess not only the overall accuracy of a model but also its ability to distinguish between classes under varying decision thresholds. Receiver Operating Characteristic (ROC) analysis has emerged as a gold standard methodology for this purpose, offering both a conceptual and quantitative framework for evaluating classifier performance across a continuum of threshold settings (<xref ref-type="bibr" rid="ref22">Fawcett, 2006</xref>; <xref ref-type="bibr" rid="ref41">Metz, 1978</xref>).</p>
<p>The ROC framework is built on the confusion matrix (<xref ref-type="fig" rid="fig1">Figure 1</xref>):</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Confusion matrix (binary classification). Rows&#x202F;=&#x202F;actual class; columns&#x202F;=&#x202F;predicted class. Cells show counts [or percentages]. Each instance falls into one of four outcomes&#x2014;true positive (TP), false positive (FP), false negative (FN), or true negative (TN); diagonal cells (TP, TN) are correct classifications; off-diagonals (FP, FN) are errors.</p>
</caption>
<graphic xlink:href="fpsyg-17-1746479-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Confusion matrix diagram with four labeled quadrants: top left is true positives, top right is false positives, bottom left is false negatives, and bottom right is true negatives. Used for evaluating model performance.</alt-text>
</graphic>
</fig>
<p>The <italic>true positive</italic> rate, which is also named <italic>sensitivity, hit rate</italic> or <italic>recall</italic>, is calculated by the following expression (<xref ref-type="bibr" rid="ref22">Fawcett, 2006</xref>). The performance measures used in ROC analysis and model evaluation are formally defined in <xref ref-type="disp-formula" rid="E1 E2 E3 E4 E5 E6 E7 E8 E9 E10 E11 E12 E13">Equations 1&#x2013;13</xref>:<disp-formula id="E1">
<mml:math id="M1">
<mml:mi mathvariant="italic">TPR</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext mathvariant="italic">True Positive Rate</mml:mtext>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mtext mathvariant="italic">Sensitivity</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mtext mathvariant="italic">Recall</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(1)</label>
</disp-formula></p>
<p>The <italic>false positive</italic> rate, also referred to as a <italic>false alarm,</italic> is expressed as<disp-formula id="E2">
<mml:math id="M2">
<mml:mi mathvariant="italic">FPR</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext mathvariant="italic">False Positive Rate</mml:mtext>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mtext mathvariant="italic">False Alarm</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(2)</label>
</disp-formula></p>
<p>The true negative rate is denoted as specificity, while the false negative rate (the miss rate) corresponds to the complement of sensitivity.<disp-formula id="E3">
<mml:math id="M3">
<mml:mi mathvariant="italic">TNR</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext mathvariant="italic">True Negative Rate</mml:mtext>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mtext mathvariant="italic">Specificity</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(3)</label>
</disp-formula><disp-formula id="E4">
<mml:math id="M4">
<mml:mi mathvariant="italic">FNR</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:mtext mathvariant="italic">False Negative Rate</mml:mtext>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="italic">FN</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">FN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TP</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(4)</label>
</disp-formula></p>
<p>Stated differently, sensitivity and specificity quantify two distinct aspects of classification accuracy with respect to a binary outcome&#x2014;such as passing or missing an entrance test. Sensitivity refers to the proportion of actual positive cases (e.g., applicants who would genuinely pass) that are correctly identified by the test, whereas specificity refers to the proportion of actual negative cases (e.g., those who would not pass) that are correctly classified as such. These metrics can be applied to any binary classification problem, provided that the outcome categories are clearly operationalized in the process of calculating and interpreting sensitivity and specificity values (<xref ref-type="bibr" rid="ref41">Metz, 1978</xref>).</p>
<p>In addition, two further metrics used in model evaluation quantify the proportion of incorrectly identified positive cases and overall classification correctness (<xref ref-type="bibr" rid="ref41">Metz, 1978</xref>):<disp-formula id="E5">
<mml:math id="M5">
<mml:mtext mathvariant="italic">Precision</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(5)</label>
</disp-formula><disp-formula id="E6">
<mml:math id="M6">
<mml:mtext mathvariant="italic">Accuracy</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(6)</label>
</disp-formula></p>
<p>Traditional accuracy metrics often obscure critical nuances of classification performance, particularly when the base rates of the outcome classes are imbalanced. As <xref ref-type="bibr" rid="ref41">Metz (1978</xref>, <xref ref-type="bibr" rid="ref42">1979)</xref> illustrated using diagnostic screening examples, a classifier may achieve high nominal accuracy by simply favoring the majority class yet fail catastrophically in identifying the minority class of interest. This limitation necessitates a more refined metric that captures the trade-off between true positive and false positive rates&#x2014;a gap that ROC analysis is designed to address (<xref ref-type="fig" rid="fig2">Figure 2</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>ROC curves with threshold points. ROC curves for three classifiers plotting true positive rate (TPR) against false positive rate (FPR) across decision thresholds. The blue curve shows the strongest discrimination, followed by orange and green (larger AUC implies better performance). Points A and B mark specific thresholds: A prioritizes sensitivity (higher TPR, higher FPR), while B is more conservative (lower FPR, lower TPR). Curves closer to the top-left corner indicate superior performance; a diagonal chance line would reflect random classification. The false positive rate on the <italic>x</italic>-axis ranges from 0 to 1.</p>
</caption>
<graphic xlink:href="fpsyg-17-1746479-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Receiver operating characteristic (ROC) curve with three lines representing different models; the y-axis is true positive rate and the x-axis is false positive rate. Points A and B are labeled on the blue and orange curves respectively, with A higher than B.</alt-text>
</graphic>
</fig>
<p>In psychology, ROC curves are particularly valuable for evaluating tests and classification models where different types of errors&#x2014;false positives versus false negatives&#x2014;carry different theoretical and practical consequences (<xref ref-type="bibr" rid="ref53">Swets, 1988</xref>; <xref ref-type="bibr" rid="ref52">Streiner and Cairney, 2007</xref>).</p>
<p>From a methodological standpoint, ROC analysis enables three core applications:<list list-type="order">
<list-item>
<p>Threshold selection: By analyzing the ROC curve shape and slope at various points, researchers can determine the optimal cut-off value based on the cost&#x2013;benefit trade-offs of false positives and false negatives (<xref ref-type="bibr" rid="ref54">Westin et al., 2001</xref>).</p>
</list-item>
<list-item>
<p>Comparing classifiers: AUC values allow for model comparison regardless of scale or unit. However, overlapping or crossing ROC curves necessitate more nuanced statistics, such as the partial AUC or resampling methods (<xref ref-type="bibr" rid="ref20">Faraggi and Reiser, 2002</xref>). The statistical interpretation of the area under the ROC curve (AUC) was formally established by <xref ref-type="bibr" rid="ref28">Hanley and McNeil (1982)</xref>, providing a foundation for subsequent ROC-based model comparisons.</p>
</list-item>
<list-item>
<p>Discrimination capacity: The ROC curve facilitates understanding of a model&#x2019;s capacity to distinguish between groups&#x2014;an essential feature in psychological test construction, where latent traits must be inferred from observable indicators.</p>
</list-item>
</list></p>
<p>While ROC analysis is valuable for assessing discriminative ability, it does not account for the calibration of predicted probabilities and may be less informative in highly imbalanced datasets. In such cases, precision&#x2013;recall curves may offer a useful complement (<xref ref-type="bibr" rid="ref49">Saito and Rehmsmeier, 2015</xref>).</p>
<p>The current study employs ROC analysis as a central evaluation tool to compare four classification algorithms: logistic regression, decision tree, random forest, and single-layer neural network. For each model, the ROC curve and its associated AUC are calculated based on out-of-sample test predictions. In addition, the analysis encompasses ROC-based threshold optimization, comparative evaluation of area under the curve (AUC), and the examination of classifier bias as represented within the ROC space.</p>
</sec>
<sec id="sec3">
<title>Introduction to logistic regression</title>
<p>Logistic regression is one of the most widely used and foundational models in both psychological research and statistical classification. As a generalized linear model (GLM), it provides a robust framework for estimating the probability of a binary outcome based on one or more predictor variables (<xref ref-type="bibr" rid="ref33">Hosmer et al., 2013</xref>; <xref ref-type="bibr" rid="ref3">Agresti, 2013</xref>). Its enduring appeal in psychology stems from its interpretability, statistical rigor, and capacity for inference, making it suitable for both hypothesis testing and predictive modeling (<xref ref-type="bibr" rid="ref40">Menard, 2002</xref>; <xref ref-type="bibr" rid="ref43">Pampel, 2000</xref>).</p>
<p>The logistic regression model is governed by a specific mathematical function, which can be described as follows:<disp-formula id="E7">
<mml:math id="M7">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x22C5;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x22C5;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x22C5;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>a</mml:mi>
</mml:math>
<label>(7)</label>
</disp-formula></p>
<p>Expressed in words, the predicted probability of a binary outcome is calculated from a linear combination of predictor variables (<inline-formula>
<mml:math id="M8">
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> multiplied by their corresponding coefficients (<inline-formula>
<mml:math id="M9">
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>,</mml:mo>
</mml:math>
</inline-formula> and adding a constant term (a). This linear combination is then transformed using the logistic or sigmoid function, which maps the outcome onto a continuous probability scale between 0 and 1. The resulting value represents the estimated probability of the binary outcome, indicating the likelihood of belonging to a particular category or class.</p>
<p>Given a linear combination:<disp-formula id="E8">
<mml:math id="M10">
<mml:mi>z</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:math>
<label>(8)</label>
</disp-formula></p>
<p>&#x2026;the logistic function transforms it into the probability:<disp-formula id="E9">
<mml:math id="M11">
<mml:mi>P</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2223;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(9)</label>
</disp-formula></p>
<p>This transformation maps the linear predictor onto a probability scale from 0 to 1, representing the estimated likelihood of an observation belonging to a particular category or class (<xref ref-type="bibr" rid="ref38">Long and Freese, 2014</xref>).</p>
<p>In applied psychology, these assumptions are generally satisfied when working with moderate to large datasets and well-curated instruments. However, violations&#x2014;particularly multicollinearity and insufficient sample size&#x2014;may bias estimates and reduce generalizability (<xref ref-type="bibr" rid="ref6">Babyak, 2004</xref>).</p>
<p>In the present study, logistic regression serves as the benchmark for evaluating more complex models, including decision trees, random forests, and neural networks. Its relatively simple architecture allows for high interpretability, and its well-established statistical underpinnings facilitate construct validation and inference, which are often required in psychology but less straightforward with black-box models (<xref ref-type="bibr" rid="ref14">Breiman, 2001a</xref>, <xref ref-type="bibr" rid="ref15">2001b</xref>; <xref ref-type="bibr" rid="ref55">Yarkoni and Westfall, 2017</xref>).</p>
</sec>
<sec id="sec4">
<title>Introduction to decision trees</title>
<p>Decision trees are a class of supervised learning algorithms used for classification and regression tasks. They function by recursively partitioning the feature space into subsets based on input variables, creating a tree-like structure composed of decision nodes and terminal leaves (<xref ref-type="bibr" rid="ref90012">Breiman et al., 1984</xref>). At each node, the algorithm selects the feature and corresponding split point that optimally separates the data according to a predefined impurity criterion, such as Gini impurity, information gain (based on entropy), or classification error.</p>
<p>As non-parametric models, decision trees do not assume any specific distribution of the input data, which makes them particularly attractive in applied psychological and educational research where assumptions of linearity and normality are often violated. The model construction typically follows a greedy, top-down approach known as recursive binary splitting, aiming to produce subsets that are as homogeneous as possible with respect to the target variable (<xref ref-type="bibr" rid="ref90012">Breiman et al., 1984</xref>).</p>
<p>Mathematically, for a given node <italic>t</italic>, the impurity <italic>I(t)</italic> can be measured using Gini impurity:<disp-formula id="E10">
<mml:math id="M12">
<mml:mi>I</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
<label>(10)</label>
</disp-formula></p>
<p>Despite their simplicity and interpretability, decision trees are highly sensitive to overfitting, particularly when grown to full depth without regularization constraints. In such cases, they may model random noise in the training data as if it were meaningful structure, thereby reducing their generalizability to unseen cases (<xref ref-type="bibr" rid="ref45">Quinlan, 1997</xref>; <xref ref-type="bibr" rid="ref8">Balcan and Sharma, 2024</xref>). To mitigate this risk, pruning strategies&#x2014;such as pre-pruning and post-pruning&#x2014;are commonly employed, alongside the specification of minimum sample thresholds per node, in order to reduce model complexity and enhance robustness (<xref ref-type="bibr" rid="ref12">Bramer, 2002</xref>; <xref ref-type="bibr" rid="ref4">Ahmed et al., 2018</xref>).</p>
<p>The interpretability of decision trees, conveyed through easily understandable decision rules, makes them particularly valuable in domains where transparency is essential, such as clinical diagnostics, personnel selection, or university admissions (<xref ref-type="bibr" rid="ref10">Blockeel et al., 2023</xref>; <xref ref-type="bibr" rid="ref2">Agarwal et al., 2022a</xref>, <xref ref-type="bibr" rid="ref1">2022b</xref>). However, due to their methodological limitations&#x2014;namely high variance and sensitivity to minor perturbations in the data&#x2014;caution is warranted when applying them to high-stakes decisions. These challenges often motivate the use of ensemble techniques, such as random forests or gradient-boosted trees, which aggregate multiple decision trees to produce more stable and accurate predictions (<xref ref-type="bibr" rid="ref14">Breiman, 2001a</xref>, <xref ref-type="bibr" rid="ref15">2001b</xref>; <xref ref-type="bibr" rid="ref8">Balcan and Sharma, 2024</xref>).</p>
</sec>
<sec id="sec5">
<title>Introduction to random forests</title>
<p>Random Forests represent a powerful and widely used ensemble learning method in supervised machine learning, particularly suitable for both classification and regression tasks (<xref ref-type="bibr" rid="ref14">Breiman, 2001a</xref>, <xref ref-type="bibr" rid="ref15">2001b</xref>). As an extension of decision tree models, Random Forests aim to overcome the high variance and overfitting tendencies of individual trees by aggregating predictions from multiple decision trees built on random subsets of data and features.</p>
<p>At the core of the Random Forest algorithm lies the principle of bootstrap aggregating, or <italic>bagging</italic> (<xref ref-type="bibr" rid="ref14">Breiman, 2001a</xref>, <xref ref-type="bibr" rid="ref15">2001b</xref>). This process involves generating multiple bootstrap samples from the training data by sampling with replacement. For each sample, a separate decision tree is constructed. At every node split during tree construction, a random subset of features (rather than all features) is evaluated to determine the optimal split. This dual randomization&#x2014;in both sample selection and feature selection&#x2014;introduces model diversity and reduces the correlation between individual trees, thereby improving the generalization performance of the ensemble (<xref ref-type="bibr" rid="ref29">Hastie et al., 2009</xref>).</p>
<p>Mathematically, the final prediction of a Random Forest is the aggregated outcome of all individual trees. For classification tasks, this typically involves a majority vote across the trees:<disp-formula id="E11">
<mml:math id="M13">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mtext>mode</mml:mtext>
<mml:mo stretchy="true">{</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
<label>(11)</label>
</disp-formula></p>
<p>For regression, the prediction is the mean of the outputs:<disp-formula id="E12">
<mml:math id="M14">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>n</mml:mi>
</mml:mfrac>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>where<inline-formula>
<mml:math id="M15">
<mml:mspace width="0.1em"/>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mspace width="0.1em"/>
<mml:mtext>denotes the prediction of the</mml:mtext>
<mml:mspace width="0.1em"/>
<mml:mi>i</mml:mi>
<mml:mtext>-th</mml:mtext>
<mml:mspace width="0.25em"/>
<mml:mtext>tree</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext mathvariant="italic">and</mml:mtext>
<mml:mspace width="0.25em"/>
<mml:mi>n</mml:mi>
<mml:mspace width="0.1em"/>
</inline-formula> is the total number of trees in the forest.</p>
<p>Random Forests offer several advantages, particularly in applied research contexts. They handle large, high-dimensional datasets efficiently, are robust to outliers and noise, and can deal with missing values without the need for imputation (<xref ref-type="bibr" rid="ref5">Ayyadevara, 2018</xref>). Moreover, they provide internal metrics such as feature importance scores and out-of-bag (OOB) error estimates, which allow for an efficient and unbiased estimation of generalization error without requiring a separate validation set (<xref ref-type="bibr" rid="ref37">Liaw and Wiener, 2002</xref>).</p>
<p>In psychological methodology, Random Forests are especially valuable due to their non-parametric nature, eliminating the need to satisfy stringent assumptions such as linearity, normality, or homoscedasticity. For instance, <xref ref-type="bibr" rid="ref23">Fife and D&#x2019;Onofrio (2023)</xref> show that Random Forests outperform traditional regression models under conditions of nonlinear effects and interaction among predictors. In studies of reading ability, <xref ref-type="bibr" rid="ref39">Matsuki et al. (2016)</xref> demonstrate that Random Forests better manage overfitting and multicollinearity in datasets with many highly correlated predictors.</p>
<p>Despite their robustness and flexibility, Random Forests are not without limitations. Their ensemble-based architecture, which aggregates predictions from a large number of decorrelated decision trees, makes it difficult to trace how individual input variables influence a specific classification outcome. This lack of transparency limits their interpretability, particularly in comparison to models such as logistic regression, which offer coefficient-based inference, or single decision trees, which provide rule-based explanations (<xref ref-type="bibr" rid="ref14">Breiman, 2001a</xref>, <xref ref-type="bibr" rid="ref15">2001b</xref>; <xref ref-type="bibr" rid="ref23">Fife and D&#x2019;Onofrio, 2023</xref>; <xref ref-type="bibr" rid="ref39">Matsuki et al., 2016</xref>). In contexts such as psychological assessment, educational placement, or admissions testing&#x2014;where interpretability and justification of decisions are critical&#x2014;this limitation poses a significant challenge.</p>
<p>Moreover, Random Forest performance may degrade in the presence of severely imbalanced datasets, as the algorithm tends to favour the majority class. This issue is particularly relevant in high-stakes classification tasks, where the minority class often represents the group of primary interest (e.g., students at risk of failing an entrance test). Without corrective measures, such as resampling techniques, class weighting, or cost-sensitive learning, predictive performance for the minority class may be substantially compromised (<xref ref-type="bibr" rid="ref17">Chen et al., 2004</xref>; <xref ref-type="bibr" rid="ref13">Branco et al., 2016</xref>).</p>
<p>Overall, Random Forests present a compelling methodological choice when prediction accuracy, noise resilience, and variable importance estimation are prioritized over model transparency. In the context of this study, they are employed as a comparative benchmark against other models (e.g., logistic regression, support vector machines, neural networks) to evaluate classification accuracy, AUC performance, and resistance to overfitting.</p>
</sec>
<sec id="sec6">
<title>Introduction to artificial neural network (ANN)</title>
<p>Artificial neural networks (ANNs) are computational models inspired by the architecture and functioning of the human brain. Originally developed to emulate biological neural systems, ANNs are particularly well suited for solving complex, non-linear problems that are intractable for traditional statistical approaches (<xref ref-type="bibr" rid="ref31">Haykin, 2009</xref>). Their structure consists of interconnected processing units (neurons) organized in layers, allowing them to learn data representations through iterative training processes.</p>
<p>The typical ANN comprises an input layer, one or more hidden layers, and an output layer. Each neuron in a layer is connected to neurons in the subsequent layer via weighted connections. During the forward pass, neurons compute weighted sums of their inputs and apply an activation function (e.g., sigmoid, ReLU, or softmax) to introduce non-linearity into the model. The model&#x2019;s predictive capacity is refined through backpropagation, a learning algorithm that minimizes a loss function by adjusting weights based on the gradient descent principle (<xref ref-type="bibr" rid="ref31">Haykin, 2009</xref>).</p>
<p>Mathematically, for a neuron <inline-formula>
<mml:math id="M16">
<mml:mi mathvariant="normal">j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>the activation</mml:mtext>
<mml:mspace width="0.1em"/>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is given by:<disp-formula id="E13">
<mml:math id="M17">
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x03D5;</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi mathvariant="italic">ij</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(13)</label>
</disp-formula><disp-formula id="E14">
<mml:math id="M18">
<mml:mtable columnalign="left" displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:mtext>where</mml:mtext>
<mml:mspace width="0.1em"/>
<mml:msub>
<mml:mi mathvariant="normal">x</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mspace width="0.1em"/>
<mml:mtext>are</mml:mtext>
<mml:mspace width="0.1em"/>
<mml:mtext>the input values</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi mathvariant="italic">ij</mml:mi>
</mml:msub>
<mml:mspace width="0.1em"/>
<mml:mtext>are</mml:mtext>
<mml:mspace width="0.1em"/>
<mml:mtext>the corresponding weights</mml:mtext>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mspace width="0.1em"/>
<mml:mtext>is the bias term</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>and</mml:mtext>
<mml:mspace width="0.1em"/>
<mml:mi>&#x03D5;</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00B7;</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
<mml:mspace width="0.1em"/>
<mml:mtext>is the activation function</mml:mtext>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula></p>
<p>A central strength of artificial neural networks (ANNs) is their theoretical capacity to approximate any continuous function to an arbitrary degree of accuracy, a property known as universal approximation: under certain conditions, a neural network with just one hidden layer can approximate any continuous function on compact subsets of &#x211D;&#x207F;, given sufficient neurons (<xref ref-type="bibr" rid="ref32">Hornik, 1991</xref>). This theoretical property makes ANNs especially attractive for modeling psychological data, which often exhibit non-linear interactions and latent patterns.</p>
<p>There are three main types of ANN training methods: supervised learning, in which input&#x2013;output pairs guide weight adjustments; unsupervised learning, where the network identifies structure in unlabelled data; and self-supervised or fixed-weight networks, used in constrained optimization scenarios (<xref ref-type="bibr" rid="ref21">Fausett, 1994</xref>).</p>
<p>In psychological research, ANNs are increasingly utilized to detect complex relationships in behavioural, cognitive, and neuropsychological data. Their ability to model non-linear associations, handle high-dimensional inputs, and learn from noise makes them a valuable methodological tool&#x2014;though they often lack interpretability, which can be a limitation in theory-driven research.</p>
<p>In this study, an artificial neural network (ANN) was implemented to classify admission outcomes based on psychometric features. Performance is evaluated against classical and modern machine learning techniques (e.g., logistic regression, decision trees, support vector machines, and random forests) using metrics such as accuracy, AUC, and ROC analysis.</p>
</sec>
<sec id="sec7">
<title>Aim and significance of the study</title>
<p>The primary aim of this study is to systematically evaluate and compare the predictive performance of traditional machine learning models (logistic regression, decision tree, random forest) and modern neural network-based model approaches (i.e., a feedforward artificial neural network) in classifying outcomes within a psychological admissions dataset. Using a real-world dataset of <italic>N</italic>&#x202F;=&#x202F;4,155 applicants, the study investigates the utility, accuracy, and robustness of these models in identifying individuals who are likely to succeed in an entrance examination based on demographic, academic, and cognitive features.</p>
<p>This research is significant in both theoretical and practical terms. Methodologically, it contributes to the ongoing discussion about the appropriateness of neural network-based models in psychological context, where sample sizes are typically smaller than in industrial machine learning applications. Practically, the findings have implications for optimizing selection procedures in applied psychological assessment by identifying the most efficient and interpretable predictive models for use in personnel selection, educational screening, and diagnostic processes.</p>
</sec>
<sec id="sec8">
<title>Research questions</title>
<p>This study systematically examines the comparative utility of traditional and modern machine learning approaches in the classification of psychological data. The investigation is guided by the following research questions:<list list-type="bullet">
<list-item>
<p>To what extent does the predictive performance of neural network-based models differ from that of traditional statistical methods when applied to psychological classification problems?</p>
</list-item>
<list-item>
<p>Among the selected classification algorithms&#x2014;logistic regression, decision trees, random forests, and artificial neural networks&#x2014;which demonstrates the highest level of predictive accuracy in the given psychological dataset?</p>
</list-item>
<list-item>
<p>Given the available dataset (N&#x202F;&#x003E;&#x202F;4,000), do neural network-based models exhibit signs of overfitting, and what are the methodological and practical implications of such behaviour for their application in psychological research contexts?</p>
</list-item>
<list-item>
<p>What are the respective methodological advantages and limitations of traditional classification techniques and neural network-based models, particularly when applied to small or moderately sized psychological datasets?</p>
</list-item>
</list></p>
</sec>
</sec>
<sec sec-type="methods" id="sec9">
<title>Methods</title>
<sec id="sec10">
<title>Participants</title>
<p>The dataset consisted of <italic>N</italic>&#x202F;=&#x202F;4,155 applicants to a university entrance examination in health sciences. The data were collected retrospectively and included a wide range of sociodemographic, academic, and cognitive features. As the dataset was fully anonymized and archival in nature, no direct interaction with human participants took place, and ethical approval was not required. Nonetheless, data were handled in compliance with data protection regulations.</p>
<p>After initial data cleaning, including the removal of outliers, missing values, and implausible or falsified entries (e.g., incorrect age specifications), the dataset was reduced from <italic>N</italic>&#x202F;=&#x202F;4,177 to <italic>N</italic>&#x202F;=&#x202F;4,155 applicants. The final sample comprised 2,447 females and 1,708 males, ranging in age from 18 to 38&#x202F;years. Regarding nationality, 2,756 participants were Austrian citizens, 1,294 originated from other European Union (EU) countries, and 105 from non-EU countries. Participants represented a broad range of secondary school backgrounds, including Gymnasium, Realgymnasium, Oberstufenrealgymnasium, Naturwissenschaftliches Gymnasium, Humanistisches Gymnasium, Neusprachliches Gymnasium, foreign school-leaving certificates, Handelsakademie, technical and vocational colleges, schools of business administration, and other school types. Academic indicators included subject-specific knowledge in biology, chemistry, physics, and mathematics. Cognitive ability measures covered figural analogies (<italic>fz_score</italic>), number series (<italic>zf_score</italic>), memory performance (<italic>gm_score</italic>), and mathematical thinking (<italic>md_score</italic>). Text processing competence (<italic>tv_score</italic>) was also included. The binary dependent variable was admission outcome (<italic>sel</italic>; 0&#x202F;=&#x202F;not selected, 1&#x202F;=&#x202F;selected). To assess possible redundancy among predictors, intercorrelation analyses were conducted. The shared variance (<italic>R</italic><sup>2</sup>) among predictors was consistently low, with all pairwise correlations remaining below r&#x202F;=&#x202F;0.70. In addition, variance inflation factor (VIF) values were below the conventional threshold of 5, and tolerance statistics exceeded 0.20, indicating that multicollinearity was not a concern. 
Taken together, these results suggest that the predictors could be considered sufficiently independent for the purposes of model estimation (<xref ref-type="bibr" rid="ref18">Dangeti, 2017</xref>; <xref ref-type="bibr" rid="ref27">Hair et al., 2010</xref>).</p>
</sec>
<sec id="sec11">
<title>Measures</title>
<p><italic>Sociodemographic variables</italic>. Participants reported gender (female, male), age (18&#x2013;38&#x202F;years), nationality (Austria, EU, non-EU), and type of secondary school attended (e.g., Gymnasium, Realgymnasium, Oberstufenrealgymnasium, Naturwissenschaftliches Gymnasium, Humanistisches Gymnasium, Neusprachliches Gymnasium, foreign school-leaving certificate, Handelsakademie, technical and vocational college, school of business administration, or other).</p>
<p><italic>Cognitive ability measures</italic>. Cognitive performance was assessed using subtests that captured (a) figural reasoning (<italic>fz_score</italic>), (b) number series (<italic>zf_score</italic>), (c) memory performance (<italic>gm_score</italic>), and (d) mathematical thinking (<italic>md_score</italic>). These indicators reflect core dimensions of general cognitive ability relevant to academic success.</p>
<p><italic>Academic knowledge measures</italic>. Domain-specific knowledge was measured through subject-based test scores in biology (<italic>bi_score</italic>), chemistry (<italic>ch_score</italic>), physics (<italic>ph_score</italic>), and mathematics (<italic>ma_score</italic>).</p>
<p><italic>Text processing competence</italic>. In addition, a standardized task assessing text processing skills (<italic>tv_score</italic>) was included as an indicator of verbal-academic competence.</p>
<p><italic>Outcome variable</italic>. The dependent variable was admission outcome (<italic>sel</italic>), coded dichotomously as 0&#x202F;=&#x202F;not selected and 1&#x202F;=&#x202F;selected.</p>
</sec>
<sec id="sec12">
<title>Procedure</title>
<p>The research followed a quantitative, data-driven modelling framework. All models were implemented in Python using open-source libraries such as Scikit-learn, Keras, and TensorFlow. The dataset was split into training and testing sets using an 80/20 ratio, with stratified sampling applied to preserve the distribution of the outcome variable. Each model was trained and tested on the same data split to ensure direct comparability of performance metrics.</p>
<p>Responses were automatically recorded and scored using standardized algorithms. Data integrity was ensured through immediate plausibility checks at the point of entry. Following data collection, all records were anonymized so that no personal identifiers were retained. The binary admission outcome was determined based on official university admission criteria and subsequently linked to each participant&#x2019;s test record.</p>
</sec>
<sec id="sec13">
<title>Data preprocessing</title>
<p>All categorical variables, including those representing nationality and type of secondary school attended, were transformed using one-hot encoding to facilitate their inclusion in the machine learning models. Numerical variables were standardized to have a mean of zero and a standard deviation of one in order to ensure comparability across features and to support the convergence of gradient-based algorithms. The dataset contained only minimal missing data, which were addressed through case-wise deletion. An analysis of class distribution revealed no substantial imbalance between admitted and non-admitted applicants (51.7% selected vs. 48.3% not selected), rendering the use of resampling techniques or class weighting unnecessary. The final dataset therefore exhibited a near-balanced class distribution, which does not constitute a substantial class imbalance according to common conventions in classification research (e.g., <xref ref-type="bibr" rid="ref22">Fawcett, 2006</xref>). Accordingly, accuracy and ROC-based metrics were considered appropriate evaluation measures.</p>
</sec>
<sec id="sec14">
<title>Model description</title>
<p>Four models were implemented and compared:<list list-type="simple">
<list-item>
<p>I. Logistic regression (LR)</p>
</list-item>
</list></p>
<p>A regularized logistic regression model was used as a baseline. It assumes linear relationships between predictors and the log-odds of the outcome.<list list-type="simple">
<list-item>
<p>II. Decision tree (DT)</p>
</list-item>
</list></p>
<p>A Gini impurity-based classification tree was grown without pruning to evaluate model instability and overfitting tendencies.<list list-type="simple">
<list-item>
<p>III. Random forest (RF)</p>
</list-item>
</list></p>
<p>An ensemble of 100 decision trees was constructed using bootstrap aggregation and random feature sampling to reduce variance and increase robustness.<list list-type="simple">
<list-item>
<p>IV. Artificial neural network &#x2013; (ANN)</p>
</list-item>
</list></p>
<p>The neural network implemented in the present study was a feedforward artificial neural network with a single hidden layer. This shallow architecture reflects a commonly used neural network design in applied psychological research and was selected to represent typical practical implementations under real-world conditions.</p>
<p>A feedforward neural network comprising a single hidden layer with 32&#x202F;units and ReLU activation was implemented. The output layer consisted of a single neuron with a sigmoid activation function, suitable for binary classification. The model was trained using binary cross-entropy loss and optimized with the Adam algorithm.</p>
<p>All models used identical inputs and were evaluated on the same test split for comparability.</p>
<p>To ensure a fair and comparable evaluation across model classes, all models were implemented using commonly recommended default or conservative hyperparameter settings. No extensive hyperparameter optimization was performed for any model class. This decision was motivated by the primary aim of the study, which was to compare model robustness, generalization behaviour, and interpretability under typical applied conditions rather than to maximize predictive performance through fine-tuning.</p>
</sec>
<sec id="sec15">
<title>Evaluation metrics</title>
<p>To assess and compare the performance of the classification models, a set of complementary performance metrics was employed:<list list-type="bullet">
<list-item>
<p>Accuracy, defined as the proportion of correctly classified instances, served as a baseline measure of overall predictive performance.</p>
</list-item>
<list-item>
<p>Area under the receiver operating characteristic curve (AUC-ROC) was used to quantify the model&#x2019;s ability to discriminate between admitted and non-admitted applicants across all possible classification thresholds.</p>
</list-item>
<list-item>
<p>Receiver operating characteristic (ROC) curves were plotted to visualize the trade-off between the true positive rate and false positive rate at varying threshold levels.</p>
</list-item>
<list-item>
<p>Feature importance scores, calculated for tree-based models (e.g., decision trees and random forests), were used to identify the most influential predictor variables in the classification process.</p>
</list-item>
</list></p>
<p>These metrics enabled both threshold-independent and threshold-dependent comparisons between models.</p>
</sec>
<sec id="sec16">
<title>Noise as a methodological factor</title>
<p>In this study, noise, defined as random variability not systematically associated with the true outcome, was explicitly treated as a methodological concern. Consistent with the conceptualization by <xref ref-type="bibr" rid="ref35">Kahneman et al. (2021)</xref>, noise was understood as unwanted variability that can obscure signal and compromise model reliability. Its presence and influence were examined both qualitatively, through observed differences in model behaviour, and quantitatively, via fluctuations in predictive performance across training iterations.</p>
<p>Among the models evaluated, decision trees demonstrated pronounced sensitivity to noise, with considerable variability in classification outcomes observed across repeated training runs. This instability aligns with prior findings suggesting that decision trees, particularly when grown to full depth, tend to overfit due to their reliance on greedy, axis-aligned splits and their responsiveness to small perturbations in the training data (<xref ref-type="bibr" rid="ref44">Quinlan, 1996</xref>; <xref ref-type="bibr" rid="ref47">Rokach and Maimon, 2008</xref>). Similarly, artificial neural networks exhibited a tendency to memorize both meaningful patterns and irrelevant fluctuations in the data. Overfitting was most evident in the absence of regularization techniques such as dropout or early stopping (<xref ref-type="bibr" rid="ref56">Zhang et al., 2017</xref>). In contrast, random forests displayed greater robustness to noise. Their ensemble-based architecture, which aggregates predictions across multiple decorrelated trees, effectively reduced variance and mitigated overfitting, thereby enhancing model stability (<xref ref-type="bibr" rid="ref14">Breiman, 2001a</xref>, <xref ref-type="bibr" rid="ref15">2001b</xref>; <xref ref-type="bibr" rid="ref9">Biau and Scornet, 2016</xref>).</p>
<p>Although no artificial noise was injected into the dataset, variability in model performance across repeated runs was interpreted as reflecting algorithmic instability rather than noise inherent in the data. To further strengthen this perspective, the treatment of noise was embedded into the overall evaluation framework. This ensured that performance differences were not only attributed to sampling variability but also interpreted considering each model&#x2019;s structural sensitivity to random perturbations. By framing noise explicitly as a methodological dimension, the study underscores its central role in evaluating the robustness and generalizability of predictive models in psychological research.</p>
<p>Variability in model performance across repeated training runs may arise from several algorithmic sources, including random weight initialization, stochastic optimization procedures, and sensitivity to hyperparameter choices. In the present study, such variability is therefore interpreted as an indicator of algorithmic instability rather than as direct evidence of noise inherent in the data.</p>
<p>Accordingly, the term &#x201C;noise&#x201D; is used here in a broader methodological sense to denote unwanted variability in model behaviour, rather than explicitly introduced data perturbations.</p>
</sec>
</sec>
<sec sec-type="results" id="sec17">
<title>Results</title>
<p>This section reports the predictive performance of four machine learning models, each trained to classify success in a university entrance examination. All models were trained on identical feature sets and evaluated using consistent training/test splits to ensure comparability. Model performance was assessed using overall classification accuracy and, more importantly, receiver operating characteristic (ROC) analysis and the area under the ROC curve (AUC). The AUC metric, as a threshold-independent measure of discrimination, is particularly informative in evaluating model generalizability across varying decision boundaries (<xref ref-type="bibr" rid="ref22">Fawcett, 2006</xref>). Presenting accuracy together with AUC ensures a balanced view of threshold-dependent and threshold-independent performance across models.</p>
<sec id="sec18">
<title>Logistic regression</title>
<p>The logistic regression model achieved the highest classification performance, with a test accuracy of 0.973 and an AUC of 0.99. ROC analysis revealed excellent sensitivity and specificity across a wide range of threshold values. These results suggest not only high accuracy in predicting exam success but also robust generalization, making logistic regression both statistically reliable and practically interpretable.</p>
</sec>
<sec id="sec19">
<title>Decision tree</title>
<p>The decision tree classifier yielded a test accuracy of 0.926 and an AUC of 0.80. While the overall accuracy indicates a reasonable level of predictive performance, the lower AUC suggests that the model&#x2019;s discriminative capacity is more sensitive to threshold settings. This limitation reflects the well-documented tendency of single-tree models to overfit and produce less stable decision boundaries. Post-pruning the tree (e.g., constraining maximum depth to 5) led to a marginal increase in accuracy (0.930) but no substantial improvement in AUC, indicating that pruning alone may not sufficiently enhance generalizability. The most influential features identified were biology (<italic>bi_score</italic>&#x202F;=&#x202F;0.085), numerical reasoning (<italic>zf_score</italic>&#x202F;=&#x202F;0.039), and memory performance (<italic>gm_score</italic>&#x202F;=&#x202F;0.034).</p>
</sec>
<sec id="sec20">
<title>Random forest</title>
<p>The random forest classifier achieved a test accuracy of 0.961 and an AUC of 0.98. This strong performance reflects the ensemble model&#x2019;s ability to aggregate across multiple decorrelated trees, thereby reducing variance and enhancing robustness to overfitting. The ROC curve demonstrated excellent class separation, confirming the model&#x2019;s ability to generalize effectively. Feature-importance analysis identified prior achievement in biology (<italic>bi_score</italic>&#x202F;=&#x202F;0.176), chemistry (<italic>ch_score</italic>&#x202F;=&#x202F;0.132), and numerical reasoning (<italic>zf_score</italic>&#x202F;=&#x202F;0.103) as the most influential predictors of exam success.</p>
</sec>
<sec id="sec21">
<title>Artificial neural network</title>
<p>The single-layer artificial neural network reached a test accuracy of 0.933 and an AUC of 0.87. ROC analysis showed a noticeably lower AUC compared to tree-based models, especially at training epochs 21, 42, 46, and 50, where overfitting became evident. These findings indicate that, while the ANN achieved competitive accuracy, its ability to generalize across decision thresholds was limited. The results highlight the need for regularization and careful tuning when applying neural networks to moderately sized, multivariable datasets.</p>
</sec>
<sec id="sec22">
<title>Comparative model summary</title>
<p><xref ref-type="table" rid="tab1">Table 1</xref> shows the accuracy of the machine learning and neural network-based models.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Accuracy of machine learning and neural network-based models.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>Model</th>
<th align="center" valign="top">Accuracy (Machine learning)</th>
<th align="center" valign="top">Accuracy (Neural network)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Logistic regression</td>
<td align="center" valign="middle">0.973</td>
<td align="center" valign="middle">-</td>
</tr>
<tr>
<td align="left" valign="middle">Random forest</td>
<td align="center" valign="middle">0.961</td>
<td align="center" valign="middle">-</td>
</tr>
<tr>
<td align="left" valign="middle">Decision tree</td>
<td align="center" valign="middle">0.926</td>
<td align="center" valign="middle">-</td>
</tr>
<tr>
<td align="left" valign="middle">Neural network</td>
<td align="center" valign="middle">-</td>
<td align="center" valign="middle">0.933</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec23">
<title>Comparative feature selection</title>
<p><xref ref-type="table" rid="tab2">Table 2</xref> shows comparative feature importance for decision tree and random forest models.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Comparative feature importance for decision tree and random forest models.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>Model</th>
<th align="left" valign="top">Most important feature</th>
<th align="left" valign="top">Second most important feature</th>
<th align="left" valign="top">Third most important</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Random forest</td>
<td align="left" valign="top">Biology (<italic>bi_score</italic>&#x202F;=&#x202F;0.176)</td>
<td align="left" valign="top">Chemistry (<italic>ch_score</italic>&#x202F;=&#x202F;0.132)</td>
<td align="left" valign="top">Number series (<italic>zf_score</italic>&#x202F;=&#x202F;0.103)</td>
</tr>
<tr>
<td align="left" valign="middle">Decision tree</td>
<td align="left" valign="top">Biology (<italic>bi_score</italic>&#x202F;=&#x202F;0.085)</td>
<td align="left" valign="top">Number series (<italic>zf_score</italic>&#x202F;=&#x202F;0.039)</td>
<td align="left" valign="top">Memory performance (<italic>gm_score</italic>&#x202F;=&#x202F;0.034)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec24">
<title>ROC-based evaluation</title>
<p>The ROC curves collectively demonstrate that logistic regression and random forest models yield the most reliable performance across thresholds, with ROC curves closely approaching the upper-left corner of the ROC space. These models are thus especially suitable in applied psychological contexts where decision thresholds may shift (e.g., when prioritizing false positives over false negatives in screening).</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec25">
<title>Discussion</title>
<p>The present study compared the predictive performance of traditional machine learning models and neural network-based model approaches for the classification of applicants in a psychological university entrance test on a real-world dataset. Using a dataset of <italic>N</italic>&#x202F;=&#x202F;4,155 cases and identical training/test splits, four classification models were evaluated with particular attention to predictive accuracy, ROC analysis, and generalization ability. The findings offer several important insights for model selection and methodological decision-making in psychological research and applied classification tasks.</p>
<p>Traditional models&#x2014;specifically logistic regression and random forest&#x2014;demonstrated the highest overall classification performance. Logistic regression achieved the best results, with an accuracy of 0.973 and a near-perfect area under the ROC curve (AUC), confirming its strong discriminatory power across classification thresholds. This aligns with its long-standing reputation in the literature as a robust, interpretable model particularly well suited for binary classification tasks in psychological contexts. Similarly, the random forest algorithm achieved high accuracy (0.961) and exhibited excellent generalization performance, with ROC curves indicating stable separation between classes. Feature importance analyses identified subject-specific competencies&#x2014;biology (<italic>bi_score</italic>&#x202F;=&#x202F;0.176), chemistry (<italic>ch_score</italic>&#x202F;=&#x202F;0.132), and numerical reasoning (number series) (<italic>zf_score</italic>&#x202F;=&#x202F;0.103)&#x2014;as key predictors, offering both practical relevance and theoretical alignment with classical intelligence models.</p>
<sec id="sec26">
<title>Interpretability and scope of feature importance analyses</title>
<p>Differences in feature importance across models do not indicate contradictory findings but rather reflect model-specific inductive biases. Feature importance measures derived from decision trees and random forests are inherently dependent on the model structure, splitting criteria, and interaction effects among predictors. While decision tree importance values are highly sensitive to individual splits and therefore less stable, random forest importance represents an aggregated, global estimate across multiple decorrelated trees and can thus be considered more robust at the model level.</p>
<p>Model-agnostic explanation techniques such as LIME (<xref ref-type="bibr" rid="ref46">Ribeiro et al., 2016</xref>) provide local, instance-level explanations of individual predictions and serve a complementary purpose. By contrast, the present study focused on global model behaviour and comparative robustness rather than on post-hoc explanations of individual cases.</p>
<p>Accordingly, feature importance values in this study should be interpreted as model-internal relevance indicators serving comparative and descriptive purposes, rather than as causal estimates.</p>
<p>The decision tree model yielded the lowest overall predictive performance among the models evaluated. This outcome may be attributed to the limitations of single-tree structures when applied to multivariable, structured datasets. Unlike random forests, which aggregate predictions from multiple trees and thereby capture more complex feature interactions, individual decision trees are less capable of handling multidimensional patterns within the data. The structural simplicity of the decision tree model, while advantageous in terms of interpretability, may therefore contribute to its reduced classification accuracy.</p>
<p>Despite its lower overall performance, the decision tree&#x2019;s feature selection results remain of interest. The model identified biology (<italic>bi_score</italic>&#x202F;=&#x202F;0.085), numerical reasoning (<italic>zf_score</italic>&#x202F;=&#x202F;0.039), and memory performance (<italic>gm_score</italic>&#x202F;=&#x202F;0.034) as the most influential predictors. These findings may offer valuable insights for researchers interested in domain-specific item analysis or targeted test development. Notably, the feature selection outcomes of both the decision tree and random forest models converge in highlighting the importance of subject-specific knowledge (e.g., biology and chemistry) alongside domain-general cognitive abilities (e.g., numerical reasoning and memory). This suggests that these variables are particularly relevant for the accurate classification of success in the university entrance examination.</p>
<p>In contrast, the neural network-based model showed notably lower performance. Although the single-layer network reached an accuracy of 0.933, closer inspection of the training process and ROC curves revealed significant overfitting, with performance degradation beginning at early training epochs. The reduced AUC values indicated poor discrimination across thresholds. These patterns highlight the tendency of feedforward neural networks to memorize training data in smaller, structured datasets without appropriate regularization. The results also emphasize the challenges of applying neural network-based models to moderately sized psychological datasets, where generalization can be limited without careful regularization and architecture choices tailored to tabular data.</p>
<p>A theoretical explanation for this pattern can be derived from the concept of architectural inductive bias. Classical models such as logistic regression and tree-based methods impose strong, task-relevant inductive biases that align well with structured tabular data. Linear models encode additive and monotonic relationships, whereas decision trees and random forests exploit axis-aligned splits and hierarchical feature interactions. Feedforward neural networks, by contrast, rely on comparatively weak and generic inductive biases that assume smooth function approximation rather than explicitly leveraging the structural properties of tabular data. Recent research has demonstrated that this mismatch systematically disadvantages neural networks on tabular datasets, even at moderate sample sizes, whereas classical models often outperform neural network-based architectures under these conditions (<xref ref-type="bibr" rid="ref26">Grinsztajn et al., 2022</xref>; <xref ref-type="bibr" rid="ref51">Shwartz-Ziv and Armon, 2022</xref>; <xref ref-type="bibr" rid="ref11">Borisov et al., 2023</xref>).</p>
<p>Methodologically, the findings reaffirm that simpler, traditional models often outperform more complex architectures in contexts with limited data and high interpretability demands. Logistic regression and random forest not only provided higher predictive performance but also allowed for more transparent model behaviour, facilitating insight into the relative influence of predictor variables&#x2014;an essential consideration in psychological decision-making. In contrast, the opacity and instability of the artificial neural network underscore its limitations in domains where accountability, reproducibility, and interpretability are critical.</p>
<p>Despite the relatively large sample size (<italic>N</italic>&#x202F;=&#x202F;4,155) for psychological research, it may still be insufficient for training feedforward neural networks with multiple layers and high parameter complexity. This limitation, combined with the relatively small number of features (<italic>n</italic>&#x202F;=&#x202F;13), likely contributed to the models&#x2019; overfitting and instability. These findings are consistent with prior research indicating that neural network-based models require extensive data and careful regularization to avoid performance degradation (<xref ref-type="bibr" rid="ref24">Geman et al., 1992</xref>; <xref ref-type="bibr" rid="ref30">Hawkins, 2003</xref>). Feature selection strategies, such as dimensionality reduction or domain-driven index construction, may offer viable solutions in future studies aiming to enhance neural network-based model performance in psychological datasets.</p>
<p>An additional methodological contribution of the present study concerns the explicit treatment of noise. Variability in predictive performance across repeated training iterations was interpreted as indirect evidence of susceptibility to random perturbations&#x2014;that is, to noise in the broader methodological sense adopted in this study&#x2014;rather than to noise inherent in the data. Decision trees and neural networks proved particularly sensitive, whereas random forests showed greater robustness, consistent with their ensemble-based design. Treating noise as a central methodological dimension highlights that model evaluation in psychology must consider not only accuracy and generalization but also stability under random perturbations (<xref ref-type="bibr" rid="ref19">Dietterich, 1995</xref>; <xref ref-type="bibr" rid="ref25">Grandvalet and Bengio, 2005</xref>).</p>
<p>The broader implications of these results point to a continued role for traditional machine learning models in psychology, particularly for classification tasks involving structured data and moderate sample sizes. Logistic regression and random forest provide robust, interpretable, and computationally efficient tools, making them highly suitable for applied settings such as university admissions. Nonetheless, as the field continues to evolve, there remains a need to refine and adapt neural network-based approaches to the specific challenges of psychological research&#x2014;especially regarding small sample sizes, missing data, and the curse of dimensionality. Advances in regularization techniques, model compression, and domain-informed architecture design may help bridge this gap in future investigations.</p>
<p>Collectively, this study indicates that model selection in psychological data analysis must be guided not only by considerations of predictive accuracy but also by attention to overfitting risk, interpretability, the structural characteristics of the data, and robustness to noise. Traditional machine learning methods currently offer a more reliable and transparent approach for psychological applications, particularly when working with limited and structured datasets. Recent empirical applications of machine learning in psychological prediction contexts further support the robustness of ensemble methods such as random forests in structured datasets (<xref ref-type="bibr" rid="ref57">Zhang et al., 2023</xref>).</p>
<p>Model performance was primarily reported using point estimates. Future research may benefit from uncertainty quantification through resampling procedures, confidence intervals, or statistical significance testing to further assess the robustness and practical relevance of observed performance differences.</p>
<p>The present findings should be interpreted in light of several limitations. The empirical evaluation is based on a single university admissions dataset, which restricts the generalizability of the results. Observed performance differences may reflect characteristics specific to this dataset rather than general properties of psychological data. Replication across additional datasets and institutional contexts would be required to establish broader external validity. Nevertheless, the dataset represents a typical applied psychological selection context, supporting the relevance of the findings for comparable real-world settings.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec27">
<title>Data availability statement</title>
<p>The dataset contains sensitive personal information collected as part of university admissions procedures. In accordance with the General Data Protection Regulation (GDPR) and institutional data protection policies, the raw data cannot be shared publicly. Only aggregated results and analysis scripts can be made available upon reasonable request. Requests to access these datasets should be directed to Marie-Luise Leitner, <email xlink:href="mailto:marie.leitner@uni-graz.at">marie.leitner@uni-graz.at</email>.</p>
</sec>
<sec sec-type="ethics-statement" id="sec28">
<title>Ethics statement</title>
<p>Ethical approval was not required for the study involving humans in accordance with the local legislation and institutional requirements. Written informed consent to participate in this study was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and the institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="sec29">
<title>Author contributions</title>
<p>M-LL: Formal analysis, Writing &#x2013; original draft, Software, Investigation, Writing &#x2013; review &#x0026; editing, Resources, Data curation, Visualization, Methodology, Project administration, Validation, Conceptualization. MA: Conceptualization, Methodology, Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="sec30">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec31">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec32">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Agarwal</surname><given-names>R.</given-names></name> <name><surname>Frosst</surname><given-names>N.</given-names></name> <name><surname>Zhang</surname><given-names>X.</given-names></name> <name><surname>Caruana</surname><given-names>R.</given-names></name> <name><surname>Hinton</surname><given-names>G.</given-names></name></person-group> (<year>2022b</year>). <article-title>Neural additive models: interpretable machine learning with neural nets</article-title>. <conf-name>Proceedings of the 35th International Conference on Neural Information Processing Systems (NeurIPS 2022)</conf-name>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.2004.13912" ext-link-type="uri">https://doi.org/10.48550/arXiv.2004.13912</ext-link></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Agarwal</surname><given-names>A.</given-names></name> <name><surname>Tan</surname><given-names>Y. S.</given-names></name> <name><surname>Ronen</surname><given-names>O.</given-names></name> <name><surname>Singh</surname><given-names>C.</given-names></name> <name><surname>Yu</surname><given-names>B.</given-names></name></person-group> (<year>2022a</year>). <article-title>Hierarchical shrinkage: improving the accuracy and interpretability of tree-based methods</article-title>. <italic>arXiv</italic> preprint. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.2202.00858" ext-link-type="uri">https://doi.org/10.48550/arXiv.2202.00858</ext-link></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Agresti</surname><given-names>A.</given-names></name></person-group> (<year>2013</year>). <source>Categorical data analysis</source>. <edition>3rd</edition> Edn: <publisher-name>Wiley</publisher-name>.</mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ahmed</surname><given-names>A. M.</given-names></name> <name><surname>Mahmoud</surname><given-names>A. M.</given-names></name> <name><surname>Ali</surname><given-names>A. M.</given-names></name></person-group> (<year>2018</year>). <article-title>A novel decision tree classification based on post-pruning</article-title>. <source>Biomed. Res. Int.</source> <volume>2018</volume>:<fpage>2073082</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2018/2073082</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ayyadevara</surname><given-names>V. K.</given-names></name></person-group> (<year>2018</year>). <source>Pro machine learning algorithms: A hands-on approach to implementing algorithms in Python and R</source>: <publisher-name>Apress</publisher-name>.</mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Babyak</surname><given-names>M. A.</given-names></name></person-group> (<year>2004</year>). <article-title>What you see may not be what you get: a brief, nontechnical introduction to overfitting in regression-type models</article-title>. <source>Psychosom. Med.</source> <volume>66</volume>, <fpage>411</fpage>&#x2013;<lpage>421</lpage>. doi: <pub-id pub-id-type="doi">10.1097/01.psy.0000127692.23278.a9</pub-id>, <pub-id pub-id-type="pmid">15184705</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Baker</surname><given-names>R. S.</given-names></name> <name><surname>Inventado</surname><given-names>P. S.</given-names></name></person-group> (<year>2014</year>). &#x201C;<article-title>Educational data mining and learning analytics</article-title>&#x201D; in <source>Learning analytics</source> (<publisher-name>Springer</publisher-name>), <fpage>61</fpage>&#x2013;<lpage>75</lpage>.</mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Balcan</surname><given-names>M. F.</given-names></name> <name><surname>Sharma</surname><given-names>Y.</given-names></name></person-group> (<year>2024</year>). <article-title>Understanding robustness of decision trees</article-title>. <conf-name>Proceedings of the 41st International Conference on Machine Learning (ICML 2024)</conf-name>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.2402.12345" ext-link-type="uri">https://doi.org/10.48550/arXiv.2402.12345</ext-link></mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Biau</surname><given-names>G.</given-names></name> <name><surname>Scornet</surname><given-names>E.</given-names></name></person-group> (<year>2016</year>). <article-title>A random forest guided tour</article-title>. <source>TEST</source> <volume>25</volume>, <fpage>197</fpage>&#x2013;<lpage>227</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11749-016-0481-7</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Blockeel</surname><given-names>H.</given-names></name> <name><surname>Devos</surname><given-names>L.</given-names></name> <name><surname>Fr&#x00E9;nay</surname><given-names>B.</given-names></name> <name><surname>Nanfack</surname><given-names>G.</given-names></name> <name><surname>Nijssen</surname><given-names>S</given-names></name></person-group>. (<year>2023</year>). <article-title>Decision trees: from efficient prediction to responsible AI</article-title>. <source>Front. Artif. Intell.</source> <volume>6</volume>:<fpage>1185955</fpage>.</mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Borisov</surname><given-names>V.</given-names></name> <name><surname>Leemann</surname><given-names>T.</given-names></name> <name><surname>Se&#x00DF;ler</surname><given-names>K.</given-names></name> <name><surname>Haug</surname><given-names>J.</given-names></name> <name><surname>Pawelczyk</surname><given-names>M.</given-names></name> <name><surname>Kasneci</surname><given-names>G.</given-names></name></person-group> (<year>2023</year>). <article-title>Deep neural networks and tabular data: a survey</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst.</source> <volume>34</volume>, <fpage>8332</fpage>&#x2013;<lpage>8349</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNNLS.2022.3169902</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bramer</surname><given-names>M.</given-names></name></person-group> (<year>2002</year>). <article-title>Using J-pruning to reduce overfitting in classification trees</article-title>. <source>Knowl. Based Syst.</source> <volume>15</volume>, <fpage>49</fpage>&#x2013;<lpage>54</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0950-7051(01)00163-0</pub-id></mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Branco</surname><given-names>P.</given-names></name> <name><surname>Torgo</surname><given-names>L.</given-names></name> <name><surname>Ribeiro</surname><given-names>R. P.</given-names></name></person-group> (<year>2016</year>). <article-title>A survey of predictive modeling on imbalanced domains</article-title>. <source>ACM Comput. Surv.</source> <volume>49</volume>, <fpage>1</fpage>&#x2013;<lpage>50</lpage>. doi: <pub-id pub-id-type="doi">10.1145/2907070</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Breiman</surname><given-names>L.</given-names></name></person-group> (<year>2001a</year>). <article-title>Random forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Breiman</surname><given-names>L.</given-names></name></person-group> (<year>2001b</year>). <article-title>Statistical modeling: the two cultures</article-title>. <source>Stat. Sci.</source> <volume>16</volume>, <fpage>199</fpage>&#x2013;<lpage>231</lpage>.</mixed-citation></ref>
<ref id="ref90012"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Breiman</surname><given-names>L.</given-names></name> <name><surname>Friedman</surname><given-names>J. H.</given-names></name> <name><surname>Olshen</surname><given-names>R. A.</given-names></name> <name><surname>Stone</surname><given-names>C. J.</given-names></name></person-group> (<year>1984</year>). <source>Classification and regression trees</source>. <publisher-loc>Belmont, CA</publisher-loc>: <publisher-name>Wadsworth International Group</publisher-name>.</mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bzdok</surname><given-names>D.</given-names></name> <name><surname>Altman</surname><given-names>N.</given-names></name> <name><surname>Krzywinski</surname><given-names>M.</given-names></name></person-group> (<year>2018</year>). <article-title>Statistics versus machine learning</article-title>. <source>Nat. Methods</source> <volume>15</volume>, <fpage>233</fpage>&#x2013;<lpage>234</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.4642</pub-id>, <pub-id pub-id-type="pmid">30100822</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>C.</given-names></name> <name><surname>Liaw</surname><given-names>A.</given-names></name> <name><surname>Breiman</surname><given-names>L.</given-names></name></person-group> (<year>2004</year>). <source>Using random forest to learn imbalanced data</source>. <publisher-name>University of California, Berkeley Technical Report</publisher-name>. Available online at: <ext-link xlink:href="https://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf" ext-link-type="uri">https://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf</ext-link></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Dangeti</surname><given-names>P.</given-names></name></person-group> (<year>2017</year>). <source>Statistics for machine learning: Techniques for exploring supervised, unsupervised, and reinforcement learning models with Python and R</source>: <publisher-name>Packt Publishing</publisher-name>.</mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dietterich</surname><given-names>T. G.</given-names></name></person-group> (<year>1995</year>). <article-title>Overfitting and undercomputing in machine learning</article-title>. <source>ACM Comput. Surv.</source> <volume>27</volume>, <fpage>326</fpage>&#x2013;<lpage>327</lpage>. doi: <pub-id pub-id-type="doi">10.1145/212094.212114</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Faraggi</surname><given-names>D.</given-names></name> <name><surname>Reiser</surname><given-names>B.</given-names></name></person-group> (<year>2002</year>). <article-title>Estimation of the area under the ROC curve</article-title>. <source>Stat. Med.</source> <volume>21</volume>, <fpage>3093</fpage>&#x2013;<lpage>3106</lpage>. doi: <pub-id pub-id-type="doi">10.1002/sim.1228</pub-id>, <pub-id pub-id-type="pmid">12369084</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Fausett</surname><given-names>L. V.</given-names></name></person-group> (<year>1994</year>). <source>Fundamentals of neural networks: Architectures, algorithms, and applications</source>. <publisher-loc>Englewood Cliffs, NJ</publisher-loc>: <publisher-name>Prentice-Hall</publisher-name>.</mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fawcett</surname><given-names>T.</given-names></name></person-group> (<year>2006</year>). <article-title>An introduction to ROC analysis</article-title>. <source>Pattern Recogn. Lett.</source> <volume>27</volume>, <fpage>861</fpage>&#x2013;<lpage>874</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.patrec.2005.10.010</pub-id></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fife</surname><given-names>D. A.</given-names></name> <name><surname>D&#x2019;Onofrio</surname><given-names>B. M.</given-names></name></person-group> (<year>2023</year>). <article-title>Common, uncommon, and novel applications of random forest in psychological research</article-title>. <source>Psychol. Methods</source> <volume>29</volume>, <fpage>1164</fpage>&#x2013;<lpage>1179</lpage>. doi: <pub-id pub-id-type="doi">10.1037/met0000532</pub-id>, <pub-id pub-id-type="pmid">36201820</pub-id></mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Geman</surname><given-names>S.</given-names></name> <name><surname>Bienenstock</surname><given-names>E.</given-names></name> <name><surname>Doursat</surname><given-names>R.</given-names></name></person-group> (<year>1992</year>). <article-title>Neural networks and the bias/variance dilemma</article-title>. <source>Neural Comput.</source> <volume>4</volume>, <fpage>1</fpage>&#x2013;<lpage>58</lpage>. doi: <pub-id pub-id-type="doi">10.1162/neco.1992.4.1.1</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Grandvalet</surname><given-names>Y.</given-names></name> <name><surname>Bengio</surname><given-names>Y.</given-names></name></person-group> (<year>2005</year>). &#x201C;<article-title>Semi-supervised learning by entropy minimization</article-title>&#x201D; in <source>Advances in neural information processing systems</source>. eds. <person-group person-group-type="editor"><name><surname>Saul</surname><given-names>L. K.</given-names></name> <name><surname>Weiss</surname><given-names>Y.</given-names></name> <name><surname>Bottou</surname><given-names>L.</given-names></name></person-group>, vol. <volume>17</volume> (<publisher-name>MIT Press</publisher-name>), <fpage>529</fpage>&#x2013;<lpage>536</lpage>.</mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Grinsztajn</surname><given-names>L.</given-names></name> <name><surname>Oyallon</surname><given-names>E.</given-names></name> <name><surname>Varoquaux</surname><given-names>G.</given-names></name> <name><surname>Ayache</surname><given-names>N.</given-names></name></person-group> (<year>2022</year>). <article-title>Why do tree-based models still outperform deep learning on tabular data?</article-title> <source>Advances in neural information processing systems</source>, <volume>35</volume>: <fpage>507</fpage>&#x2013;<lpage>520</lpage>. Available online at: <ext-link xlink:href="https://proceedings.neurips.cc/paper/2022/file/5a5d0f4d7c1a2f0f8b5c9cdbfdb59e4e-Paper.pdf" ext-link-type="uri">https://proceedings.neurips.cc/paper/2022/file/5a5d0f4d7c1a2f0f8b5c9cdbfdb59e4e-Paper.pdf</ext-link></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hair</surname><given-names>J. F.</given-names></name> <name><surname>Black</surname><given-names>W. C.</given-names></name> <name><surname>Babin</surname><given-names>B. J.</given-names></name> <name><surname>Anderson</surname><given-names>R. E.</given-names></name></person-group> (<year>2010</year>). <source>Multivariate data analysis</source>. <edition>7th</edition> Edn. <publisher-loc>Upper Saddle River, NJ</publisher-loc>: <publisher-name>Pearson Prentice Hall</publisher-name>.</mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hanley</surname><given-names>J. A.</given-names></name> <name><surname>McNeil</surname><given-names>B. J.</given-names></name></person-group> (<year>1982</year>). <article-title>The meaning and use of the area under a receiver operating characteristic (ROC) curve</article-title>. <source>Radiology</source> <volume>143</volume>, <fpage>29</fpage>&#x2013;<lpage>36</lpage>. doi: <pub-id pub-id-type="doi">10.1148/radiology.143.1.7063747</pub-id>, <pub-id pub-id-type="pmid">7063747</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hastie</surname><given-names>T.</given-names></name> <name><surname>Tibshirani</surname><given-names>R.</given-names></name> <name><surname>Friedman</surname><given-names>J. H.</given-names></name></person-group> (<year>2009</year>). <source>The elements of statistical learning: Data mining, inference, and prediction</source>. <edition>2nd</edition> Edn: <publisher-name>Springer</publisher-name>.</mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hawkins</surname><given-names>D. M.</given-names></name></person-group> (<year>2003</year>). <article-title>The problem of overfitting</article-title>. <source>J. Chem. Inf. Comput. Sci.</source> <volume>44</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1021/ci0342472</pub-id>, <pub-id pub-id-type="pmid">14741005</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Haykin</surname><given-names>S. S.</given-names></name></person-group> (<year>2009</year>). <source>Neural networks and learning machines</source>. <publisher-loc>Upper Saddle River, NJ</publisher-loc>: <publisher-name>Pearson Education</publisher-name>.</mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hornik</surname><given-names>K.</given-names></name></person-group> (<year>1991</year>). <article-title>Approximation capabilities of multilayer feedforward networks</article-title>. <source>Neural Netw.</source> <volume>4</volume>, <fpage>251</fpage>&#x2013;<lpage>257</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0893-6080(91)90009-T</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hosmer</surname><given-names>D. W.</given-names></name> <name><surname>Lemeshow</surname><given-names>S.</given-names></name> <name><surname>Sturdivant</surname><given-names>R. X.</given-names></name></person-group> (<year>2013</year>). <source>Applied logistic regression</source>. <edition>3rd</edition> Edn: <publisher-name>Wiley</publisher-name>.</mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jordan</surname><given-names>M. I.</given-names></name> <name><surname>Mitchell</surname><given-names>T. M.</given-names></name></person-group> (<year>2015</year>). <article-title>Machine learning: trends, perspectives, and prospects</article-title>. <source>Science</source> <volume>349</volume>, <fpage>255</fpage>&#x2013;<lpage>260</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.aaa8415</pub-id>, <pub-id pub-id-type="pmid">26185243</pub-id></mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kahneman</surname><given-names>D.</given-names></name> <name><surname>Sibony</surname><given-names>O.</given-names></name> <name><surname>Sunstein</surname><given-names>C. R.</given-names></name></person-group> (<year>2021</year>). <source>Noise: a flaw in human judgment</source>: <publisher-name>Little, Brown Spark</publisher-name>.</mixed-citation></ref>
<ref id="ref36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>LeCun</surname><given-names>Y.</given-names></name> <name><surname>Bengio</surname><given-names>Y.</given-names></name> <name><surname>Hinton</surname><given-names>G.</given-names></name></person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>&#x2013;<lpage>444</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>, <pub-id pub-id-type="pmid">26017442</pub-id></mixed-citation></ref>
<ref id="ref37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liaw</surname><given-names>A.</given-names></name> <name><surname>Wiener</surname><given-names>M.</given-names></name></person-group> (<year>2002</year>). <article-title>Classification and regression by randomForest</article-title>. <source>R News</source> <volume>2</volume>, <fpage>18</fpage>&#x2013;<lpage>22</lpage>.</mixed-citation></ref>
<ref id="ref38"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Long</surname><given-names>J. S.</given-names></name> <name><surname>Freese</surname><given-names>J.</given-names></name></person-group> (<year>2014</year>). <source>Regression models for categorical dependent variables using Stata</source>. <edition>3rd</edition> Edn: <publisher-name>Stata Press</publisher-name>.</mixed-citation></ref>
<ref id="ref39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Matsuki</surname><given-names>K.</given-names></name> <name><surname>Kuperman</surname><given-names>V.</given-names></name> <name><surname>Van Dyke</surname><given-names>J. A.</given-names></name></person-group> (<year>2016</year>). <article-title>The random forests statistical technique: an examination of its value for the study of reading</article-title>. <source>Sci. Stud. Read.</source> <volume>20</volume>, <fpage>20</fpage>&#x2013;<lpage>33</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10888438.2015.1107075</pub-id>, <pub-id pub-id-type="pmid">26770056</pub-id></mixed-citation></ref>
<ref id="ref40"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Menard</surname><given-names>S.</given-names></name></person-group> (<year>2002</year>). <source>Applied logistic regression analysis</source>. <edition>2nd</edition> Edn: <publisher-name>Sage</publisher-name>.</mixed-citation></ref>
<ref id="ref41"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Metz</surname><given-names>C. E.</given-names></name></person-group> (<year>1978</year>). <article-title>Basic principles of ROC analysis</article-title>. <source>Semin. Nucl. Med.</source> <volume>8</volume>, <fpage>283</fpage>&#x2013;<lpage>298</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0001-2998(78)80014-2</pub-id>, <pub-id pub-id-type="pmid">112681</pub-id></mixed-citation></ref>
<ref id="ref42"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Metz</surname><given-names>C. E.</given-names></name></person-group> (<year>1979</year>). <article-title>ROC methodology in radiologic imaging</article-title>. <source>Investig. Radiol.</source> <volume>14</volume>, <fpage>234</fpage>&#x2013;<lpage>243</lpage>. doi: <pub-id pub-id-type="doi">10.1097/00004424-197905000-00009</pub-id></mixed-citation></ref>
<ref id="ref43"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Pampel</surname><given-names>F. C.</given-names></name></person-group> (<year>2000</year>). <source>Logistic regression: a primer</source>: <publisher-name>Sage</publisher-name>.</mixed-citation></ref>
<ref id="ref44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Quinlan</surname><given-names>J. R.</given-names></name></person-group> (<year>1996</year>). <article-title>Improved use of continuous attributes in C4.5</article-title>. <source>J. Artif. Intell. Res.</source> <volume>4</volume>, <fpage>77</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1613/jair.279</pub-id></mixed-citation></ref>
<ref id="ref45"><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Quinlan</surname><given-names>J. R.</given-names></name></person-group> (<year>1997</year>). <article-title>A study of overfitting in decision tree induction</article-title>. <conf-name>Proceedings of the 14th National Conference on Artificial Intelligence (AAAI)</conf-name>, <fpage>725</fpage>&#x2013;<lpage>730</lpage>.</mixed-citation></ref>
<ref id="ref46"><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Ribeiro</surname><given-names>M. T.</given-names></name> <name><surname>Singh</surname><given-names>S.</given-names></name> <name><surname>Guestrin</surname><given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x201C;Why should I trust you?&#x201D;: explaining the predictions of any classifier</article-title>. <conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD &#x2018;16)</conf-name>, <fpage>1135</fpage>&#x2013;<lpage>1144</lpage>. doi: <pub-id pub-id-type="doi">10.1145/2939672.2939778</pub-id></mixed-citation></ref>
<ref id="ref47"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Rokach</surname><given-names>L.</given-names></name> <name><surname>Maimon</surname><given-names>O.</given-names></name></person-group> (<year>2008</year>). <source>Data mining with decision trees: theory and applications</source>. <edition>2nd</edition> Edn: <publisher-name>World Scientific</publisher-name>.</mixed-citation></ref>
<ref id="ref48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rudin</surname><given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead</article-title>. <source>Nat. Mach. Intell.</source> <volume>1</volume>, <fpage>206</fpage>&#x2013;<lpage>215</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s42256-019-0048-x</pub-id>, <pub-id pub-id-type="pmid">35603010</pub-id></mixed-citation></ref>
<ref id="ref49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Saito</surname><given-names>T.</given-names></name> <name><surname>Rehmsmeier</surname><given-names>M.</given-names></name></person-group> (<year>2015</year>). <article-title>The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets</article-title>. <source>PLoS One</source> <volume>10</volume>:<fpage>e0118432</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0118432</pub-id>, <pub-id pub-id-type="pmid">25738806</pub-id></mixed-citation></ref>
<ref id="ref50"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shmueli</surname><given-names>G.</given-names></name></person-group> (<year>2010</year>). <article-title>To explain or to predict?</article-title> <source>Stat. Sci.</source> <volume>25</volume>, <fpage>289</fpage>&#x2013;<lpage>310</lpage>. doi: <pub-id pub-id-type="doi">10.1214/10-sts330</pub-id></mixed-citation></ref>
<ref id="ref51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shwartz-Ziv</surname><given-names>R.</given-names></name> <name><surname>Armon</surname><given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Tabular data: deep learning is not all you need</article-title>. <source>Inf. Fusion</source> <volume>81</volume>, <fpage>84</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.inffus.2021.11.011</pub-id></mixed-citation></ref>
<ref id="ref52"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Streiner</surname><given-names>D. L.</given-names></name> <name><surname>Cairney</surname><given-names>J.</given-names></name></person-group> (<year>2007</year>). <article-title>What&#x2019;s under the ROC? An introduction to receiver operating characteristics curves</article-title>. <source>Can. J. Psychiatr.</source> <volume>52</volume>, <fpage>121</fpage>&#x2013;<lpage>128</lpage>. doi: <pub-id pub-id-type="doi">10.1177/070674370705200210</pub-id>, <pub-id pub-id-type="pmid">17375868</pub-id></mixed-citation></ref>
<ref id="ref53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Swets</surname><given-names>J. A.</given-names></name></person-group> (<year>1988</year>). <article-title>Measuring the accuracy of diagnostic systems</article-title>. <source>Science</source> <volume>240</volume>, <fpage>1285</fpage>&#x2013;<lpage>1293</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.3287615</pub-id>, <pub-id pub-id-type="pmid">3287615</pub-id></mixed-citation></ref>
<ref id="ref54"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Westin</surname><given-names>C. F.</given-names></name> <name><surname>Maier</surname><given-names>S. E.</given-names></name> <name><surname>Mamata</surname><given-names>H.</given-names></name> <name><surname>Nabavi</surname><given-names>A.</given-names></name> <name><surname>Jolesz</surname><given-names>F. A.</given-names></name> <name><surname>Kikinis</surname><given-names>R.</given-names></name></person-group> (<year>2001</year>). <article-title>Processing and visualization for diffusion tensor MRI</article-title>. <source>Med. Image Anal.</source> <volume>6</volume>, <fpage>93</fpage>&#x2013;<lpage>108</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S1361-8415(01)00040-9</pub-id></mixed-citation></ref>
<ref id="ref55"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yarkoni</surname><given-names>T.</given-names></name> <name><surname>Westfall</surname><given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Choosing prediction over explanation in psychology: lessons from machine learning</article-title>. <source>Perspect. Psychol. Sci.</source> <volume>12</volume>, <fpage>1100</fpage>&#x2013;<lpage>1122</lpage>. doi: <pub-id pub-id-type="doi">10.1177/1745691617693393</pub-id>, <pub-id pub-id-type="pmid">28841086</pub-id></mixed-citation></ref>
<ref id="ref56"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>C.</given-names></name> <name><surname>Bengio</surname><given-names>S.</given-names></name> <name><surname>Hardt</surname><given-names>M.</given-names></name> <name><surname>Recht</surname><given-names>B.</given-names></name> <name><surname>Vinyals</surname><given-names>O.</given-names></name></person-group> (<year>2017</year>). <article-title>Understanding deep learning requires rethinking generalization</article-title>. <source>Int. Conf. Learn. Represent.</source></mixed-citation></ref>
<ref id="ref57"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>X.</given-names></name> <name><surname>Zhao</surname><given-names>Y.</given-names></name> <name><surname>Liu</surname><given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>Identifying the predictors of severe psychological distress using machine learning approaches: a random forest model comparison</article-title>. <source>J. Affect. Disord. Rep.</source> <volume>11</volume>:<fpage>100485</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jadr.2023.100485</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/37355/overview">Fernando Marmolejo-Ramos</ext-link>, Flinders University, Australia</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/765446/overview">Philomena Marfo Berchie</ext-link>, African Institute for Mathematical Sciences, Cameroon</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/882734/overview">Mustafa Cavus</ext-link>, Eskisehir Technical University, T&#x00FC;rkiye</p>
</fn>
</fn-group>
</back>
</article>