<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2025.1629413</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Digital Health</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>An AI-powered data curation and publishing virtual assistant: usability and explainability/causability of, and patient interest in the first-generation prototype</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>van Mierlo</surname><given-names>Rutger</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/2625659/overview"/><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Liang</surname><given-names>Wenjie</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="an1"><sup>&#x2020;</sup></xref><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Norak</surname><given-names>Kerli</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="author-notes" rid="an1"><sup>&#x2020;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3206538/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Kargl</surname><given-names>Michaela</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Maasik</surname><given-names>Mall</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Bynens</surname><given-names>Anne-Lore</given-names></name>
<xref ref-type="aff" rid="aff8"><sup>8</sup></xref><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Plass</surname><given-names>Markus</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Kreuzthaler</surname><given-names>Markus</given-names></name>
<xref ref-type="aff" rid="aff9"><sup>9</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2198870/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Benedikt</surname><given-names>Martin</given-names></name>
<xref ref-type="aff" rid="aff10"><sup>10</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2699374/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Hochstenbach</surname><given-names>Laura</given-names></name>
<xref ref-type="aff" rid="aff11"><sup>11</sup></xref><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>van &#x0027;t Hof</surname><given-names>Arnoud</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Celebi</surname><given-names>Remzi</given-names></name>
<xref ref-type="aff" rid="aff12"><sup>12</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2042700/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/software/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Dekker</surname><given-names>Andre</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1503759/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>de Zegher</surname><given-names>Isabelle</given-names></name>
<xref ref-type="aff" rid="aff13"><sup>13</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2519004/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/><role content-type="https://credit.niso.org/contributor-roles/project-administration/"/><role content-type="https://credit.niso.org/contributor-roles/visualization/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Kalendralis</surname><given-names>Petros</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2025891/overview" /><role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/><role content-type="https://credit.niso.org/contributor-roles/data-curation/"/><role content-type="https://credit.niso.org/contributor-roles/investigation/"/><role content-type="https://credit.niso.org/contributor-roles/methodology/"/><role content-type="https://credit.niso.org/contributor-roles/software/"/><role content-type="https://credit.niso.org/contributor-roles/supervision/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author" id="collab1">
<collab>the AIDAVA consortium</collab></contrib>
</contrib-group>
<contrib-group content-type="collab-list">
<contrib contrib-type="collab" rid="collab1"><name><surname>Kreutz</surname><given-names>Alexander</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Tana</surname><given-names>Alp&#x00E1;r</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Ravai-Nagy</surname><given-names>Aranka</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Scheenstra</surname><given-names>Bart</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Bihari</surname><given-names>B&#x00E9;la</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Kiss</surname><given-names>Botond</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Steiger</surname><given-names>Dominik</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>S&#x00F5;ber</surname><given-names>Elion</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Benoist</surname><given-names>Emmanuel</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Lotman</surname><given-names>Eno-Martin</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Ensar</surname><given-names>Erol</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Klimenkov</surname><given-names>Gleb</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Sambataro</surname><given-names>Gregorio</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>M&#x00FC;ller</surname><given-names>Heimo</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Horv&#x00E1;th</surname><given-names>Istv&#x00E1;n</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Dallos</surname><given-names>J&#x00E1;nos D&#x00E1;niel</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Serafimova</surname><given-names>Katerina Zdravkova</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Lepik</surname><given-names>Katrin</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>M&#x00F5;istlik</surname><given-names>Kertu</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Kankainen</surname><given-names>Kristian</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Boersma</surname><given-names>Liesbeth</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Ferencz</surname><given-names>L&#x00F3;r&#x00E1;nt</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Dumontier</surname><given-names>Michel</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Moga</surname><given-names>Monika</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>&#x0160;ermolajeva</surname><given-names>Natalja</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Amirrajab</surname><given-names>Sina</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Boytcheva</surname><given-names>Svetla</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>Primov</surname><given-names>Todor</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>L&#x00E1;z&#x00E1;r</surname><given-names>Zolt&#x00E1;n</given-names></name></contrib>
<contrib contrib-type="collab" rid="collab1"><name><surname>M&#x00E1;t&#x00E9;</surname><given-names>Zsolt</given-names></name></contrib>
</contrib-group>
<aff id="aff1"><label><sup>1</sup></label><institution>Department of Radiation Oncology (Maastro), GROW Research Institute for Oncology and Reproduction, Maastricht University Medical Centre&#x002B; (MUMC&#x002B;)</institution>, <addr-line>Maastricht</addr-line>, <country>Netherlands</country></aff>
<aff id="aff2"><label><sup>2</sup></label><institution>Department of Cardiology, Cardiovascular Research Institute Maastricht (CARIM), Maastricht University</institution>, <addr-line>Maastricht</addr-line>, <country>Netherlands</country></aff>
<aff id="aff3"><label><sup>3</sup></label><institution>Department of Cardiology, Maastricht University Medical Centre&#x002B; (MUMC&#x002B;)</institution>, <addr-line>Maastricht</addr-line>, <country>Netherlands</country></aff>
<aff id="aff4"><label><sup>4</sup></label><institution>Department of Cardiology, Zuyderland Medical Centre</institution>, <addr-line>Heerlen</addr-line>, <country>Netherlands</country></aff>
<aff id="aff5"><label><sup>5</sup></label><institution>Department of Health Technologies, Tallinn University of Technology</institution>, <addr-line>Tallinn</addr-line>, <country>Estonia</country></aff>
<aff id="aff6"><label><sup>6</sup></label><institution>Diagnostic and Research Institute of Pathology, Medical University of Graz</institution>, <addr-line>Graz</addr-line>, <country>Austria</country></aff>
<aff id="aff7"><label><sup>7</sup></label><institution>IT Department, North Estonia Medical Centre</institution>, <addr-line>Tallinn</addr-line>, <country>Estonia</country></aff>
<aff id="aff8"><label><sup>8</sup></label><institution>Clinical Data Science, Maastricht University Medical Centre&#x002B; (MUMC&#x002B;)</institution>, <addr-line>Maastricht</addr-line>, <country>Netherlands</country></aff>
<aff id="aff9"><label><sup>9</sup></label><institution>Institute for Medical Informatics, Statistics and Documentation, Medical University of Graz</institution>, <addr-line>Graz</addr-line>, <country>Austria</country></aff>
<aff id="aff10"><label><sup>10</sup></label><institution>Department of Internal Medicine, Division of Cardiology, Medical University of Graz</institution>, <addr-line>Graz</addr-line>, <country>Austria</country></aff>
<aff id="aff11"><label><sup>11</sup></label><institution>Department of Health Services Research, Care and Public Health Research Institute (CAPHRI), Faculty of Health Medicine and Life Sciences, Maastricht University</institution>, <addr-line>Maastricht</addr-line>, <country>Netherlands</country></aff>
<aff id="aff12"><label><sup>12</sup></label><institution>Department of Advanced Computing Sciences, Maastricht University</institution>, <addr-line>Maastricht</addr-line>, <country>Netherlands</country></aff>
<aff id="aff13"><label><sup>13</sup></label><institution>b!loba</institution>, <addr-line>Tervuren</addr-line>, <country>Belgium</country></aff>
<author-notes>
<fn fn-type="edited-by"><p><bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2735521/overview">Eugenia Rinaldi</ext-link>, Charit&#x00E9; Medical University of Berlin, Germany</p></fn>
<fn fn-type="edited-by"><p><bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1940380/overview">Mariangela Dametto</ext-link>, Centro de Tecnologia da Informa&#x00E7;&#x00E3;o Renato Archer (CTI), Brazil</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3081294/overview">Colm Brandon</ext-link>, University of Limerick, Ireland</p></fn>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Rutger van Mierlo <email>Rutger.vanmierlo@maastrichtuniversity.nl</email></corresp>
<fn fn-type="equal" id="an1"><label><sup>&#x2020;</sup></label><p>These authors share second authorship</p></fn>
</author-notes>
<pub-date pub-type="epub"><day>17</day><month>10</month><year>2025</year></pub-date>
<pub-date pub-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1629413</elocation-id>
<history>
<date date-type="received"><day>15</day><month>05</month><year>2025</year></date>
<date date-type="accepted"><day>22</day><month>09</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 van Mierlo, Liang, Norak, Kargl, Maasik, Bynens, Plass, Kreuzthaler, Benedikt, Hochstenbach, van &#x0027;t Hof, Celebi, Dekker, de Zegher, Kalendralis and the AIDAVA consortium.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>van Mierlo, Liang, Norak, Kargl, Maasik, Bynens, Plass, Kreuzthaler, Benedikt, Hochstenbach, van &#x0027;t Hof, Celebi, Dekker, de Zegher, Kalendralis and the AIDAVA consortium</copyright-holder><license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract><sec><title>Introduction</title>
<p>Ensuring high quality and reusability of personal health data is costly and time-consuming. An AI-powered virtual assistant for health data curation and publishing could support patients to ensure harmonization and data quality enhancement, which improves interoperability and reusability. This formative evaluation study aimed to assess the usability of the first-generation (G1) prototype developed during the AI-powered data curation and publishing virtual assistant (AIDAVA) Horizon Europe project.</p>
</sec><sec><title>Methods</title>
<p>In this formative evaluation study, we planned to recruit 45 patients with breast cancer and 45 patients with cardiovascular disease from three European countries. An intuitive front-end, supported by AI and non-AI data curation tools, is being developed across two generations. G1 was based on existing curation tools and early prototypes of tools being developed. Patients were tasked with ingesting and curating their personal health data, creating a personal health knowledge graph that represented their integrated, high-quality medical records. Usability of G1 was assessed using the system usability scale. The subjective importance of the explainability/causability of G1, the perceived fulfillment of these needs by G1, and interest in AIDAVA-like technology were explored using study-specific questionnaires.</p>
</sec><sec><title>Results</title>
<p>A total of 83 patients were recruited; 70 patients completed the study, of whom 19 were unable to successfully curate their health data due to configuration issues when deploying the curation tools. Patients rated G1 as marginally acceptable on the system usability scale (59.1&#x2009;&#x00B1;&#x2009;19.7/100) and moderately positive for explainability/causability (3.3&#x2013;3.8/5), and were moderately positive to positive regarding their interest in AIDAVA-like technology (3.4&#x2013;4.4/5).</p>
</sec><sec><title>Discussion</title>
<p>Despite its marginal acceptability, G1 shows potential in automating data curation into a personal health knowledge graph, but it has not reached full maturity yet. G1 deployed very early prototypes of tools planned for the second-generation (G2) prototype, which may have contributed to the lower usability and explainability/causability scores. Conversely, patient interest in AIDAVA-like technology seems quite high at this stage of development, likely due to the promising potential of data curation and data publication technology. Improvements in the library of data curation and publishing tools are planned for G2 and are necessary to fully realize the value of the AIDAVA solution.</p>
</sec>
</abstract>
<kwd-group>
<kwd>data curation</kwd>
<kwd>interoperability</kwd>
<kwd>reusability</kwd>
<kwd>usability</kwd>
<kwd>explainability</kwd>
<kwd>causability</kwd>
<kwd>artificial intelligence</kwd>
<kwd>data publishing</kwd>
</kwd-group><contract-num rid="cn001">101057062</contract-num><contract-num rid="cn002">22.00093, REF-1131-52104</contract-num><contract-sponsor id="cn001">European Union&#x2019;s Horizon Europe Research and Innovation Programme</contract-sponsor><contract-sponsor id="cn002">Swiss State Secretariat for Education, Research and Innovation (SBFI)</contract-sponsor><contract-sponsor id="cn003">SNOMED-CT International</contract-sponsor><counts>
<fig-count count="4"/>
<table-count count="5"/><equation-count count="0"/><ref-count count="28"/><page-count count="12"/><word-count count="0"/></counts><custom-meta-wrap><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Connected Health</meta-value></custom-meta></custom-meta-wrap>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Personal health data (PHD) consist of vast amounts of rich, structured and unstructured data in narrative forms, available in heterogeneous formats and scattered amongst healthcare systems. PHD are stored as hospital and non-hospital data in electronic health record (EHR) systems, which may or may not be interconnected (<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B3">3</xref>). Curating and publishing PHD are costly and time-consuming, and consequently, PHD are difficult to reuse due to the large amounts of narrative text. For example, text-based content comprises 40&#x0025;&#x2013;80&#x0025; of electronic health record information (<xref ref-type="bibr" rid="B3">3</xref>). These data could benefit healthcare and research if they are curated and published in an interoperable and reusable format for data users (i.e., patients or healthcare professionals).</p>
<p>In this article, &#x201C;data curation and data publishing&#x201D; denotes the integration, harmonization, and quality enhancement (data curation) of PHD, consisting of multimodal data, and its transformation into a target format (data publishing) to make these data more reusable for humans and machines. Today, expert data stewards can make sense of the unstructured data by using existing data curation tools, such as text mining and additional manual processing. However, due to the enormous amount of available PHD, parts of these data undoubtedly remain uncurated or unused, even though they represent a wealth of information for clinical care and clinical research (<xref ref-type="bibr" rid="B4">4</xref>). AI-based automated curation with an active human-in-the-loop (HITL) approach (<xref ref-type="bibr" rid="B5">5</xref>) could be a promising solution for data curation and data publishing.</p>
<p>Research has shown positive health-related outcomes, such as improved self-care or medication adherence, as a result of active patient engagement in managing their PHD (<xref ref-type="bibr" rid="B6">6</xref>). The problem, however, resides in enabling or motivating patients to actively engage in curating their PHD. Patients may prefer to take on a passive role, rather than an active role in entering or updating their PHD, especially with more complex medical information (<xref ref-type="bibr" rid="B6">6</xref>). Therefore, it seems unfeasible for some, if not most, patients to curate and enhance their PHD without adequate support from expert data stewards or AI and non-AI curation tools.</p>
<p>Currently, the approach to reuse PHD is population-centric curation (<xref ref-type="bibr" rid="B7">7</xref>), which relies on forms of mass curation by expert data stewards. PHD, anonymized or pseudonymized, cannot be linked across data sources. Moreover, further organizational (lack of skilled resources), cultural, ethical, and legal challenges remain prevalent (<xref ref-type="bibr" rid="B8">8</xref>). We believe a paradigm shift is required from population-centric and anonymized curation to individual-centric curation (<xref ref-type="bibr" rid="B7">7</xref>), supported by AI and non-AI curation tools and an HITL approach. Thus, multimodal patient data is transformed into a knowledge graph, which is defined as a semantic network that represents the relationships between entities or events in the real world (<xref ref-type="bibr" rid="B9">9</xref>). The sources of each patient&#x2019;s data are curated into a source health knowledge graph (SHKG); all SHKGs would then be integrated into a single personal health knowledge graph (PHKG) (<xref ref-type="bibr" rid="B9">9</xref>&#x2013;<xref ref-type="bibr" rid="B11">11</xref>). PHKGs can be introduced to enhance the interoperability and reusability of PHD, provided they are supported by an ontology aligned with widely adopted standards and medical terminologies such as Systematized Nomenclature of Medicine Clinical Terms (SNOMED-CT) (<xref ref-type="bibr" rid="B12">12</xref>) and Logical Observation Identifiers Names and Codes (LOINC) (<xref ref-type="bibr" rid="B13">13</xref>). This would introduce the paradigm shift we believe necessary to provide a centralized, personal health dossier in an interoperable and reusable format (<xref ref-type="bibr" rid="B7">7</xref>).</p>
<p>The AI-powered data curation and publishing virtual assistant (AIDAVA) project (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B14">14</xref>) that started in September 2022 aimed to support patients to quickly and automatically curate their PHD (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>) into a PHKG. In the first-generation (G1) prototype, we integrated existing and newly developed AI and non-AI data curation tools, with an intuitive front-end to support data curation and data publishing. We evaluated G1 in two separate, but relevant use cases. The first use case involved patients with cardiovascular disease (CVD), presenting a longitudinal health record. This includes hospital and non-hospital data collected across multiple organizations in heterogeneous formats, from which a Second Manifestations of Arterial Disease (SMART) risk score (<xref ref-type="bibr" rid="B17">17</xref>) could be calculated that primarily benefits clinical healthcare. The second use case involved patients with breast cancer (BC), addressing the issue of non-interoperable, cross-border patient registries and supporting international clinical research. The PHKGs from the patients in both use cases were extracted and visualized into a personal International Patient Summary (IPS), &#x201C;an electronic health record extract containing essential healthcare information about a subject of care&#x201D; following the emerging European Electronic Health Record Exchange Format (EEHRxF) standard identified in the European Health Data Space (EHDS) regulation (<xref ref-type="bibr" rid="B18">18</xref>). The aim of this formative evaluation study was to assess the usability and explainability/causability (<xref ref-type="bibr" rid="B19">19</xref>) of, and patient interest in, G1 in these use cases.</p>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Methods</title>
<sec id="s2a"><label>2.1</label><title>Research design</title>
<p>This formative evaluation study was conducted at four health institutions across three European countries. These were the North Estonia Medical Centre (NEMC) in Estonia, Maastro and the Maastricht University Medical Centre (MUMC&#x002B;) in the Netherlands, and the Medical University of Graz (MUG) in Austria. The patients tested G1 for at least 2&#x2013;4 weeks, with support from the research team. The study flow for the patients is visualized in the <xref ref-type="sec" rid="s13">Supplementary Materials</xref>. In addition, a list of the AI and non-AI tools deployed in G1 can be found in the <xref ref-type="sec" rid="s13">Supplementary Materials</xref>.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Patient selection</title>
<p>The inclusion goal was set at 90 adult patients for adequate evaluation, equally dividing 30 patients between NEMC, Maastro/MUMC&#x002B;, and MUG, each of which included 15 patients with BC and 15 patients with CVD, specifically with type 1 myocardial infarction. The recruitment period for G1 lasted from May 2024 to December 2024. The inclusion and exclusion criteria for G1 prototype testing are listed in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>. The development of G1 took place with the support of patient consultants selected by the European Cancer Patient Coalition (ECPC) (<xref ref-type="bibr" rid="B20">20</xref>) and European Heart Network (EHN) (<xref ref-type="bibr" rid="B21">21</xref>). They did not contribute their PHD but acted as patient representatives in the co-development of G1.</p>
<table-wrap id="T1" position="float"><label>Table 1</label>
<caption><p>Inclusion and exclusion criteria for G1 prototype testing.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Inclusion criteria</th>
<th valign="top" align="center">Exclusion criteria</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Data available in the electronic health records within the related medical center</td>
<td valign="top" align="left">The patient was vulnerable, as judged by the physician</td>
</tr>
<tr>
<td valign="top" align="left">Owner and user of a smartphone</td>
<td valign="top" align="left">The patient was underage</td>
</tr>
<tr>
<td valign="top" align="left">Provide consent for the data curator, study nurse, and research associate to access and identify, and extract their PHD</td>
<td valign="top" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Sign a collaboration agreement with the relevant HDI, if applicable</td>
<td valign="top" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Agree to test both the G1 and G2 prototypes</td>
<td valign="top" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Understand and speak English or the local language (Dutch, Estonian, or German)</td>
<td valign="top" align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2c"><label>2.3</label><title>Research setting</title>
<p>The start of the study was preceded by a &#x201C;dry-run workshop&#x201D;, conducted with the patient consultants in May 2024, as preparation for the evaluation of G1. A comprehensive training plan was developed, which included role specifications for study nurses, research associates, expert data curators, patients, and data users (clinicians) (<xref ref-type="table" rid="T2">Table&#x00A0;2</xref>). The patient consultants provided many valuable insights and feedback to implement before G1 testing that may have reduced the chance of errors during actual deployment.</p>
<table-wrap id="T2" position="float"><label>Table 2</label>
<caption><p>Task overview during G1 testing.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Role in G1</th>
<th valign="top" align="center">Main task(s)<xref ref-type="table-fn" rid="table-fn1"><sup>a</sup></xref></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="5">Study nurse and/or research associate</td>
<td valign="top" align="left">Extract PHD from EHR</td>
</tr>
<tr>
<td valign="top" align="left">Fill in REDCap forms for the patients</td>
</tr>
<tr>
<td valign="top" align="left">Contact for patients&#x0027; concerns and questions</td>
</tr>
<tr>
<td valign="top" align="left">Explain the ingestion, curation, publishing, and use steps to patients</td>
</tr>
<tr>
<td valign="top" align="left">Administer questionnaires</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Expert data curator</td>
<td valign="top" align="left">Supervise and support the ingestion, curation, publishing, and use steps for the patients</td>
</tr>
<tr>
<td valign="top" align="left">Answer questions in AIDAVA if the patient has selected: &#x201C;I don&#x0027;t know&#x201D;</td>
</tr>
<tr>
<td valign="top" align="left">Patient</td>
<td valign="top" align="left">Work through the ingestion, curation, publishing, and use steps for G1 prototype testing</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">Data user/clinician</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">BC specialist</td>
<td valign="top" align="left">Screening and recruitment</td>
</tr>
<tr>
<td valign="top" align="left">Check the accuracy of BC registry inquiries</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">CVD specialist</td>
<td valign="top" align="left">Screening and recruitment</td>
</tr>
<tr>
<td valign="top" align="left">Calculate the SMART risk score</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn1"><label><sup>a</sup></label>
<p>The table provides a general overview of tasks within the research team. However, tasks could be interchangeable between each role if the appropriate skill and knowledge were present.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The dry run had three objectives, namely to (1) gather feedback on the AIDAVA prototype (pre-G1) at that stage and the health data intermediary (HDI) integration, (2) align the evaluation process across sites, and (3) consolidate feedback on the Research Electronic Data Capture (REDCap) tool. The patient consultants were asked to comment on the user journey, and the final draft is available in the <xref ref-type="sec" rid="s13">Supplementary Materials</xref>. After the dry run, the formative evaluation started, and eligible patients were invited to an information session to explain the purpose of the study and go through the informed consent. Patients who signed informed consent forms were guided by the study nurse or research associate in a 1-h training session. In this session, the study nurse or research associate (1) reminded the patient of the objective of AIDAVA, (2) created an account with the patient for G1 and the HDI, and (3) explained the data flow and steps, as presented in <xref ref-type="sec" rid="s13">Supplementary Figure S1</xref>.</p>
<p>The patients worked through four steps (data ingestion, data curation, data publishing, and data use) after the deployment of G1 at the four health institutions acting as the test sites in this study (<xref ref-type="sec" rid="s13">Supplementary Figure S1</xref>). The first three steps required active patient involvement, which included HITL mechanisms to improve the quality of the final IPS that the patient receives. In step 1, data ingestion, the patients consented to have their PHD identified and extracted from the hospital EHR, as well as from the HDI. In this context, HDI refers to an entity or platform that facilitates the collection, integration, and controlled sharing of PHD across different healthcare systems and data users. The patient actively connects their HDI account to the AIDAVA account. Then, these PHD were transferred to the AIDAVA data store, which was exclusively available within the hospital testing environment of the local site or a secure national cloud service. An overview of the complete data flow is illustrated in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>.</p>
<fig id="F1" position="float"><label>Figure 1</label>
<caption><p>Overview of the data flow and steps. AI, artificial intelligence; BP, blood pressure; GP, general practitioner; HDI, health data intermediary; IPS, international patient summary; QLY, quality of life.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1629413-g001.tif"><alt-text content-type="machine-generated">Flowchart illustrating a healthcare data management system. \"Data Ingestion\" includes GP data, QLY questions, and device data managed by HDI or within hospitals. \"Data Curation\" involves human interaction and a personal health knowledge graph, including various health records. \"Data Publishing\" shows a patient viewing their health summary on a phone. \"Data Use\" involves a patient, cardiologist assessing risk scores, and breast cancer specialist exploring research questions.</alt-text>
</graphic>
</fig>
<p>In step 2, data curation, the patients activated their AIDAVA account to transform, integrate, and complete their ingested PHD into a standardized representation, the PHKG. As G1 is in the prototyping phase, the patients were warned that any output from the virtual assistant may be incorrect. G1 generated questions for the patients when complete automation of data curation failed, which was generally expected to occur due to missing data. This was the main role and implementation of the HITL mechanism, i.e., to catch any errors prior to data publication. If the patients were unsure of the answer or the question generated by G1 did not make sense to them, they were able to forward the question to the expert data curator appointed by the local health institution.</p>
<p>In step 3, data publishing, the data to be extracted from the PHKG were specified. For this formative evaluation study, three data publishing outputs were defined: (1) extraction and visualization of the patient&#x0027;s IPS, one of the six critical data categories proposed in the EHDS and compliant with the international Health Level Seven Fast Healthcare Interoperability Resources (HL7 FHIR) guideline (<xref ref-type="bibr" rid="B18">18</xref>); (2) extraction of data elements to create local BC registries at each site (BC use case); and (3) extraction of key variables supporting the automatic calculation of the patient&#x0027;s SMART risk score (<xref ref-type="bibr" rid="B17">17</xref>) (CVD use case). These data elements and key variables can be found in the <xref ref-type="sec" rid="s13">Supplementary Materials</xref>. The curated PHD, which are the source of these three different outputs, only need to be curated once as the respective data extraction processes are executed multiple times in the PHKG.</p>
<p>In step 4, data use, these three outputs were exploited by different data users (<xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>). The patients were able to visualize their IPS through a specific visualization tool (MIDATA IPS Viewer). The BC specialist could access the metrics calculated from their local BC registry and from interoperable cross-border registries. The prototype executes federated queries for the virtual cross-border BC registry to avoid any data transfer across hospitals. The CVD specialist could visualize the automatically calculated SMART risk score and the details of its calculation for each patient. Due to the possibility of inaccurate data curation at this prototype stage, the SMART risk scores were solely used to test the prototype&#x0027;s accuracy, not for communicating actual risk to the patients.</p>
<p>The patients began testing G1 under the guidance of their local study nurse and/or research associate. Each patient tested and evaluated G1 for a duration of 2&#x2013;4 weeks, starting with an on-site visit in which the study nurse supported the patient with the activation of their AIDAVA account. In addition, the patients created a personal HDI account, filled out the Medical and Digital Literacy questionnaires, and created an account for their blood pressure device (only for patients with CVD).</p>
<p>As shown in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>, there were two paths for uploading data into the patient&#x0027;s AIDAVA account. First, after the patient had signed the informed consent form, the study nurse/research associate extracted the PHD of the patient from the hospital EHR and imported these data into the patient&#x0027;s personal AIDAVA account. Second, the patients uploaded PHD to their HDI account (including data from their blood pressure device), which were also sent to the patient&#x0027;s personal account within the hospital&#x2019;s secure environment set up for AIDAVA, respecting the security requirements of each institution. In the remaining weeks, the patients tested the system with general support from the study nurse/research associate, and technical support from the expert data curator, who used a shared ticketing system to quickly solve any issues.</p>
<p>Once the data from different sources, typically in a heterogeneous format, were ingested, the patient could then request the AIDAVA system to curate them into a PHKG. This structured representation of PHD is compliant with the AIDAVA ontology based on the Swiss Personal Health Network (SPHN), including the SNOMED, LOINC, and FHIR profiles and is easily mappable to multiple standards, supporting semantic interoperability. AIDAVA uses an AI-powered semantic transformation infrastructure to orchestrate curation workflows; it leverages predefined data source descriptions (i.e., dictionaries) to process and transform data based on its meaning, rather than just its format. Each attribute/column in (semi-)structured data sources is first semi-automatically mapped either to the AIDAVA ontology with classical extract, transform, and load (ETL) transformation tools, or to dedicated curation tools such as entity linking (mapping to clinical terminologies) and entity alignment (linking terms from two different terminologies), supporting transformation of the data source into an SHKG compliant with the AIDAVA ontology. Unstructured data were directly processed by the natural language processing (NLP) tools and transformed into another SHKG. When data from each SHKG are integrated into a single patient&#x0027;s PHKG, the entity deduplication tool removes duplicate records referring to the same real-world entity and validation is performed using individual data quality checks. Quality scores are computed across multiple dimensions (e.g., completeness and consistency) and categories (e.g., valid code and temporal order consistency) and each detected quality issue is formulated as a question to the user to support data improvement. The entire curation and quality enhancement process is recorded within an audit trail to meet regulatory compliance.</p>
<p>The main task for the patients was to test data ingestion and data curation of their PHD, while checking the accuracy of the published PHD in their own IPS (<xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>). More precisely, this meant that patients ingested files and documents that were uploaded to their personal AIDAVA account, so that these files and documents would then be available to them for automatic curation. To preserve data ownership, the patients could always choose whether to start automatic data curation for all or only some of the ingested files. G1 addressed questions to the patient whenever issues arose during the automatic curation (e.g., missing data, incompatible formats, or unrecognizable documents). The patients answered these questions in a format related to the question (e.g., when asked for a date, they were presented with a &#x201C;mm/dd/yyyy&#x201D; format input field). The patients always had the possibility to either answer the question if they knew the answer or skip the question by clicking the &#x201C;I don&#x0027;t know the answer&#x201D; option. When patients selected the &#x201C;I don&#x0027;t know the answer&#x201D; option, the question was automatically forwarded to the expert data curator. To finalize the question, the patients either provided feedback on the question or pushed the &#x201C;skip feedback&#x201D; button. Furthermore, the patients were always able to check their IPS. At the closing evaluation session, the patients provided feedback on each step in the evaluation study, and the patients&#x2019; comments were documented in REDCap forms (<xref ref-type="bibr" rid="B22">22</xref>, <xref ref-type="bibr" rid="B23">23</xref>) by the study nurse and/or research associate.</p>
</sec>
<sec id="s2d"><label>2.4</label><title>Data collection</title>
<p>Evaluation data were collected via REDCap forms (questionnaires and narrative feedback) by the study nurse and/or research associate. The patients&#x0027; answers to the Medical and Digital Literacy questionnaire were collected during the first on-site visit. The patients&#x0027; answers to the G1 evaluation questionnaires were collected after testing the prototype for 2&#x2013;4 weeks after the first on-site visit. These questionnaires included the system usability scale (SUS) (<xref ref-type="bibr" rid="B24">24</xref>) and study-specific questionnaires on the explainability/causability of G1 and the patient&#x0027;s interest in AIDAVA-like technology.</p>
<p>The patients were asked to comment on the questionnaires, the HDI, the blood pressure device, and each of the four deployment steps of G1 (data ingestion, data curation, data publishing, and data use). These comments were collected in the REDCap forms. The patients were given a time sheet to track how much time they spent on testing G1, and how much time they spent on study-related activities other than G1 testing (which include blood pressure measurements and other study-related activities).</p>
</sec>
<sec id="s2e"><label>2.5</label><title>Data analysis</title>
<p>All the quantitative data from the questionnaires are presented as mean&#x2009;&#x00B1;&#x2009;standard deviation, median, and range. The study-specific Medical and Digital Literacy questionnaires contained six questions in each domain. The answers ranged from 0 (i.e., no knowledge) to 5 (i.e., expert knowledge). The purpose of the Medical and Digital Literacy questionnaires was user profiling, so that in an ideal situation, the virtual assistant could adjust communication to the patient accordingly. The evaluation questionnaires comprised the SUS questionnaire, which contains 10 questions, six study-specific questions on explainability/causability, and six questions related to the patient&#x0027;s interest in AIDAVA-like technology. The answers to these questions ranged from 1 (strong disagreement) to 5 (strong agreement) on a 5-point Likert scale. The item responses for each of the three sets of SUS questionnaires were analyzed using cumulative link mixed models, including a random intercept per patient. The Cronbach&#x0027;s alpha values were used to assess the reliability of the SUS questionnaires. The original 10 SUS questions were used to calculate the SUS score (0&#x2013;100), which was then correlated to a level of acceptability and net promoter score (<xref ref-type="bibr" rid="B25">25</xref>). Data on time spent on data ingestion and data curation are presented as mean&#x2009;&#x00B1;&#x2009;standard deviation, median, and range. Comparisons between the two use cases (BC vs. CVD) and questionnaire items were analyzed using independent-samples tests (<italic>t</italic>-test or Mann&#x2013;Whitney <italic>U</italic> test, as appropriate). A <italic>P</italic>-value of 0.05 or less was considered statistically significant in all the analyses.</p>
</sec>
<sec id="s2f" sec-type="ethics-statement"><label>2.6</label><title>Ethics statement</title>
<p>The study was approved by each local ethics committee at the participating test sites. An ethical advisory board, which includes external advisors, oversaw the G1 development process and the dry run workshop to ensure the study met all ethical and procedural standards.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<sec id="s3a"><label>3.1</label><title>Participants</title>
<p>A total of 605 patients were screened for inclusion in the study, with 182 for the CVD use case and 423 for the BC use case (<xref ref-type="table" rid="T3">Table&#x00A0;3</xref>). A total of 83 patients signed informed consent forms for G1 testing. However, 13 patients withdrew before or during the testing of G1. The reasons for withdrawals were lack of motivation, difficulty using digital devices, personal reasons, the perceived effort required for testing G1, or a combination of these. Ultimately, 70 patients successfully tested G1.</p>
<table-wrap id="T3" position="float"><label>Table 3</label>
<caption><p>Overview of the patients screened and recruited, the withdrawals, and the number finalized per site.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Study stage</th>
<th valign="top" align="center" colspan="4">CVD use case</th>
<th valign="top" align="center" colspan="4">BC use case</th>
</tr>
<tr>
<th valign="top" align="center">NEMC</th>
<th valign="top" align="center">MUG</th>
<th valign="top" align="center">MUMC</th>
<th valign="top" align="center">Total</th>
<th valign="top" align="center">NEMC</th>
<th valign="top" align="center">MUG</th>
<th valign="top" align="center">Maastro</th>
<th valign="top" align="center">Total</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Screened</td>
<td valign="top" align="center">32</td>
<td valign="top" align="center">110</td>
<td valign="top" align="center">40</td>
<td valign="top" align="center">182</td>
<td valign="top" align="center">35</td>
<td valign="top" align="center">142</td>
<td valign="top" align="center">246</td>
<td valign="top" align="center">423</td>
</tr>
<tr>
<td valign="top" align="left">Recruited&#x2014;signed informed consent</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">38</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">45</td>
</tr>
<tr>
<td valign="top" align="left">Withdrawals<xref ref-type="table-fn" rid="table-fn2"><sup>a</sup></xref></td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">5</td>
</tr>
<tr>
<td valign="top" align="left">Finalized</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">30</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">40</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn2"><label><sup>a</sup></label>
<p>Withdrawals after signing informed consent.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3b"><label>3.2</label><title>Medical and digital literacy</title>
<p>The patients reported significantly higher mean digital literacy scores (18.2&#x2009;&#x00B1;&#x2009;6.7) than medical literacy scores (15.6&#x2009;&#x00B1;&#x2009;5.9) (<italic>P</italic>&#x2009;&#x003D;&#x2009;0.01) (<xref ref-type="table" rid="T4">Table&#x00A0;4</xref>). Moreover, the patient-reported medical literacy scores were significantly higher for the patients in the BC use case (17.3&#x2009;&#x00B1;&#x2009;5.9) compared to those in the CVD use case (13.3&#x2009;&#x00B1;&#x2009;5.0) (<italic>P</italic>&#x2009;&#x003C;&#x2009;0.01). There was no significant difference in the patient-reported digital literacy scores between the BC use case (18.1&#x2009;&#x00B1;&#x2009;6.3) and the CVD use case (18.3&#x2009;&#x00B1;&#x2009;7.1) (<italic>P</italic>&#x2009;&#x003D;&#x2009;0.92).</p>
<table-wrap id="T4" position="float"><label>Table 4</label>
<caption><p>The patient-reported medical and digital literacy and system usability scores for G1.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Questionnaire</th>
<th valign="top" align="center">Mean&#x2009;&#x00B1;&#x2009;SD</th>
<th valign="top" align="center">Median</th>
<th valign="top" align="center">Range</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Medical literacy (score 0&#x2013;30) (<italic>n</italic>&#x2009;&#x003D;&#x2009;70)</td>
<td valign="top" align="center">15.6&#x2009;&#x00B1;&#x2009;5.9</td>
<td valign="top" align="center">15.0</td>
<td valign="top" align="center">2.0&#x2013;30.0</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;BC (<italic>n</italic>&#x2009;&#x003D;&#x2009;40)</td>
<td valign="top" align="center">17.3&#x2009;&#x00B1;&#x2009;5.9</td>
<td valign="top" align="center">17.0</td>
<td valign="top" align="center">5.0&#x2013;30.0</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;CVD (<italic>n</italic>&#x2009;&#x003D;&#x2009;30)</td>
<td valign="top" align="center">13.3&#x2009;&#x00B1;&#x2009;5.0</td>
<td valign="top" align="center">14.0</td>
<td valign="top" align="center">2.0&#x2013;22.0</td>
</tr>
<tr>
<td valign="top" align="left">Digital literacy (score 0&#x2013;30) (<italic>n</italic>&#x2009;&#x003D;&#x2009;70)</td>
<td valign="top" align="center">18.2&#x2009;&#x00B1;&#x2009;6.7</td>
<td valign="top" align="center">19.0</td>
<td valign="top" align="center">2.0&#x2013;30.0</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;BC (<italic>n</italic>&#x2009;&#x003D;&#x2009;40)</td>
<td valign="top" align="center">18.1&#x2009;&#x00B1;&#x2009;6.3</td>
<td valign="top" align="center">18.5</td>
<td valign="top" align="center">7.0&#x2013;27.0</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;CVD (<italic>n</italic>&#x2009;&#x003D;&#x2009;30)</td>
<td valign="top" align="center">18.3&#x2009;&#x00B1;&#x2009;7.1</td>
<td valign="top" align="center">20.0</td>
<td valign="top" align="center">2.0&#x2013;30.0</td>
</tr>
<tr>
<td valign="top" align="left">System usability scale (score 0&#x2013;100) (<italic>n</italic>&#x2009;&#x003D;&#x2009;62)</td>
<td valign="top" align="center">59.1&#x2009;&#x00B1;&#x2009;19.7</td>
<td valign="top" align="center">57.5</td>
<td valign="top" align="center">15.0&#x2013;97.5</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;BC (<italic>n</italic>&#x2009;&#x003D;&#x2009;34)</td>
<td valign="top" align="center">61.5&#x2009;&#x00B1;&#x2009;18.0</td>
<td valign="top" align="center">61.3</td>
<td valign="top" align="center">15.0&#x2013;95.0</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;CVD (<italic>n</italic>&#x2009;&#x003D;&#x2009;28)</td>
<td valign="top" align="center">55.9&#x2009;&#x00B1;&#x2009;20.9</td>
<td valign="top" align="center">55.0</td>
<td valign="top" align="center">22.5&#x2013;97.5</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3c"><label>3.3</label><title>System usability</title>
<p>The results suggest that the usability of G1 was marginally acceptable to the patients, as the mean score of the total was 59.1&#x2009;&#x00B1;&#x2009;19.7 (<xref ref-type="table" rid="T4">Table&#x00A0;4</xref>) (Cronbach&#x0027;s <italic>&#x03B1;</italic>&#x2009;&#x003D;&#x2009;0.85). In total, 18 patients scored G1 higher than 68, which was considered the threshold of complete acceptability. In contrast, 18 patients scored G1 lower than 52, which means one should consider them detractors. The statement that scored the highest was &#x201C;I am ready to use this system frequently&#x201D;, with an average score between neutral and agreement (3.5 out of 5) (<xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>). The lowest scores were for the statements &#x201C;I would need the support of a technical person to be able to use this system&#x201D;, &#x201C;I found the system very awkward to use&#x201D;, and &#x201C;I needed to learn a lot of things before I could get going with the system&#x201D;, with average scores between disagreement and neutral (2.3&#x2013;2.6 out of 5). The comments by patients ranged from acceptance to rejection, as evidenced by the following two quotes:</p>
<fig id="F2" position="float"><label>Figure 2</label>
<caption><p>System usability scale scores for the G1 prototype<italic>.</italic> sus_1: I am ready to use this system frequently. sus_2: I found the system unnecessarily complex. sus_3: I thought the system was easy to use. sus_4: I would need the support of a technical person to be able to use this system. sus_5: I found the various functions in this system were well integrated. sus_6: There was too much inconsistency in this system. sus_7: Most people would learn to use this system very quickly. sus_8: I found the system very awkward to use. sus_9: I felt very confident using the system. sus_10: I needed to learn a lot of things before I could get going with the system.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1629413-g002.tif"><alt-text content-type="machine-generated">Bar chart displaying the System Usability Scale (SUS) scores for ten items labeled sus_1 to sus_10. Each item has three bars representing totals, BC, and CVD, with numerical values ranging from 1.0 to 3.5 on the horizontal axis. A legend indicates the color coding: green for totals, yellow for BC, and blue for CVD.</alt-text>
</graphic>
</fig><disp-quote>
<p>&#x201C;AIDAVA is a system you can get used to, but it needs time and training. As soon as you understand it, there is nothing complicated.&#x201D;</p></disp-quote><disp-quote>
<p>&#x201C;In general, AIDAVA G1 is not reasonably usable for the average user, cannot be recommended. The main basis for this conclusion is the inability to obtain or analyze sufficiently high-quality data. If one of the doctors starts making decisions based on a health report synthesized from low-quality data, it can be a threat to human life.&#x201D;</p></disp-quote>
</sec>
<sec id="s3d"><label>3.4</label><title>Explainability/causability</title>
<p>The patients had moderately positive scores for the explainability/causability questions, scoring between 3.3 and 3.8 out of 5 (<xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>) (Cronbach&#x0027;s <italic>&#x03B1;</italic>&#x2009;&#x003D;&#x2009;0.69). The results indicate a significant difference between the scoring for the question &#x201C;For me it is important to know where the different curated health data are coming from&#x201D; and the scoring of the question related to its explainability in AIDAVA, &#x201C;In my opinion, information regarding this aspect is sufficient in AIDAVA&#x201D; (3.8 vs. 3.2; <italic>P</italic>&#x2009;&#x003D;&#x2009;0.01). The patients in both the BC and CVD use cases agreed in their scoring of explainability/causability. A specific comment by a patient in the CVD use case emphasized the lack of explainability as to where the PHD came from:</p>
<fig id="F3" position="float"><label>Figure 3</label>
<caption><p>Explainability/causability scores for the G1 prototype<italic>.</italic> scs_1: For me, it is important to know where the different curated health data are coming from. scs_suff_1: In my opinion, information regarding this aspect is sufficient in AIDAVA. scs_2: For me, it is important to know who has curated my health data and which tools have been used. scs_suff_2: In my opinion, information regarding this aspect is sufficient in AIDAVA. scs_3: For me, it is important to know whether a health data curation method has been applied by a human or an algorithm. scs_suff_3: In my opinion, information regarding this aspect is sufficient in AIDAVA.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1629413-g003.tif"><alt-text content-type="machine-generated">Bar chart titled \"Explainability/Causability G1\" displays data for four categories: scs_1, scs_suff_1, scs_2, and scs_suff_2. Each category is divided into Totals (green), BC (yellow), and CVD (blue) bars, with values ranging from approximately 1.0 to 4.0. Totals are the highest, followed by BC, and then CVD for each category.</alt-text>
</graphic>
</fig><disp-quote>
<p>&#x201C;What does &#x2018;medical partner&#x2019; mean under file names? This should mention WHERE the document came from. That is unclear with this term.&#x201D;</p></disp-quote>
</sec>
<sec id="s3e"><label>3.5</label><title>Patient interest</title>
<p>The patients were moderately positive to positive in their interest in AIDAVA-like technology, scoring between 3.4 and 4.4 out of 5 (<xref ref-type="fig" rid="F4">Figure&#x00A0;4</xref>) (Cronbach&#x0027;s <italic>&#x03B1;</italic>&#x2009;&#x003D;&#x2009;0.78). The patients in both the BC and CVD use cases were in agreement in their scoring, except for the question &#x201C;I am ready to spend the needed time to ensure proper data curation of my data&#x201D;, as the patients in the CVD use case were more willing to spend time to complete data curation than those in the BC use case (4.3 vs. 3.7; <italic>P</italic>&#x2009;&#x003D;&#x2009;0.012).</p>
<fig id="F4" position="float"><label>Figure 4</label>
<caption><p>Overall interest scores in AIDAVA-like technology<italic>.</italic> sus_11: I would recommend AIDAVA to my friend, colleague, or family member. sus_12: I am ready to work with AIDAVA when it is available on the market. sus_13: I understand the purpose of data curation. sus_14: I am ready to spend the needed time to ensure proper data curation of my data. sus_15: This system is unique and different from anything else available. sus_16: This system will allow me to manage my health records better.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1629413-g004.tif"><alt-text content-type="machine-generated">Bar chart titled "Overall interest in AIDAVA-like technology" showing interest levels for categories sus_11 to sus_16. Bars are divided into Totals (green), BC (yellow), and CVD (blue). The values range from 3.3 to 4.4.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3f"><label>3.6</label><title>Prototype testing</title>
<p>The time spent on G1 consisted of &#x201C;data ingestion and data curation&#x201D; and &#x201C;other activities,&#x201D; such as the training, visit(s), or visualization of PHD. On average, the patients spent 124&#x2009;&#x00B1;&#x2009;132&#x2005;min on data ingestion and data curation, with a median of 75&#x2005;min and a range of 0&#x2013;635&#x2005;min (<xref ref-type="table" rid="T5">Table&#x00A0;5</xref>). The patients in the BC use case spent significantly less time on average on data ingestion and data curation than those in the CVD use case (78 vs. 176&#x2005;min, <italic>P</italic>&#x2009;&#x003C;&#x2009;0.01). The patients spent 78&#x2009;&#x00B1;&#x2009;37&#x2005;min on average on other activities related to G1 testing, with no significant differences between the BC and CVD use cases (78&#x2009;&#x00B1;&#x2009;35 vs. 78&#x2009;&#x00B1;&#x2009;38, <italic>P</italic>&#x2009;&#x003D;&#x2009;0.99).</p>
<table-wrap id="T5" position="float"><label>Table 5</label>
<caption><p>Patient-reported time (min) spent on G1 testing.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Activity</th>
<th valign="top" align="center">Mean&#x2009;&#x00B1;&#x2009;SD</th>
<th valign="top" align="center">Median</th>
<th valign="top" align="center">Range</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Data ingestion and data curation (<italic>n</italic>&#x2009;&#x003D;&#x2009;60)</td>
<td valign="top" align="center">124&#x2009;&#x00B1;&#x2009;132</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">0&#x2013;635</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;BC (<italic>n</italic>&#x2009;&#x003D;&#x2009;32)</td>
<td valign="top" align="center">78&#x2009;&#x00B1;&#x2009;62</td>
<td valign="top" align="center">60</td>
<td valign="top" align="center">5&#x2013;290</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;CVD (<italic>n</italic>&#x2009;&#x003D;&#x2009;28)</td>
<td valign="top" align="center">176&#x2009;&#x00B1;&#x2009;166</td>
<td valign="top" align="center">130</td>
<td valign="top" align="center">0&#x2013;635</td>
</tr>
<tr>
<td valign="top" align="left">Other activities</td>
<td valign="top" align="center">78&#x2009;&#x00B1;&#x2009;37</td>
<td valign="top" align="center">80</td>
<td valign="top" align="center">0&#x2013;175</td>
</tr>
<tr>
<td valign="top" align="left" colspan="4">Training, visit(s), and visualization (<italic>n</italic>&#x2009;&#x003D;&#x2009;55)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;BC (<italic>n</italic>&#x2009;&#x003D;&#x2009;32)</td>
<td valign="top" align="center">78&#x2009;&#x00B1;&#x2009;35</td>
<td valign="top" align="center">80</td>
<td valign="top" align="center">19&#x2013;175</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;CVD (<italic>n</italic>&#x2009;&#x003D;&#x2009;23)</td>
<td valign="top" align="center">78&#x2009;&#x00B1;&#x2009;38</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">0&#x2013;150</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>The aim of this formative study was to evaluate G1&#x2019;s usability and explainability/causability and patient interest in AIDAVA-like technology. It allowed us to assess the AI-powered automatic health data curation and data publishing workflow, gather feedback regarding its strengths and weaknesses, and identify areas for improvement and necessary changes that need to be considered for the development of the second-generation (G2) prototype. Before testing using real patient data, the system was built and tested using synthetic test data, which, although it performed sub-optimally, was considered adequate by the evaluation site to proceed with this formative evaluation. Despite its perceived marginal acceptability among the patients who tested it using real-world data, G1 shows potential in automating data curation into a PHKG.</p>
<p>Apart from well-established tools, such as the optical character recognition (OCR) tool TESSERACT (<xref ref-type="bibr" rid="B26">26</xref>) and the German NLP tool AVERBIS Health Discovery (<xref ref-type="bibr" rid="B27">27</xref>), the majority of the curation tools integrated into G1 were still in the early stages of development and had not yet been fully tested and refined. However, they offered a promising solution and were seen as a better alternative to not having these tools in G1. For example, to extract structured data from Estonian and Dutch text, the AIDAVA consortium is developing a multi-lingual model capable of extracting concepts directly from these languages; this tool will be available in G2. In G1, we had to use a translation tool to translate these languages into German and then use the AVERBIS tool. This could have resulted in suboptimal translation and extraction. Unfortunately, this has likely contributed to the marginal acceptability of G1 but leaves an opportunity for improvement after further development.</p>
<p>Regarding the perceived usability of G1 by the patients, there was an equal mix of complete acceptability and detraction (approximately 25&#x0025; each), according to Brooke&#x2019;s scoring system (<xref ref-type="bibr" rid="B28">28</xref>). The fact that this was a formative evaluation study likely explains some of the detraction at this stage of development. The unfinished integration of all the curation and publishing tools could give patients a sense of it being an early prototype (which G1 is). Conversely, acceptability at this stage may be explained by the perceived future value of AIDAVA-like technology. Our data on the overall interest in AIDAVA-like technology suggest as much, as the patients&#x2019; scores for these questions were moderately positive to positive. As for the explainability/causability of data curation in G1, there were specific aspects that required improvement. For example, there was a discrepancy in scores between the questions &#x201C;For me, it is important to know where the different curated health data are coming from&#x201D; and its follow-up &#x201C;In my opinion, information regarding this aspect is sufficient in AIDAVA&#x201D;. This illustrates a clear need for explainability from the patients&#x0027; point of view. An explainability module incorporated in G2 may address this issue.</p>
<p>Overall, the user interface of G1 was considered quite straightforward and easy to use. Some patients suggested implementing a push notification when documents were ready to be ingested, as patients would have to manually check if documents were ready or they would have someone from the research team notify them, which they considered to not be user-friendly. Patients who curated documents found the procedure easy to follow and the patients who uploaded documents via their HDI found this to be very convenient and straightforward. Overall, G1 was, as expected, no more than marginally acceptable to the patients. The patients scored the lowest for the question, &#x201C;Would you recommend AIDAVA to a friend, colleague, or family member?&#x201D; at this stage of the development. Conversely, the patients scored highly in &#x201C;understanding the purpose of data curation.&#x201D; This suggests that the patients saw the potential for a well-developed, AI-supported virtual assistant that ingests and curates their PHD, which G1 has not achieved thus far due to its early development stage.</p>
<p>In addition, comments were made about the HITL dialogue, which will be improved in G2. Full automation without errors or missing data did not occur in any of the patients&#x0027; PHD. The goal of the HITL dialogue was to provide the patients with the correct context when asking for the missing data. However, the communication between the intelligent virtual assistant and the patient was not understandable in many cases and lacked the very context that would have been necessary for understandability. Comments such as &#x201C;[t]he questions were not asked in a simple way/in simple language&#x201D; were reiterated by patients across sites in both use cases. An example of the language used is as follows: &#x201C;AIDAVA needs your help with patient identification (admission, discharge, transfer information). Information about hat Geburtsdatum is missing&#x201D; [original question for the Dutch patient: &#x201C;AIDAVA heeft uw hulp nodig bij patient identification (admission, discharge, transfer information). Informatie over hat Geburtsdatum mist&#x201D;]. Even though patients could understand from the context that a date of birth was being requested, the terms &#x201C;admission, discharge, transfer information,&#x201D; referring to the source documents, were not understandable for the average patient. In addition, the German words scattered through the question for non-German patients gave a strong sense of G1 being an early prototype, which may also have negatively impacted usability scores. Therefore, implementing the complete set of curation tools for G2 will likely benefit the HITL approach and will likely result in more favorable usability and patient interest outcomes.</p>
<p>In the development of G2, the AIDAVA consortium will focus on improving and integrating the curation tools according to the findings from the G1 evaluation. A critical component of the configuration and deployment of G1 was the technical specification of the data to be exchanged between the hospital system and G1. The data transfer specification (DTS) was designed to ensure the consistency and accuracy of the data transfer. Unfortunately, during the deployment of G1 in the Netherlands, the DTS was not properly configured, which led all the patients with BC and a few patients with CVD to have issues with data ingestion and curation. Even though the issue of data ingestion was solved in time, curation was solved too late for these patients, likely impacting the acceptability of G1. Thus, the AIDAVA technical team will explore and develop solutions to facilitate necessary configurations for this approach and ensure that the automated data curation and data ingestion workflow will be streamlined and effective in G2.</p>
<p>In addition, further development of the publishing tools could provide patients with the most tangible use for their PHD (and PHKG), in the form of their IPS. Unfortunately, due to the incomplete integration of the curation tools as described before and the resulting incomplete quality of the PHKG, data publishing was not well covered by G1. Thus, the evaluation of the publishing step will be prioritized in the G2 assessment. Moreover, we aim to upgrade the entire automated workflow for effective data curation and data publishing by smoothing the integration of both non-AI and AI-based tools. We will improve the HITL process based on large language models and optimize the human&#x2013;machine interaction according to the users&#x0027; medical and digital literacy.</p>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusion</title>
<p>Despite its marginal acceptability, G1 shows potential in automating data curation into a personal health knowledge graph, but it has not reached full maturity yet. G1 was intended to reuse existing curation tools. However, apart from a few off-the-shelf software solutions, there were no suitable tools available for reuse. As a result, the team had to rely on very early prototypes of tools that were originally planned for use in G2. This may have contributed to lower usability and explainability/causability. Conversely, patient interest in AIDAVA-like technology seems quite high at this stage of development, likely due to the promising potential of curation and publication technology. Improvements in the library of curation and publication tools are planned for G2 and are necessary to fully realize the value of the AIDAVA solution.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The datasets presented in this article are not readily available because the consortium project is not finished at the time of publication. Requests to access the datasets should be directed to <email>rutger.vanmierlo@maastrichtuniversity.nl</email>.</p>
</sec>
<sec id="s7" sec-type="ethics-statement"><title>Ethics statement</title>
<p>This study involving humans was approved by each local ethics committee at the participating test sites. This study was conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s8" sec-type="author-contributions"><title>Author contributions</title>
<p>RvM: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. WL: Conceptualization, Data curation, Investigation, Methodology, Writing &#x2013; review &#x0026; editing. KN: Conceptualization, Data curation, Investigation, Methodology, Visualization, Writing &#x2013; review &#x0026; editing. MKa: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Visualization, Writing &#x2013; review &#x0026; editing. MM: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Visualization, Writing &#x2013; review &#x0026; editing. A-LB: Data curation, Investigation, Writing &#x2013; review &#x0026; editing. MP: Conceptualization, Writing &#x2013; review &#x0026; editing. MKr: Conceptualization, Data curation, Investigation, Methodology, Writing &#x2013; review &#x0026; editing. MB: Conceptualization, Writing &#x2013; review &#x0026; editing. LH: Supervision, Writing &#x2013; review &#x0026; editing. AvH: Supervision, Writing &#x2013; review &#x0026; editing. RC: Conceptualization, Data curation, Funding acquisition, Methodology, Software, Writing &#x2013; review &#x0026; editing. AD: Conceptualization, Funding acquisition, Supervision, Writing &#x2013; review &#x0026; editing. IdZ: Conceptualization, Funding acquisition, Project administration, Visualization, Writing &#x2013; review &#x0026; editing. PK: Conceptualization, Data curation, Investigation, Methodology, Software, Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="s9"><title>Group members of the AIDAVA consortium</title>
<p>Alexander Kreutz, Bern University of Applied Sciences, Biel, Switzerland; Alp&#x00E1;r Tana, Egnosis, Sfantu Gheorghe, Romania; Aranka Ravai-Nagy, Egnosis, Sfantu Gheorghe, Romania; Bart Scheenstra, Department of Cardiothoracic Surgery, Heart and Vascular Centre, Maastricht University, Maastricht, the Netherlands; B&#x00E9;la Bihari, Egnosis, Sfantu Gheorghe, Romania; Botond Kiss, Egnosis, Sfantu Gheorghe, Romania; Dominik Steiger, MIDATA Cooperative, Zurich, Switzerland; Elion S&#x00F5;ber, North Estonia Medical Centre, Tallinn, Estonia; Emmanuel Benoist, Bern University of Applied Sciences, Biel, Switzerland; Eno-Martin Lotman, North Estonia Medical Centre, Tallinn, Estonia; Erol Ensar, Maastricht University Department of Advanced Computing Sciences, Maastricht, Netherlands; Gleb Klimenkov, North Estonia Medical Centre, Tallinn, Estonia; Gregorio Sambataro, European Heart Network, Brussels, Belgium; Heimo M&#x00FC;ller, Medical University of Graz, Graz, Austria; Istv&#x00E1;n Horv&#x00E1;th, Egnosis, Sfantu Gheorghe, Romania; J&#x00E1;nos D&#x00E1;niel Dallos, Egnosis, Sfantu Gheorghe, Romania; Katerina Zdravkova Serafimova, ONTOTEXT AD, Sofia, Bulgaria; Katrin Lepik, North Estonia Medical Centre, Tallinn, Estonia; Kertu M&#x00F5;istlik, North Estonia Medical Centre, Tallinn, Estonia; Kristian Kankainen, North Estonia Medical Centre, Tallinn, Estonia; Liesbeth Boersma, Maastricht University Medical Centre&#x002B;, Maastricht, Netherlands; L&#x00F3;r&#x00E1;nt Ferencz, Egnosis, Sfantu Gheorghe, Romania; Michel Dumontier, Department of Advanced Computing Sciences, Maastricht University, Maastricht, Netherlands; Monika Moga, Egnosis, Sfantu Gheorghe, Romania; Natalja &#x0160;ermolajeva, North Estonia Medical Centre, Tallinn, Estonia; Sina Amirrajab, The D-Lab, Department of Precision Medicine, GROW&#x2014;Research Institute for Oncology and Reproduction, Maastricht University, Maastricht, Netherlands; Svetla Boytcheva, Ontotext, Sofia, Bulgaria; Todor Primov, Ontotext, Sofia, Bulgaria; Zolt&#x00E1;n L&#x00E1;z&#x00E1;r, Egnosis, Sfantu Gheorghe, Romania; Zsolt M&#x00E1;t&#x00E9;, Egnosis, Sfantu Gheorghe, Romania.</p>
</sec>
<sec id="s10" sec-type="funding-information"><title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was supported by the European Union&#x2019;s Horizon Europe Research and Innovation Programme under Grant Agreement No. 101057062 (AIDAVA). The work of the Swiss Partner (MIDATA) received funding from the Swiss State Secretariat for Education, Research and Innovation (SBFI) under Subvention Contract 22.00093, REF-1131-52104. The project was supported by a license waiver from SNOMED-CT International for a period of 2&#x2005;years (2024&#x2013;2025), on condition that SNOMED-CT is not used and/or deployed commercially.</p>
</sec>
<ack><title>Acknowledgments</title>
<p>The authors are grateful to all the members of the AIDAVA consortium who diligently support the project with their expertise. A special thanks goes to the patient consultants with breast cancer and cardiovascular disease from the ECPC (now replaced by Data for Patients) and EHN patient organizations; they keep providing us with insights and a sense of what is important to the patients, which is critically important for the project.</p>
</ack>
<sec id="s11" sec-type="COI-statement"><title>Conflict of interest</title>
<p>IdZ was employed by b!loba.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s12" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s14" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s13" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fdgth.2025.1629413/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fdgth.2025.1629413/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material id="SD1" content-type="local-data">
<media mimetype="application" mime-subtype="pdf" xlink:href="Datasheet1.pdf"/></supplementary-material>
<supplementary-material id="SD2" content-type="local-data">
<media mimetype="application" mime-subtype="pdf" xlink:href="Datasheet2.pdf"/></supplementary-material>
</sec>
<fn-group>
<title>Abbreviations</title>
<fn fn-type="abbr" id="ab001"><p>AIDAVA, AI-powered data curation and publishing virtual assistant; BC, breast cancer; CVD, cardiovascular disease; DTS, data transfer specification; EHR, electronic health record; G1, AIDAVA first-generation prototype; G2, AIDAVA second-generation prototype; HDI, health data intermediary; MUG, Medical University of Graz; MUMC&#x002B;, Maastricht University Medical Centre; NEMC, North Estonia Medical Centre; PHD, personal health data; PHKG, personal health knowledge graph; REDCap, Research Electronic Data Capture; SMART, Second Manifestations of Arterial Disease; SUS, system usability scale.</p></fn>
</fn-group>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harahap</surname><given-names>NC</given-names></name><name><surname>Handayani</surname><given-names>PW</given-names></name><name><surname>Hidayanto</surname><given-names>AN</given-names></name></person-group>. <article-title>Functionalities and issues in the implementation of personal health records: systematic review</article-title>. <source>J Med Internet Res</source>. (<year>2021</year>) <volume>23</volume>(<issue>7</issue>):<fpage>e26236</fpage>. <pub-id pub-id-type="doi">10.2196/26236</pub-id><pub-id pub-id-type="pmid">34287210</pub-id></citation></ref>
<ref id="B2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname><given-names>PC</given-names></name><name><surname>Ash</surname><given-names>JS</given-names></name><name><surname>Bates</surname><given-names>DW</given-names></name><name><surname>Overhage</surname><given-names>JM</given-names></name><name><surname>Sands</surname><given-names>DZ</given-names></name></person-group>. <article-title>Personal health records: definitions, benefits, and strategies for overcoming barriers to adoption</article-title>. <source>J Am Med Inform Assoc</source>. (<year>2006</year>) <volume>13</volume>(<issue>2</issue>):<fpage>121</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1197/jamia.M2025</pub-id><pub-id pub-id-type="pmid">16357345</pub-id></citation></ref>
<ref id="B3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roberts</surname><given-names>A</given-names></name></person-group>. <article-title>Language, structure, and reuse in the electronic health record</article-title>. <source>AMA J Ethics</source>. (<year>2017</year>) <volume>19</volume>(<issue>3</issue>):<fpage>281</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1001/journalofethics.2017.19.3.stas1-1703</pub-id><pub-id pub-id-type="pmid">28323609</pub-id></citation></ref>
<ref id="B4"><label>4.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sedlakova</surname><given-names>J</given-names></name><name><surname>Daniore</surname><given-names>P</given-names></name><name><surname>Horn Wintsch</surname><given-names>A</given-names></name><name><surname>Wolf</surname><given-names>M</given-names></name><name><surname>Stanikic</surname><given-names>M</given-names></name><name><surname>Haag</surname><given-names>C</given-names></name><etal/></person-group> <article-title>Challenges and best practices for digital unstructured data enrichment in health research: a systematic narrative review</article-title>. <source>PLOS Digit Health</source>. (<year>2023</year>) <volume>2</volume>(<issue>10</issue>):<fpage>e0000347</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pdig.0000347</pub-id><pub-id pub-id-type="pmid">37819910</pub-id></citation></ref>
<ref id="B5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Holzinger</surname><given-names>A</given-names></name></person-group>. <article-title>Interactive machine learning for health informatics: when do we need the human-in-the-loop?</article-title> <source>Brain Inform</source>. (<year>2016</year>) <volume>3</volume>(<issue>2</issue>):<fpage>119</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1007/s40708-016-0042-6</pub-id><pub-id pub-id-type="pmid">27747607</pub-id></citation></ref>
<ref id="B6"><label>6.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Damen</surname><given-names>DJ</given-names></name><name><surname>Schoonman</surname><given-names>GG</given-names></name><name><surname>Maat</surname><given-names>B</given-names></name><name><surname>Habibovi&#x0107;</surname><given-names>M</given-names></name><name><surname>Krahmer</surname><given-names>E</given-names></name><name><surname>Pauws</surname><given-names>S</given-names></name></person-group>. <article-title>Patients managing their medical data in personal electronic health records: scoping review</article-title>. <source>J Med Internet Res</source>. (<year>2022</year>) <volume>24</volume>(<issue>12</issue>):<fpage>e37783</fpage>. <pub-id pub-id-type="doi">10.2196/37783</pub-id><pub-id pub-id-type="pmid">36574275</pub-id></citation></ref>
<ref id="B7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Zegher</surname><given-names>I</given-names></name><name><surname>Norak</surname><given-names>K</given-names></name><name><surname>Steiger</surname><given-names>D</given-names></name><name><surname>M&#x00FC;ller</surname><given-names>H</given-names></name><name><surname>Kalra</surname><given-names>D</given-names></name><name><surname>Scheenstra</surname><given-names>B</given-names></name><etal/></person-group> <article-title>Artificial intelligence based data curation: enabling a patient-centric European health data space</article-title>. <source>Front Med (Lausanne)</source>. (<year>2024</year>) <volume>11</volume>:<fpage>1365501</fpage>. <pub-id pub-id-type="doi">10.3389/fmed.2024.1365501</pub-id><pub-id pub-id-type="pmid">38813389</pub-id></citation></ref>
<ref id="B8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gehrmann</surname><given-names>J</given-names></name><name><surname>Herczog</surname><given-names>E</given-names></name><name><surname>Decker</surname><given-names>S</given-names></name><name><surname>Beyan</surname><given-names>O</given-names></name></person-group>. <article-title>What prevents us from reusing medical real-world data in research</article-title>. <source>Sci Data</source>. (<year>2023</year>) <volume>10</volume>(<issue>1</issue>):<fpage>459</fpage>. <pub-id pub-id-type="doi">10.1038/s41597-023-02361-2</pub-id><pub-id pub-id-type="pmid">37443164</pub-id></citation></ref>
<ref id="B9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Abad-Navarro</surname><given-names>F</given-names></name><name><surname>Mart&#x00ED;nez-Costa</surname><given-names>C</given-names></name></person-group>. <article-title>A knowledge graph-based data harmonization framework for secondary data reuse</article-title>. <source>Comput Methods Programs Biomed</source>. (<year>2024</year>) <volume>243</volume>:<fpage>107918</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2023.107918</pub-id><pub-id pub-id-type="pmid">37981455</pub-id></citation></ref>
<ref id="B10"><label>10.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Shirai</surname><given-names>SS</given-names></name><name><surname>Seneviratne</surname><given-names>O</given-names></name><name><surname>McGuinness</surname><given-names>DL</given-names></name></person-group>. <article-title>Applying personal knowledge graphs to health 2021</article-title>. (2021). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://dx.doi.org/10.48550/arxiv.2104.07587">https://dx.doi.org/10.48550/arxiv.2104.07587</ext-link> <comment>(Accessed February 26, 2025)</comment>.</citation></ref>
<ref id="B11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rotmensch</surname><given-names>M</given-names></name><name><surname>Halpern</surname><given-names>Y</given-names></name><name><surname>Tlimat</surname><given-names>A</given-names></name><name><surname>Horng</surname><given-names>S</given-names></name><name><surname>Sontag</surname><given-names>D</given-names></name></person-group>. <article-title>Learning a health knowledge graph from electronic medical records</article-title>. <source>Sci Rep</source>. (<year>2017</year>) <volume>7</volume>(<issue>1</issue>):<fpage>5994</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-05778-z</pub-id><pub-id pub-id-type="pmid">28729710</pub-id></citation></ref>
<ref id="B12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van Woensel</surname><given-names>W</given-names></name><name><surname>Armstrong</surname><given-names>C</given-names></name><name><surname>Rajaratnam</surname><given-names>M</given-names></name><name><surname>Gupta</surname><given-names>V</given-names></name><name><surname>Abidi</surname><given-names>SSR</given-names></name></person-group>. <article-title>Using knowledge graphs to plausibly infer missing associations in EMR data</article-title>. <source>Stud Health Technol Inform</source>. (<year>2021</year>) <volume>281</volume>:<fpage>417</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.3233/SHTI210192</pub-id><pub-id pub-id-type="pmid">34042777</pub-id></citation></ref>
<ref id="B13"><label>13.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nikiema</surname><given-names>JN</given-names></name><name><surname>Griffier</surname><given-names>R</given-names></name><name><surname>Jouhet</surname><given-names>V</given-names></name><name><surname>Mougin</surname><given-names>F</given-names></name></person-group>. <article-title>Aligning an interface terminology to the Logical Observation Identifiers Names and Codes [LOINC(&#x00AE;)]</article-title>. <source>JAMIA Open</source>. (<year>2021</year>) <volume>4</volume>(<issue>2</issue>):<fpage>ooab035</fpage>. <pub-id pub-id-type="doi">10.1093/jamiaopen/ooab035</pub-id><pub-id pub-id-type="pmid">34131637</pub-id></citation></ref>
<ref id="B14"><label>14.</label><citation citation-type="other"><collab>AIDAVA</collab>. <article-title>Project website</article-title> (2023). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.aidava.eu/">https://www.aidava.eu/</ext-link> <comment>(Accessed February 25, 2025)</comment>.</citation></ref>
<ref id="B15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Topol</surname><given-names>EJ</given-names></name></person-group>. <article-title>High-performance medicine: the convergence of human and artificial intelligence</article-title>. <source>Nat Med</source>. (<year>2019</year>) <volume>25</volume>(<issue>1</issue>):<fpage>44</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-018-0300-7</pub-id><pub-id pub-id-type="pmid">30617339</pub-id></citation></ref>
<ref id="B16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ramesh</surname><given-names>A</given-names></name><name><surname>Kambhampati</surname><given-names>C</given-names></name><name><surname>Monson</surname><given-names>J</given-names></name><name><surname>Drew</surname><given-names>P</given-names></name></person-group>. <article-title>Artificial intelligence in medicine</article-title>. <source>Ann R Coll Surg Engl</source>. (<year>2004</year>) <volume>86</volume>(<issue>5</issue>):<fpage>334</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1308/147870804290</pub-id><pub-id pub-id-type="pmid">15333167</pub-id></citation></ref>
<ref id="B17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hageman</surname><given-names>SHJ</given-names></name><name><surname>McKay</surname><given-names>AJ</given-names></name><name><surname>Ueda</surname><given-names>P</given-names></name><name><surname>Gunn</surname><given-names>LH</given-names></name><name><surname>Jernberg</surname><given-names>T</given-names></name><name><surname>Hagstr&#x00F6;m</surname><given-names>E</given-names></name><etal/></person-group> <article-title>Estimation of recurrent atherosclerotic cardiovascular event risk in patients with established cardiovascular disease: the updated SMART2 algorithm</article-title>. <source>Eur Heart J</source>. (<year>2022</year>) <volume>43</volume>(<issue>18</issue>):<fpage>1715</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/ehac056</pub-id><pub-id pub-id-type="pmid">35165703</pub-id></citation></ref>
<ref id="B18"><label>18.</label><citation citation-type="other"><collab>HL7 International</collab>. <article-title>Home&#x2014;International patient summary implementation guide v2.0.0</article-title>. (2020). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://build.fhir.org/ig/HL7/fhir-ips/">https://build.fhir.org/ig/HL7/fhir-ips/</ext-link> <comment>(Accessed May 15, 2025)</comment>.</citation></ref>
<ref id="B19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Holzinger</surname><given-names>A</given-names></name><name><surname>Langs</surname><given-names>G</given-names></name><name><surname>Denk</surname><given-names>H</given-names></name><name><surname>Zatloukal</surname><given-names>K</given-names></name><name><surname>M&#x00FC;ller</surname><given-names>H</given-names></name></person-group>. <article-title>Causability and explainability of artificial intelligence in medicine</article-title>. <source>Wiley Interdiscip Rev Data Min Knowl Discov</source>. (<year>2019</year>) <volume>9</volume>(<issue>4</issue>):<fpage>e1312</fpage>. <pub-id pub-id-type="doi">10.1002/widm.1312</pub-id><pub-id pub-id-type="pmid">32089788</pub-id></citation></ref>
<ref id="B20"><label>20.</label><citation citation-type="other"><collab>ECPC</collab>. <article-title>European Cancer Patient Coalition</article-title>. (2024). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://ecpc.org/">https://ecpc.org/</ext-link> <comment>(Accessed May 15, 2025)</comment>.</citation></ref>
<ref id="B21"><label>21.</label><citation citation-type="other"><collab>EHN</collab>. <article-title>European Heart Network</article-title>. <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://ehnheart.org/">https://ehnheart.org/</ext-link> <comment>(Accessed May 15, 2025)</comment>.</citation></ref>
<ref id="B22"><label>22.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harris</surname><given-names>PA</given-names></name><name><surname>Taylor</surname><given-names>R</given-names></name><name><surname>Minor</surname><given-names>BL</given-names></name><name><surname>Elliott</surname><given-names>V</given-names></name><name><surname>Fernandez</surname><given-names>M</given-names></name><name><surname>O&#x0027;Neal</surname><given-names>L</given-names></name><etal/></person-group> <article-title>The REDCap consortium: building an international community of software platform partners</article-title>. <source>J Biomed Inform</source>. (<year>2019</year>) <volume>95</volume>:<fpage>103208</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2019.103208</pub-id><pub-id pub-id-type="pmid">31078660</pub-id></citation></ref>
<ref id="B23"><label>23.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harris</surname><given-names>PA</given-names></name><name><surname>Taylor</surname><given-names>R</given-names></name><name><surname>Thielke</surname><given-names>R</given-names></name><name><surname>Payne</surname><given-names>J</given-names></name><name><surname>Gonzalez</surname><given-names>N</given-names></name><name><surname>Conde</surname><given-names>JG</given-names></name></person-group>. <article-title>Research electronic data capture (REDCap)&#x2014;a metadata-driven methodology and workflow process for providing translational research informatics support</article-title>. <source>J Biomed Inform</source>. (<year>2009</year>) <volume>42</volume>(<issue>2</issue>):<fpage>377</fpage>&#x2013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2008.08.010</pub-id><pub-id pub-id-type="pmid">18929686</pub-id></citation></ref>
<ref id="B24"><label>24.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ensink</surname><given-names>CJ</given-names></name><name><surname>Keijsers</surname><given-names>NLW</given-names></name><name><surname>Groen</surname><given-names>BE</given-names></name></person-group>. <article-title>Translation and validation of the system usability scale to a Dutch version: D-SUS</article-title>. <source>Disabil Rehabil</source>. (<year>2024</year>) <volume>46</volume>(<issue>2</issue>):<fpage>395</fpage>&#x2013;<lpage>400</lpage>. <pub-id pub-id-type="doi">10.1080/09638288.2022.2160837</pub-id><pub-id pub-id-type="pmid">36573399</pub-id></citation></ref>
<ref id="B25"><label>25.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Sauro</surname><given-names>J</given-names></name></person-group>. <article-title>5 ways to interpret a SUS score</article-title>. (2018). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://measuringu.com/interpret-sus-score/">https://measuringu.com/interpret-sus-score/</ext-link> <comment>(Accessed February 2, 2025)</comment>.</citation></ref>
<ref id="B26"><label>26.</label><citation citation-type="book"><collab>Tesseract OCR Team</collab>. <article-title>Tesseract OCR</article-title>. <edition>5.3.3 ed.</edition> <publisher-name>GitHub</publisher-name> (<year>2025</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://github.com/tesseract-ocr/tesseract">https://github.com/tesseract-ocr/tesseract</ext-link> <comment>(Accessed May 15, 2025)</comment>.</citation></ref>
<ref id="B27"><label>27.</label><citation citation-type="other"><collab>Averbis</collab>. <article-title>Health Discovery &#x007C; KI-Plattform f&#x00FC;r Gesundheitsdaten</article-title>. (2020). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://averbis.com/health-discovery">https://averbis.com/health-discovery</ext-link> <comment>(Accessed April 7, 2025)</comment>.</citation></ref>
<ref id="B28"><label>28.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brooke</surname><given-names>J</given-names></name></person-group>. <article-title>SUS: a quick and dirty usability scale</article-title>. <source>Usability Eval Ind</source>. (<year>1995</year>) <volume>189</volume>:<fpage>4</fpage>&#x2013;<lpage>7</lpage>.</citation></ref></ref-list>
</back>
</article>