<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="methods-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2025.1630351</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Public Health</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Ethical and secure evidence generation from regionwide clinical data through a collaborative environment for advancing predictive care</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>Dolores</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Villegas</surname> <given-names>Rom&#x000E1;n</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>de la Oliva</surname> <given-names>V&#x000ED;ctor</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Esteban-Medina</surname> <given-names>Alberto</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Fern&#x000E1;ndez del Valle</surname> <given-names>Patricia</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>S&#x000E1;nchez</surname> <given-names>Ana</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Susin</surname> <given-names>M. Belen</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Gutierrez-Alvarez</surname> <given-names>Isidoro</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Reboredo</surname> <given-names>Marta</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Alejos</surname> <given-names>Laura</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/3141958/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Loucera</surname> <given-names>Carlos</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/3070594/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Dopazo</surname> <given-names>Joaqu&#x000ED;n</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/38670/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Subdirecci&#x000F3;n T&#x000E9;cnica Asesora de Gesti&#x000F3;n de la Informaci&#x000F3;n, Servicio Andaluz de Salud</institution>, <addr-line>Sevilla</addr-line>, <country>Spain</country></aff>
<aff id="aff2"><sup>2</sup><institution>Andalusian Platform for Computational Medicine, Andalusian Public Foundation Progress and Health-FPS</institution>, <addr-line>Seville</addr-line>, <country>Spain</country></aff>
<aff id="aff3"><sup>3</sup><institution>R&#x00026;I Centers, Programs and Projects Management Area, Andalusian Public Foundation Progress and Health-FPS</institution>, <addr-line>Seville</addr-line>, <country>Spain</country></aff>
<aff id="aff4"><sup>4</sup><institution>Institute of Biomedicine of Seville (IBiS), University Hospital Virgen del Roc&#x000ED;o/CSIC/University of Sevilla</institution>, <addr-line>Sevilla</addr-line>, <country>Spain</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: George Notas, University of Crete, Greece</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Anca Parmena Olimid, University of Craiova, Romania</p>
<p>Tom Southerington, University of Turku, Finland</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Joaqu&#x000ED;n Dopazo <email>joaquin.dopazo&#x00040;juntadeandalucia.es</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>08</day>
<month>08</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>13</volume>
<elocation-id>1630351</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>05</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>21</day>
<month>07</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Mu&#x000F1;oyerro-Mu&#x000F1;iz, Villegas, de la Oliva, Esteban-Medina, Fern&#x000E1;ndez del Valle, S&#x000E1;nchez, Susin, Gutierrez-Alvarez, Reboredo, Alejos, Loucera and Dopazo.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Mu&#x000F1;oyerro-Mu&#x000F1;iz, Villegas, de la Oliva, Esteban-Medina, Fern&#x000E1;ndez del Valle, S&#x000E1;nchez, Susin, Gutierrez-Alvarez, Reboredo, Alejos, Loucera and Dopazo</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Ensuring data protection is a major challenge in clinical research involving sensitive patient information. However, secure processing environments (SPEs) enable the ethical and compliant secondary use of real-world data (RWD) for evidence generation. This study presents a collaborative infrastructure integrating a comprehensive Health Population Database (BPS) with a legal and computational framework to facilitate secure, large-scale clinical studies. The Andalusian Platform for Medical Evidence Generation is an SPE embedded within the Andalusian healthcare network, leveraging RWD from over 15 million patients from the BPS. It supports diverse studies, including treatment efficacy, survival analyses, and predictive modeling, while ensuring alignment with the General Data Protection Regulation (GDPR) and proactively designed to meet forthcoming European Health Data Space (EHDS) requirements. Data are processed within a secure ecosystem, preventing unauthorized access and enabling legally compliant research collaborations. By combining clinical RWD with a robust ethical and legal framework, we present a scalable model for secure, data-driven region-level healthcare innovation. The platform supports cost-effective predictive models, particularly relevant for aging populations, and establishes a blueprint for regional and international adaptation. This approach strengthens the role of healthcare systems in both knowledge generation and sustainable economic growth, ensuring that patient data is leveraged for scientific and societal benefit.</p></abstract>
<kwd-group>
<kwd>real-world data</kwd>
<kwd>electronic health records</kwd>
<kwd>predictive medicine</kwd>
<kwd>secure processing environment</kwd>
<kwd>artificial intelligence</kwd>
<kwd>data privacy</kwd>
</kwd-group>
<contract-num rid="cn001">PID2020-117979RB-I00</contract-num>
<contract-num rid="cn001">PID2023-152380OB-C21</contract-num>
<contract-num rid="cn001">PRTR-C17.I1</contract-num>
<contract-num rid="cn001">TED2021-132346B-I00</contract-num>
<contract-num rid="cn002">IMP/00019</contract-num>
<contract-num rid="cn002">PMP24/00024</contract-num>
<contract-num rid="cn003">IE19_259 FPS</contract-num>
<contract-sponsor id="cn001">Ministerio de Ciencia e Innovaci&#x000F3;n<named-content content-type="fundref-id">https://doi.org/10.13039/501100004837</named-content></contract-sponsor>
<contract-sponsor id="cn002">Instituto de Salud Carlos III<named-content content-type="fundref-id">https://doi.org/10.13039/501100004587</named-content></contract-sponsor>
<contract-sponsor id="cn003">Consejer&#x000ED;a de Salud y Familias, Junta de Andaluc&#x000ED;a<named-content content-type="fundref-id">https://doi.org/10.13039/501100010566</named-content></contract-sponsor>
<counts>
<fig-count count="3"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="64"/>
<page-count count="11"/>
<word-count count="8530"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Public Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>The use of Real-World Data (RWD) is transforming healthcare research by enabling large-scale studies on treatment effectiveness, disease progression, and healthcare resource utilization in routine clinical settings (<xref ref-type="bibr" rid="B1">1</xref>). Unlike randomized controlled trials (RCTs), which have strict inclusion criteria and often exclude diverse patient populations, RWD provides a broader and more representative perspective on real-world clinical outcomes (<xref ref-type="bibr" rid="B2">2</xref>). By using RWD from electronic health records (EHRs), hospital admissions, pharmacy records, laboratory tests, and disease registries, researchers can generate Real-World Evidence (RWE) that supports clinical decision-making, regulatory policies, and medical innovation (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>One of the most significant advantages of RWD is its ability to accelerate research and improve patient outcomes by providing insights that complement traditional clinical trials (<xref ref-type="bibr" rid="B5">5</xref>). This is particularly valuable in contexts where RCTs are impractical, such as rare diseases, where patient populations are small, or during public health emergencies, like the COVID-19 pandemic, where rapid evidence generation is essential (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>). Moreover, RWD plays a crucial role in post-marketing drug surveillance (Phase IV studies), enabling the continuous assessment of drug safety and effectiveness in broader patient populations (<xref ref-type="bibr" rid="B8">8</xref>). Also, an upsurge in preventive medicine is foreseeable from the possibility of deriving early condition, diagnosis or end-point predictors from retrospective RWD studies (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>).</p>
<p>Despite its potential, the use of RWD presents several challenges, particularly regarding data privacy, security, and regulatory compliance. Ensuring that sensitive patient information is protected while enabling meaningful research is a key priority in modern healthcare. The European Health Data Space (EHDS) (<xref ref-type="bibr" rid="B11">11</xref>) aims to establish a legal framework that facilitates the secure and ethical use of health data across Europe while complying with the General Data Protection Regulation (GDPR) (<xref ref-type="bibr" rid="B12">12</xref>).</p>
<p>In this context, the Andalusian Health Population Database (BPS, acronym for &#x0201C;Base Poblacional de Salud&#x0201D; in Spanish) stands as one of the most comprehensive health data repositories in Europe, containing EHRs from over 15 million patients since 2001 (<xref ref-type="bibr" rid="B13">13</xref>). The Andalusian Platform for Medical Evidence Generation (PAGEM, from its acronym in Spanish) is an instrumental infrastructure, hosted within the Andalusian Public Health System, that provides a Secure Processing Environment (SPE) enabling the secondary use of these data for research while ensuring compliance with GDPR and EHDS regulations (<xref ref-type="bibr" rid="B14">14</xref>). Through this infrastructure, researchers can conduct treatment efficacy studies, survival analyses, and predictive modeling while maintaining strict data security standards, ensuring patient data privacy.</p>
<p>This paper presents PAGEM as a model for ethical and secure RWD utilization, demonstrating its potential to shift healthcare from a reactive to a predictive approach. By ensuring data protection, regulatory compliance, and research accessibility, PAGEM exemplifies how public health systems can leverage clinical big data to drive medical innovation and improve patient outcomes, setting a standard for other regions and countries to follow within the new EHDS.</p>
</sec>
<sec id="s2">
<title>2 Material and equipment</title>
<sec>
<title>2.1 A region-wide collaborative environment for secure and ethical generation of evidence from medical data</title>
<p>A region-wide collaborative environment for secure and ethical secondary use of medical data to generate medical evidence has been structured around three fundamental components: first, the PAGEM, an SPE with a state-of-the-art computing infrastructure that ensures data security and confidentiality for secondary use of medical data for research purposes; second, an immense medical data lake, the BPS, integrating EHRs and other clinical data from over 15 million patients, covering a wide range of health indicators, diagnoses, treatments, and healthcare utilization; and third, a regional legal and ethical framework, compliant with GDPR and the EHDS, designed to ensure that patient privacy is protected while enabling the responsible use of information for research purposes.</p>
<p>Within this environment, researchers work within the healthcare corporate network, where data never leaves the controlled secure environment. This privacy-preserving model eliminates risks associated with data transfers while enabling high-performance analytics, including artificial intelligence (AI)-driven studies, survival analyses, drug safety assessments, etc.</p>
</sec>
<sec>
<title>2.2 The PAGEM infrastructure</title>
<p>The PAGEM is an SPE consisting of a computational infrastructure initially funded within the scope of the Andalusian Plan for Research, Development and Innovation (<xref ref-type="bibr" rid="B14">14</xref>). This infrastructure is located within the corporate network of the Andalusian Public Health System (SSPA, from its acronym in Spanish) and has been specifically conceived for the ethical and secure analysis of data protected by the GDPR. PAGEM is compliant with the definition of SPE as described in Article 50 of the Resolution of the European Parliament of 24 April 2024 on the proposal for a Regulation of the European Parliament and of the Council on the EHDS (<xref ref-type="bibr" rid="B11">11</xref>).</p>
<sec>
<title>2.2.1 Open software, reproducibility and explainability</title>
<p>For the sake of reproducibility and explainability, all the software used in the studies carried out in the PAGEM SPE is open. For data processing, Python (<xref ref-type="bibr" rid="B15">15</xref>) and its extensive ecosystem of libraries, such as Pandas (<xref ref-type="bibr" rid="B16">16</xref>) and NumPy (<xref ref-type="bibr" rid="B17">17</xref>), are used. For statistical analysis, Python libraries like NumPy, SciPy (<xref ref-type="bibr" rid="B18">18</xref>) and statsmodels (<xref ref-type="bibr" rid="B19">19</xref>) are used, whereas R (<xref ref-type="bibr" rid="B20">20</xref>) and Bioconductor (<xref ref-type="bibr" rid="B21">21</xref>) packages are used for some advanced analysis. AI-driven models, including machine learning and deep learning, are backed by the core libraries of the Python scientific distribution, like NumPy and SciPy, as well as specialized tools like scikit-learn (<xref ref-type="bibr" rid="B22">22</xref>) and TensorFlow (<xref ref-type="bibr" rid="B23">23</xref>).</p>
<p>To ensure computational reproducibility, software versioning is achieved using locally managed GitLab servers. Conda environments, based on the conda-forge (<xref ref-type="bibr" rid="B24">24</xref>) and Bioconda (<xref ref-type="bibr" rid="B25">25</xref>) channels, provide project-specific software dependencies. Additionally, entire analysis environments are encapsulated within Docker containers (<xref ref-type="bibr" rid="B26">26</xref>). Finally, interactive analyses are facilitated through securely served Jupyter notebooks (<xref ref-type="bibr" rid="B27">27</xref>), leveraging the aforementioned reproducibility infrastructure for computation.</p>
</sec>
<sec>
<title>2.2.2 Hardware</title>
<p>The PAGEM hardware infrastructure is specifically designed to meet the rigorous computational demands of modern clinical Big Data research. A general-purpose computing cluster provides significant computational capacity, currently featuring 1,832 cores, 13,568 GB of RAM, and 993 TB of storage. For deep learning applications, a specialized GPU server cluster is available to handle the high-performance requirements of AI-driven research. This cluster includes 192 CPU cores, 4,864 GB of RAM, and 18 high-performance GPUs.</p>
</sec>
</sec>
<sec>
<title>2.3 The BPS, a regionwide medical data lake</title>
<p>The Andalusian Public Health System (SSPA from its acronym in Spanish) provides service to the population of the Andalusia region (Southern Spain), with a population of &#x0007E;8.5 million inhabitants, and comprises a total of 55 hospitals and 34 primary care districts with a total of 1,505 primary care centers. The SSPA is fully digitalized by means of the digital system Diraya, where all the data are indexed and referred to the patient. Diraya incrementally dumps all patient data in the BPS on a monthly basis. The BPS is a health information system that collects clinical data and data on the use of health resources of each person receiving health care in the SSPA, totaling over 15 million patients accumulated since 2001. To illustrate the scale of the BPS, it is worth mentioning some figures (see <xref ref-type="table" rid="T1">Table 1</xref>) about the data stored, such as the 874 million diagnoses, the 2,428 million analytical tests, or the 7,000 million medical images, to cite just a few examples. These data originate from different professional sources within the health system: medical diagnoses are typically entered by physicians; nursing diagnoses by trained nursing personnel; laboratory results by automated systems validated by clinical laboratory staff; and imaging records by radiology services. Each category of data serves a distinct clinical purpose, from initial diagnosis and monitoring to treatment planning and follow-up care. This volume makes BPS one of the largest medical RWD repositories in the world.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>BPS in figures.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Data type</bold></th>
<th valign="top" align="left"><bold>Source</bold></th>
<th valign="top" align="left"><bold>Data</bold></th>
<th valign="top" align="center"><bold>Number (millions)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td/>
<td/>
<td valign="top" align="left">Users/Patients (2001&#x02013;September 2024)</td>
<td valign="top" align="center">15.8</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="5">Total users</td>
<td valign="top" align="left" rowspan="3">Administrative</td>
<td valign="top" align="left">Active users (2024)</td>
<td valign="top" align="center">8.89</td>
</tr>
<tr>
<td valign="top" align="left">Men (2024)</td>
<td valign="top" align="center">4.51</td>
</tr>
<tr>
<td valign="top" align="left">Women (2024)</td>
<td valign="top" align="center">4.38</td>
</tr>
<tr>
<td valign="top" align="left">Medical doctors</td>
<td valign="top" align="left">Medical diagnoses (90% automatic coding)</td>
<td valign="top" align="center">874.1</td>
</tr>
<tr>
<td valign="top" align="left">Nursing staff</td>
<td valign="top" align="left">Nursing diagnoses</td>
<td valign="top" align="center">51.3</td>
</tr>
<tr>
<td valign="top" align="left">Clinical</td>
<td valign="top" align="left">Laboratory staff</td>
<td valign="top" align="left">Analytical test results</td>
<td valign="top" align="center">2,428</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="5">practice</td>
<td valign="top" align="left">Medical image staff</td>
<td valign="top" align="left">PACS images</td>
<td valign="top" align="center">7,000</td>
</tr>
<tr>
<td valign="top" align="left">Medical doctors</td>
<td valign="top" align="left">Vital signs (weight, height, BMI, blood pressure)</td>
<td valign="top" align="center">83.5</td>
</tr>
<tr>
<td valign="top" align="left">Nursing staff</td>
<td valign="top" align="left">Functional evaluations</td>
<td valign="top" align="center">4.2</td>
</tr>
<tr>
<td valign="top" align="left">Nursing staff</td>
<td valign="top" align="left">Cognitive assessments</td>
<td valign="top" align="center">2.7</td>
</tr>
<tr>
<td/>
<td valign="top" align="left">Vaccination events</td>
<td valign="top" align="center">79</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="3">Processes</td>
<td valign="top" align="left">Administrative</td>
<td valign="top" align="left">Prosthesis and implant records</td>
<td valign="top" align="center">0.7</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2"/>
<td valign="top" align="left">Temporary Incapacity (TI) processes</td>
<td valign="top" align="center">11.2</td>
</tr>
<tr>
<td valign="top" align="left">Hospital stays</td>
<td valign="top" align="center">3.5</td>
</tr></tbody>
</table>
</table-wrap>
<p>From the data collected in BPS, it is possible to obtain estimates on health, the behavior of patients and, in general, users in relation to health services and to stratify the population in order to guide the provision of these services. The BPS also enables longitudinal studies to be carried out, the incidence of pathologies to be estimated and projections to be made on the state of health of the population and its resource needs. It also makes it possible to analyze efficiency in the use of resources by health care providers. Since its inception, BPS was conceived with a strong focus on research. In fact, it has been included in the Repository of Innovative Practices in Active and Healthy Aging of the European Commission (<xref ref-type="bibr" rid="B28">28</xref>).</p>
<p>The structure of BPS is defined in a reference publication (<xref ref-type="bibr" rid="B13">13</xref>), which describes the development of this information system that connects data from multiple Electronic Health Records (EHR) to improve assistance to patients, health services administration, management, evaluation, and inspection, as well as public health and research. BPS connects pseudonymized data from nearly every individual in the Andalusian population (<xref ref-type="bibr" rid="B13">13</xref>). The data are sourced from different SSPA information systems including: EHRs, the minimum basic data sets (inpatients, outpatient major surgery, hospital emergencies and medical day hospital), mental health information systems, analytical and image tests, vaccines, renal patients, and pharmacy, among others. In order to have the data as structured as possible, an automatic coder developed in-house for hospital emergency and primary care electronic medical records (<xref ref-type="bibr" rid="B29">29</xref>) is used to code clinical diagnoses into ICD-10. In addition, 80 chronic pathologies were identified and coded. <xref ref-type="table" rid="T2">Table 2</xref> presents a selection of prevalent chronic and high-impact conditions as coded in BPS. These include both non-communicable diseases (e.g., diabetes, cardiovascular disorders) and cancers. The list has been produced by BPS curators based on prevalence, impact on health services, and standardized ICD-10 coding coverage (<xref ref-type="bibr" rid="B13">13</xref>).</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Some prevalent diseases in BPS.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Pathology</bold></th>
<th valign="top" align="center"><bold>Women</bold></th>
<th valign="top" align="center"><bold>Men</bold></th>
<th valign="top" align="center"><bold>Total</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Dyslipidemia</td>
<td valign="top" align="center">1,085,213</td>
<td valign="top" align="center">966,250</td>
<td valign="top" align="center">2,051,463</td>
</tr>
<tr>
<td valign="top" align="left">Hypertension</td>
<td valign="top" align="center">1,018,136</td>
<td valign="top" align="center">915,329</td>
<td valign="top" align="center">1,933,465</td>
</tr>
<tr>
<td valign="top" align="left">Arthrosis, spondylosis</td>
<td valign="top" align="center">979,136</td>
<td valign="top" align="center">557,983</td>
<td valign="top" align="center">1,537,119</td>
</tr>
<tr>
<td valign="top" align="left">Anxiety disorder</td>
<td valign="top" align="center">698,438</td>
<td valign="top" align="center">329,416</td>
<td valign="top" align="center">1,027,854</td>
</tr>
<tr>
<td valign="top" align="left">Asthma</td>
<td valign="top" align="center">511,843</td>
<td valign="top" align="center">441,215</td>
<td valign="top" align="center">953,058</td>
</tr>
<tr>
<td valign="top" align="left">Diabetes</td>
<td valign="top" align="center">380,105</td>
<td valign="top" align="center">414,329</td>
<td valign="top" align="center">794,434</td>
</tr>
<tr>
<td valign="top" align="left">Hypothyroidism</td>
<td valign="top" align="center">596,937</td>
<td valign="top" align="center">130,390</td>
<td valign="top" align="center">727,327</td>
</tr>
<tr>
<td valign="top" align="left">Colorectal cancer</td>
<td valign="top" align="center">27,079</td>
<td valign="top" align="center">33,271</td>
<td valign="top" align="center">60,350</td>
</tr>
<tr>
<td valign="top" align="left">Lung cancer</td>
<td valign="top" align="center">5,050</td>
<td valign="top" align="center">10,525</td>
<td valign="top" align="center">15,575</td>
</tr>
<tr>
<td valign="top" align="left">Breast cancer</td>
<td valign="top" align="center">96,171</td>
<td valign="top" align="center">2,107</td>
<td valign="top" align="center">98,278</td>
</tr>
<tr>
<td valign="top" align="left">Heart failure</td>
<td valign="top" align="center">120,430</td>
<td valign="top" align="center">106,849</td>
<td valign="top" align="center">227,279</td>
</tr>
<tr>
<td valign="top" align="left">Atrial fibrillation</td>
<td valign="top" align="center">86,311</td>
<td valign="top" align="center">91,836</td>
<td valign="top" align="center">178,147</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Cases in 2023.</p>
</table-wrap-foot>
</table-wrap>
<p>To ensure data quality and harmonization, the BPS relies on a standardized, patient-indexed structure supported by Diraya, the unified digital health system. As previously mentioned, diagnostic information is coded using ICD-10, with automatic coders improving consistency across care levels. Other standard codes are used, such as Anatomical Therapeutic Chemical (ATC) Classification for drugs (<xref ref-type="bibr" rid="B30">30</xref>). Data from diverse sources (EHRs, pharmacy, imaging, lab tests, etc.) are integrated through validated linkage processes and updated monthly. Each data field undergoes validation checks, and the system maintains full traceability. For participation in international projects, data can be mapped to standardized structures such as the OMOP Common Data Model, facilitating semantic interoperability and federated analytics.</p>
<p>The BPS also supports linkage to external datasets, such as biobank-derived genomic data, clinical trial cohorts, registries, survey results, etc., provided the integration is justified, ethically approved, and technically feasible via pseudonymized identifiers. This enables enriched, longitudinal research studies that combine clinical, molecular, and behavioral dimensions, further expanding the research potential of the platform.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Methods</title>
<sec>
<title>3.1 Secure data management in PAGEM</title>
<p>The PAGEM infrastructure complies with the GDPR and adheres to the Joint Resolution 1/2021 of the General Secretariat for Research, Development and Innovation in Health of the Regional Ministry of Health and Families and the Management Directorate of the Andalusian Health Service (<xref ref-type="bibr" rid="B31">31</xref>), which anticipates key elements of the forthcoming EHDS regulation (<xref ref-type="bibr" rid="B11">11</xref>). Specifically, it follows the principles of lawfulness, fairness, transparency, purpose limitation, data minimization, and integrity/confidentiality as defined in Articles 5&#x02013;9 of the GDPR. Data processing is grounded in Article 6(1)(e) (task carried out in the public interest) and Article 9(2)(j) (scientific research purposes). Although the EHDS regulation is not yet finalized, the PAGEM SPE is aligned with its current legislative resolution (24 April 2024), particularly Article 50, and is proactively designed to fulfill its anticipated technical and governance criteria for Secure Processing Environments.</p>
<p>Since the infrastructure is located within the SSPA corporate network and is operated by personnel of the Foundation Progress and Health, which belongs to the health system, data never leave the secure environment of the health system and are managed by trusted personnel from the health system. These two aspects are crucial for the Data Protection Impact Assessment (DPIA), given that the relative risk for the data used in the study is minimized (<xref ref-type="bibr" rid="B32">32</xref>). The DPIA is a document consisting of a description of the data life cycle, detailing the activities to be carried out, the specific data to be processed and the people and technologies involved, both for the data acquisition process and for its storage, processing, transfer to third parties and final destruction. The DPIA includes the description of potential hazards, their inherent and residual risk, and mitigation measures that could be implemented. By analyzing the data in the PAGEM infrastructure, an action plan is not required (<xref ref-type="bibr" rid="B32">32</xref>).</p>
<p>In summary, the data management procedure (described in the DPIA as the life cycle of the data and schematised in <xref ref-type="fig" rid="F1">Figure 1</xref>) is as follows: i) once the study is approved by the data access committee, the PAGEM work team requests the data from BPS, ii) the BPS team extracts the data and pseudonymizes them, iii) the BPS team transfers the pseudonymized data to the PAGEM infrastructure, iv) the PAGEM work team carries out the analysis of the data as described in the research protocol, v) once the analysis is done, the data are removed from the PAGEM infrastructure.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Schematic representation of the collaborative environment for secure and ethical data analysis. The left side illustrates the PAGEM model, in which only authorized health system personnel analyse pseudonymized data within the SPE, and researchers receive only aggregated results. This contrasts with potential EHDS-compliant models (right side), which may support controlled researcher access to pseudonymized individual-level data within SPEs. Schematically, 1) the researcher agrees on a study with the PAGEM, 2) the PAGEM takes care of the permissions for data management and, 3) once granted, the PAGEM requests from BPS the extraction of pseudonymized data, 4) the PAGEM team and, occasionally, researchers with a DPA perform the analysis and 5) the results, which do not contain any private data, are released. The right side of the figure represents the conventional data analysis procedure.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1630351-g0001.tif">
<alt-text>Flowchart illustrating the data management process involving committees for study approval, pseudonymization, and anonymization through PAGeM and BPS systems, leading to secure external data infrastructure.</alt-text>
</graphic>
</fig>
<p>It is important to clarify that the Joint Resolution 1/2021 requires that the principal researcher (or one of them, if there is more than one) belongs to the Andalusian Health System. Therefore, in the current configuration of PAGEM, only authorized personnel from the health system (i.e., members of the Andalusian Health System, including the PAGEM work team, and occasionally other researchers with a data processing agreement -DPA-) access and analyse the pseudonymized individual-level data within the SPE. Researchers external to the health system receive only aggregate, non-identifiable results. This conservative data governance model prioritizes privacy and aligns with national interpretations of GDPR and regional ethical oversight. However, the EHDS regulation supports a broader model where accredited researchers can be granted direct access to pseudonymized individual-level data within SPEs under strict safeguards. While PAGEM does not yet implement this model, it has been technically and procedurally designed to allow for such evolution in the future, once regulatory and governance adjustments permit.</p>
<p>The use of pseudonymized data is an important asset specific to this collaborative environment (<xref ref-type="fig" rid="F1">Figure 1</xref> left part), as it allows patient re-identification when necessary. This approach contrasts with many conventional data analysis frameworks (<xref ref-type="fig" rid="F1">Figure 1</xref>, right side), which can also use pseudonymized data but typically restrict or preclude any mechanism for re-identification, even under authorized circumstances, by anonymizing the data. In contrast, the PAGEM model preserves the ability to re-identify patients when ethically justified and approved by the ethics committee, a key enabler for studies that may trigger clinical action or benefit.</p>
</sec>
<sec>
<title>3.2 Regional regulation for ethical and secure data access</title>
<p>The most recent regulation for the use of medical data for research purposes in the Andalusia region was issued the 4th December, 2021, in the Joint Resolution 1/2021 of the General Secretariat for Research, Development and Innovation in Health of the Regional Ministry of Health and Families and the Management Directorate of the Andalusian Health Service (<xref ref-type="bibr" rid="B31">31</xref>). This innovative resolution anticipated the procedure for access to medical data which has further been described in the recent European Parliament legislative resolution (Regulation 2025/327, OJ L 2025/327, 5 March 2025) in the proposal for a Regulation of the European Parliament and of the Council on the EHDS (<xref ref-type="bibr" rid="B11">11</xref>). According to such Regulation, primary use refers to the processing of health data for the direct provision of healthcare services, whereas secondary use includes activities such as scientific research, public health planning, and health policy development. The PAGEM framework is designed specifically for secondary use, governed by ethical approval and strict safeguards for privacy and data minimization.</p>
<p>The Joint Resolution 1/2021 defines a Health Data Access Body (Data Access Committee, DAC) responsible for granting access to medical data for secondary use. In order to evaluate the appropriateness and justification of the request of access to the data based on scientific, ethical, and legal criteria, the DAC requires: i) the research protocol for which the data is requested, ii) the permission of the Coordinating Committee on Biomedical Research Ethics of Andaluc&#x000ED;a (CCEIBA) (<xref ref-type="bibr" rid="B33">33</xref>), iii) the DPIA (<xref ref-type="bibr" rid="B32">32</xref>) document, subject to the GDPR and taking into account the Horizon 2020 Programme Guidance How to complete your ethics self-assessment (<xref ref-type="bibr" rid="B34">34</xref>) and, iv) a signed commitment of the principal researcher in which he/she undertakes not to redistribute data, not to attempt to re-identify individuals in the dataset and to remove the dataset once the study has finished. Typically, one of the members of the PAGEM work team acts as co-principal researcher in the study protocols and takes responsibility for complying with all the commitments.</p>
<p>The secondary use of clinical data for research purposes in PAGEM is based on Articles 6(1)(e) and 9(2)(j) of the GDPR, which allow data processing when necessary for tasks in the public interest and scientific research, respectively, provided appropriate safeguards are implemented. This legal basis is explicitly recognized under Spanish national law, in which the Seventeenth Additional Provision of Organic Law 3/2018 on the Protection of Personal Data and Guarantee of Digital Rights (LOPDGDD) (<xref ref-type="bibr" rid="B35">35</xref>) does provide for an exemption from informed consent in certain research contexts, especially when large databases are used and it is not possible to provide individualized information to data subjects because it would be a disproportionate effort.</p>
<p>Additionally, this legal basis is operationalized through the regional Joint Resolution 1/2021, which defines the procedures and oversight mechanisms, including the mandatory review by an accredited ethics committee (CCEIBA), approval by a Data Access Committee (DAC), and a detailed Data Protection Impact Assessment (DPIA) for each study. Although the model does not rely on individual informed consent, it includes robust safeguards such as pseudonymization, restricted access, and strict governance. Patients retain the right to exercise their GDPR data rights, and no data is commercialized or shared outside authorized use cases. Future developments may explore structured mechanisms for patient or public involvement in the governance of data access and research prioritization.</p>
<p>While the regional Joint Resolution 1/2021 allows for the export of health data for EU-funded projects, subject to ethics and DAC approval, PAGEM itself is not designed to facilitate data transfers. Instead, it is optimized for secure <italic>in situ</italic> analysis and federated research. As such, PAGEM is suitable for participating in initiatives like the European Medicines Agency&#x00027;s DARWIN EU initiative (<xref ref-type="bibr" rid="B36">36</xref>) and the European Health Data and Evidence Network (EHDEN) (<xref ref-type="bibr" rid="B37">37</xref>), where standardized analytical pipelines are deployed locally, and only aggregate, non-sensitive outputs are shared, fully preserving data sovereignty.</p>
<p>In essence, the data access regulation described is completely compliant with the EHDS regulations and would fit perfectly in the proposed future federated structure of the EHDS.</p>
</sec>
<sec>
<title>3.3 Sample size in region-wide studies</title>
<p>In conventional clinical studies, sample size estimation is critical to ensure statistical power and compliance with data minimization principles. In region-wide retrospective studies using RWD, such as those conducted within PAGEM, the analytical cohort can be defined by all individuals in the target population who meet the inclusion criteria. This exhaustive approach minimizes sampling bias and enhances representativeness. Importantly, data minimization remains a core requirement: the necessity and proportionality of the data collected are evaluated during the study design phase and documented in the DPIA, in accordance with Article 5(1)(c) of the GDPR. Thus, although traditional statistical power calculations may not be required, each study justifies the scope and granularity of the data used as adequate and necessary for the research objectives.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Results</title>
<sec>
<title>4.1 Evidence generation possibilities</title>
<p>Multiple types of studies can be carried out in large RWD repositories. <xref ref-type="fig" rid="F2">Figure 2</xref> summarizes the most common studies already performed, ongoing or under consideration in the PAGEM infrastructure. Clockwise from the top, the most common studies currently requested by pharma companies are epidemiological studies of prevalence or incidence of diseases (a total of 60%), followed by more detailed studies of cost of disease or interventions (&#x0007E;20%).</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>The main types of studies that can be performed in the PAGEM.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1630351-g0002.tif">
<alt-text>Central diagram depicting a stylized brain with arrows pointing to various medical research topics, including disease epidemiology, cost of treatment, survival, drug repurposing, and clinical trials. Topics are labeled in blue circles.</alt-text>
</graphic>
</fig>
<p>Survival studies (<xref ref-type="bibr" rid="B38">38</xref>) are fundamental in medical research as they provide crucial insights into patient prognosis, the efficacy of treatments, and the natural course of diseases. These studies allow for the estimation of survival probabilities over time, identifying factors that may influence patient outcomes, such as comorbidities, demographics, and treatment modalities. By analyzing survival data, researchers can also identify potential risk factors and guide clinical decision-making.</p>
<p>Since many patients will present concomitant drug treatments, it is relatively easy to extend the concept of survival studies to repurposing studies by modeling the effect of the concomitant drugs on the outcome of the patient. Similarly, specific treatments or interventions can be assessed, taking into consideration all the possible confounding variables, providing valuable information on the efficacy of these. Actually, in the case of post-marketing surveillance of drugs, these are Phase IV studies (<xref ref-type="bibr" rid="B8">8</xref>) of clinical trials.</p>
<p>As previously mentioned, the secure data management environment used here involves the utilization of pseudonymized data, which makes possible patient re-identification, if required by the study and authorized by the ethics committee. Studies aiming to detect undiagnosed patients of rare diseases affected by the well-known diagnostic odyssey (<xref ref-type="bibr" rid="B39">39</xref>) or to discover undiagnosed patients of an infectious condition, like Hepatitis C, HIV, etc., for eradication programs (<xref ref-type="bibr" rid="B40">40</xref>), or other similar ones, are ultimately oriented to the identification of individuals, which is possible with pseudonymized data but not with anonymized data.</p>
<p>Probably one of the most interesting studies for RWE generation using retrospective cohorts are early predictors of diagnosis, treatment outcomes or other endpoints (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>).</p>
<p>Apart from the obvious use of retrospective data in Phase IV studies mentioned above, other innovative applications are also possible in clinical trials. Recruitment and retention of control intervention arm patients, generally consisting of placebo, poses ethical and logistic challenges, especially in oncology (<xref ref-type="bibr" rid="B41">41</xref>). Thus, a variety of synthetic control statistical methods can be used to evaluate the comparative effectiveness of an intervention using external control data, defined as cohorts of patients from external sources (<xref ref-type="bibr" rid="B42">42</xref>).</p>
<p>Another innovative application of Deep Learning (DL) methods to large datasets is the generation of synthetic data. They can be extremely useful if they meet two conditions: (1) high fidelity, meaning the generated data maintain utility for the intended task, such as yielding comparable performance when training a diagnostic model; and (2) compliance with privacy standards, ensuring that no real patient identities are disclosed in the synthetic dataset (<xref ref-type="bibr" rid="B43">43</xref>). Generative adversarial networks (GANs) have achieved remarkable success, giving rise to diverse generative models for EHR synthesis, oriented to various clinical purposes (<xref ref-type="bibr" rid="B44">44</xref>&#x02013;<xref ref-type="bibr" rid="B46">46</xref>). The importance of having access to original RWD to generate high fidelity simulated data is clearly demonstrated by the fact that AI models tend to collapse when trained on recursively generated data (<xref ref-type="bibr" rid="B47">47</xref>).</p>
<p>It is worth noting that, unlike in static databases, BPS is updated on a monthly basis, opening thus the possibility of not only retrospective studies, but also prospective or ambispective proposals.</p>
</sec>
<sec>
<title>4.2 Use cases</title>
<p>PAGEM has successfully been used for carrying out numerous RWE generation studies. Many retrospective studies using large databases of RWD focus on the efficacy of treatments or drugs. In a recent study performed in PAGEM, the evidence of the association between increased use of direct oral anticoagulants and a reduction in the rate of atrial fibrillation-related stroke and major bleeding at the population level was demonstrated using a population of 95,085 patients (<xref ref-type="bibr" rid="B48">48</xref>). Actually, the same methodology can be used for different types of interventions, and some original retrospective studies can be carried out. As an example, the Andalusian Genomic Surveillance System (<xref ref-type="bibr" rid="B49">49</xref>), specifically the COVID-19 circuit (<xref ref-type="bibr" rid="B50">50</xref>), made available a large number of SARS-CoV-2 viral genomes which, in combination with the clinical data of the patients, has been used to carry out an interesting study of the effect of the viral lineage and specific viral mutations on patient survival (<xref ref-type="bibr" rid="B51">51</xref>). Another different application of the methodology is the study of the effect of other concomitant pharmacologic treatments on the outcome of the disease. Finding unexpectedly good prognoses associated with other drugs prescribed to the patients for other reasons can lead to drug repurposing proposals. Some drug repurposing studies were carried out in PAGEM during the recent pandemic. Thus, it has been demonstrated that several drugs, like vitamin D (<xref ref-type="bibr" rid="B52">52</xref>) or antipsychotic drugs like aripiprazole have a significant protective effect on COVID-19 patient survival (<xref ref-type="bibr" rid="B53">53</xref>). This study was further generalized to discover a significant protective effect in a total of 21 drugs of common use in patients (<xref ref-type="bibr" rid="B7">7</xref>).</p>
<p>Another interesting aspect is the identification of the population at risk of different diseases. In the recent COVID-19 pandemic, the identification of individuals at risk of severe infection was a priority for clinicians and health systems, and was successfully addressed using RWD from BPS (<xref ref-type="bibr" rid="B54">54</xref>).</p>
<p>Early endpoint predictors are also of paramount interest for any health system. Recently, a model was developed to identify individuals at high risk of ovarian cancer without the need of using specific tumor markers or prior stratification into risk groups using only clinical variables from BPS like demographics, comorbidities, symptoms, blood test results, and healthcare utilization patterns (<xref ref-type="bibr" rid="B55">55</xref>).</p>
<p>Recently, as an example of innovative clinical data utilization, &#x0007E;1 million real electronic health records (EHRs) from diabetic patients were employed to train a GAN, the medGAN (<xref ref-type="bibr" rid="B44">44</xref>). The goal was to generate synthetic EHRs that closely mimic the characteristics of diabetic patients while ensuring that the data do not correspond to any actual individuals, thus preserving patient privacy. These data are available within the &#x0201C;Synthetic Clinical Health Records&#x0201D; challenge of the Critical Assessment on Massive Data Analysis conference (<xref ref-type="bibr" rid="B56">56</xref>).</p>
</sec>
<sec>
<title>4.3 Governance and opportunities for collaboration</title>
<p>The PAGEM governance structure ensures that research collaborations align with the strategic priorities of the Andalusian Health System. An internal committee evaluates proposals based on their relevance, resource availability, and financial feasibility. This structured approach maximizes the platform&#x00027;s impact on medical research and innovation.</p>
<p>Currently, PAGEM has established agreements with over 10 major pharmaceutical companies and three Contract Research Organizations (CROs). These partnerships generate revenue that sustains its infrastructure, enabling access for public health initiatives and independent research. This collaborative model fosters medical knowledge generation while maintaining financial sustainability. It is important to note that these collaborations do not involve the sale or transfer of any individual-level data. All data remain securely within the SPE, and analyses are conducted exclusively by authorized personnel from the public health system. External collaborators receive only aggregate results, and all projects, public or private, must be approved by an accredited ethics committee (CCEIBA) and the Data Access Committee. These partnerships are framed as research collaborations aimed at generating public value. In addition to financial sustainability, they contribute to drug safety surveillance, comparative effectiveness research, and development of predictive models, ultimately benefiting the public health system and patient care.</p>
<p>At the international level, PAGEM utilizes federated analytical models based on the OMOP common data standard (<xref ref-type="bibr" rid="B57">57</xref>), ensuring interoperability and large-scale Real-World Evidence (RWE) generation in federated research networks, like the European Medicines Agency&#x00027;s DARWIN EU initiative (<xref ref-type="bibr" rid="B36">36</xref>) and the European Health Data &#x00026; Evidence Network (EHDEN) (<xref ref-type="bibr" rid="B37">37</xref>).</p>
<p>Compared to the EHDS framework, the NHS model in the UK includes centrally managed databases with opt-out provisions for patients (<xref ref-type="bibr" rid="B58">58</xref>), while the U.S. Health Insurance Portability and Accountability Act (HIPAA) framework (<xref ref-type="bibr" rid="B59">59</xref>) focuses more heavily on institutional data controllers and breach notification mechanisms. EHDS emphasizes federated data access with public oversight, which PAGEM mirrors by embedding analysis within a secure, non-exportable infrastructure.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Discussion</title>
<sec>
<title>5.1 The future of precision preventive medicine</title>
<p>A key aspect of precision preventive medicine is the development of early endpoint predictors, which anticipate adverse health outcomes before symptoms appear (<xref ref-type="bibr" rid="B60">60</xref>). By leveraging machine learning algorithms, healthcare providers can implement targeted interventions, improving patient outcomes and reducing healthcare costs (<xref ref-type="bibr" rid="B61">61</xref>). For example, predictive models have been successfully applied to cardiovascular disease (<xref ref-type="bibr" rid="B61">61</xref>) and cancer prevention (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B62">62</xref>).</p>
<p>Most existing prediction models are applied to pre-stratified risk populations, meaning individuals already under clinical monitoring. However, ideal predictors should be applicable to the general population, using routinely collected healthcare data. A notable example is the early predictor for ovarian cancer, developed using RWD from BPS, which identifies high-risk individuals based on clinical variables rather than traditional tumor markers (<xref ref-type="bibr" rid="B55">55</xref>). This approach is particularly relevant for high-mortality diseases, where early detection significantly improves survival rates (<xref ref-type="bibr" rid="B63">63</xref>).</p>
<p>Developing robust predictors requires large datasets, computational power, and expertise, but once established, they can be efficiently applied at scale. As illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref>, integrating predictive analytics into healthcare systems can transform them from reactive to preventive models, ensuring earlier interventions and more cost-effective patient management.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Summarized architecture of the Andalusian Health System with a double layer of digitalization: the first layer, Diraya, for primary use of clinical data for the management of the patient, and a second layer, BPS, for permanent data storage for administrative and research purposes. The Coordinating Ethics Committee (CCEIBA) assesses ethical compliance of each project, while the Data Access Committee (DAC) evaluates the scientific, legal, and proportionality aspects before granting access. The technical architecture includes ingestion from Diraya, structured storage in the BPS, and a secure analytics layer within PAGEM that restricts data access to authorized personnel. The SPE is connected to the BPS in order to promote secondary use of clinical data for research purposes. In particular, predictors developed using BPS data can be run directly over BPS. By doing this, the data repository at the end of the data production chain of the health system becomes the first line in the application of preventive medicine.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1630351-g0003.tif">
<alt-text>Diagram illustrating a system for precision preventive medicine in Andalusia. It features components: SPE and BPS, connected to healthcare facilities across a regional map. Processes include early endpoint predictors and early warning for precision medicine. Dotted lines indicate data flow, linking to the Andalusian Public Health System logo.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<title>5.2 Prevention and the future sustainability of the health system</title>
<p>The long-term sustainability of healthcare systems may benefit from shifting toward a predictive and preventive model. Integrating cost-effective endpoint predictors into clinical workflows has the potential to improve patient outcomes while optimizing the use of healthcare resources.</p>
<p>Several studies have shown that the use of Real-World Data (RWD) and predictive models can reduce the cost and duration of evidence generation compared to traditional randomized controlled trials, particularly in post-marketing surveillance and comparative effectiveness research (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>). Moreover, public-private research collaborations, as managed by PAGEM, generate revenue that can support infrastructure maintenance and open access for public health studies. While further research is needed to quantify these economic effects, such models represent a promising direction for equitable and data-driven innovation.</p>
<p>A core principle of this model is justice, given that patient data belong to them, and the benefits derived from their use must return to patients (<xref ref-type="bibr" rid="B64">64</xref>). By leveraging predictive analytics, data-driven research, and ethical governance, healthcare systems can become self-sustaining knowledge hubs, reinforcing both scientific progress and public wellbeing.</p>
<p>The collaborative model presented here represents a transformative step in healthcare, integrating the PAGEM, a SPE, with the BPS, a large clinical data repository, all within a robust legal framework to drive medical research and innovation. By facilitating an ethical and secure use of RWD, the platform fosters and accelerates the generation of medical evidence.</p>
<p>Although PAGEM is currently the only operational SPE within the Andalusian health system, the regulatory framework (Joint Resolution 1/2021) allows for the creation of additional SPEs if needed for domain-specific or geographically distributed projects. PAGEM does not currently allow remote access by external researchers but supports federated analyses using OMOP-CDM and similar standards. It handles a broad range of data types, including structured EHRs, imaging, lab results, and genomics. Future development plans include expanding analytical toolsets and exploring controlled user access pathways, in alignment with evolving EHDS requirements.</p>
<p>The integration of cost-effective endpoint predictors enhances early intervention, patient outcomes, and healthcare efficiency, particularly in aging populations, shifting healthcare from a reactive to a preventive approach. Additionally, public-private partnerships generate revenue, sustaining infrastructure while ensuring that the benefits of patient data return to society.</p>
<p>By combining data security, regulatory compliance, and large-scale analytics, PAGEM establishes a scalable and replicable model for regional and international healthcare systems. This data-driven approach strengthens healthcare&#x00027;s role in knowledge generation, economic sustainability, and public health improvement, positioning Andalusia as a leading example of ethical, secure, and innovative healthcare digitalization.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>DM-M: Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Resources. RV: Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Resources. VO: Writing &#x02013; review &#x00026; editing, Formal analysis, Writing &#x02013; original draft, Validation, Data curation, Methodology, Investigation. AE-M: Writing &#x02013; original draft, Investigation, Formal analysis, Data curation, Validation, Writing &#x02013; review &#x00026; editing, Methodology. PV: Methodology, Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Validation, Formal analysis, Data curation. AS: Project administration, Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft. MS: Data curation, Methodology, Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft. IG-A: Methodology, Software, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. MR: Resources, Project administration, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. LA: Writing &#x02013; review &#x00026; editing, Resources, Data curation, Formal analysis, Writing &#x02013; original draft. CL: Supervision, Conceptualization, Writing &#x02013; review &#x00026; editing, Formal analysis, Software, Writing &#x02013; original draft, Methodology, Visualization, Investigation, Funding acquisition, Validation. JD: Writing &#x02013; original draft, Investigation, Conceptualization, Funding acquisition, Supervision, Writing &#x02013; review &#x00026; editing, Resources.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work has been co-financed by the Spanish Ministry of Science and Innovation with funds from the European Union NextGenerationEU (PRTR-C17.I1) and the Regional Ministry of University, Research and Innovation of the Autonomous Community of Andalusia within the framework of the Biotechnology Plan applied to Health. It is also supported by the Spanish Ministry of Science and Innovation (PID2020-117979RB-I00, PID2023-152380OB-C21, TED2021-132346B-I00), by the Instituto de Salud Carlos III (ISCIII), co-funded with European Regional Development Funds (IMP/00019, PMP24/00024), and by the Consejeria de Salud y Consumo, Junta de Andalucia (IE19_259 FPS).</p>
</sec>
<ack><p>We are indebted to the curators of the Population Health Database for their constant support of the PAGEM platform.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>F</given-names></name> <name><surname>Panagiotakos</surname> <given-names>D</given-names></name></person-group>. <article-title>Real-world data: a brief review of the methods, applications, challenges and opportunities</article-title>. <source>BMC Med Res Methodol.</source> (<year>2022</year>) <volume>22</volume>:<fpage>287</fpage>. <pub-id pub-id-type="doi">10.1186/s12874-022-01768-6</pub-id><pub-id pub-id-type="pmid">36335315</pub-id></citation></ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ramagopalan</surname> <given-names>SV</given-names></name> <name><surname>Simpson</surname> <given-names>A</given-names></name> <name><surname>Sammon</surname> <given-names>C</given-names></name></person-group>. <article-title>Can real-world data really replace randomised clinical trials?</article-title> <source>BMC Med.</source> (<year>2020</year>) <volume>18</volume>:<fpage>1</fpage>&#x02013;<lpage>2</lpage>. <pub-id pub-id-type="doi">10.1186/s12916-019-1481-8</pub-id><pub-id pub-id-type="pmid">31937304</pub-id></citation></ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>US Food and Drug Administration</collab></person-group>. <source>Use of real-world evidence to support regulatory decision-making for medical devices: guidance for industry and Food and Drug Administration staff</source>. (<year>2017</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/downloads/medicaldevices/deviceregulationandguidance/guidancedocuments/ucm513027.pdf">https://www.fda.gov/downloads/medicaldevices/deviceregulationandguidance/guidancedocuments/ucm513027.pdf</ext-link><pub-id pub-id-type="pmid">39060838</pub-id></citation></ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Makady</surname> <given-names>A</given-names></name> <name><surname>de Boer</surname> <given-names>A</given-names></name> <name><surname>Hillege</surname> <given-names>H</given-names></name> <name><surname>Klungel</surname> <given-names>O</given-names></name> <name><surname>Goettsch</surname> <given-names>W</given-names></name></person-group>. <article-title>What is real-world data? A review of definitions based on literature and stakeholder interviews</article-title>. <source>Value Health.</source> (<year>2017</year>) <volume>20</volume>:<fpage>858</fpage>&#x02013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/j.jval.2017.03.008</pub-id><pub-id pub-id-type="pmid">28712614</pub-id></citation></ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Corrigan-Curay</surname> <given-names>J</given-names></name> <name><surname>Sacks</surname> <given-names>L</given-names></name> <name><surname>Woodcock</surname> <given-names>J</given-names></name></person-group>. <article-title>Real-world evidence and real-world data for evaluating drug safety and effectiveness</article-title>. <source>JAMA.</source> (<year>2018</year>) <volume>320</volume>:<fpage>867</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1001/jama.2018.10136</pub-id><pub-id pub-id-type="pmid">30105359</pub-id></citation></ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sherman</surname> <given-names>RE</given-names></name> <name><surname>Anderson</surname> <given-names>SA</given-names></name> <name><surname>Dal Pan</surname> <given-names>GJ</given-names></name> <name><surname>Gray</surname> <given-names>GW</given-names></name> <name><surname>Gross</surname> <given-names>T</given-names></name> <name><surname>Hunter</surname> <given-names>NL</given-names></name> <etal/></person-group>. <article-title>Real-world evidence&#x02014;what is it and what can it tell us</article-title>. <source>N Engl J Med.</source> (<year>2016</year>) <volume>375</volume>:<fpage>2293</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMsb1609216</pub-id><pub-id pub-id-type="pmid">27959688</pub-id></citation></ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Loucera</surname> <given-names>C</given-names></name> <name><surname>Carmona</surname> <given-names>R</given-names></name> <name><surname>Esteban-Medina</surname> <given-names>M</given-names></name> <name><surname>Bostelmann</surname> <given-names>G</given-names></name> <name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>D</given-names></name> <name><surname>Villegas</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Real-world evidence with a retrospective cohort of 15,968 COVID-19 hospitalized patients suggests 21 new effective treatments</article-title>. <source>Virol J.</source> (<year>2023</year>) <volume>20</volume>:<fpage>226</fpage>. <pub-id pub-id-type="doi">10.1186/s12985-023-02195-9</pub-id><pub-id pub-id-type="pmid">37803348</pub-id></citation></ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Suvarna</surname> <given-names>V</given-names></name></person-group>. <article-title>Phase IV of drug development</article-title>. <source>Perspect Clin Res.</source> (<year>2010</year>) <volume>1</volume>:<fpage>57</fpage>&#x02013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.4103/2229-3485.71852</pub-id></citation>
</ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Placido</surname> <given-names>D</given-names></name> <name><surname>Yuan</surname> <given-names>B</given-names></name> <name><surname>Hjaltelin</surname> <given-names>JX</given-names></name> <name><surname>Zheng</surname> <given-names>C</given-names></name> <name><surname>Haue</surname> <given-names>AD</given-names></name> <name><surname>Chmura</surname> <given-names>PJ</given-names></name> <etal/></person-group>. <article-title>A deep learning algorithm to predict risk of pancreatic cancer from disease trajectories</article-title>. <source>Nat Med.</source> (<year>2023</year>) <volume>29</volume>:<fpage>1113</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-023-02332-5</pub-id><pub-id pub-id-type="pmid">37156936</pub-id></citation></ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Feuerriegel</surname> <given-names>S</given-names></name> <name><surname>Frauen</surname> <given-names>D</given-names></name> <name><surname>Melnychuk</surname> <given-names>V</given-names></name> <name><surname>Schweisthal</surname> <given-names>J</given-names></name> <name><surname>Hess</surname> <given-names>K</given-names></name> <name><surname>Curth</surname> <given-names>A</given-names></name> <etal/></person-group>. <article-title>Causal machine learning for predicting treatment outcomes</article-title>. <source>Nat Med.</source> (<year>2024</year>) <volume>30</volume>:<fpage>958</fpage>&#x02013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-024-02902-1</pub-id><pub-id pub-id-type="pmid">38641741</pub-id></citation></ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>European Commission</collab></person-group>. <source>Regulation of the European Parliament and of the Council on the European Health Data Space</source>. (<year>2025</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://eur-lex.europa.eu/eli/reg/2025/327/oj/eng">https://eur-lex.europa.eu/eli/reg/2025/327/oj/eng</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>European Union</collab></person-group>. <source>Regulation (EU) 2018/1725 of the European Parliament and of the Council of 23 October 2018 on the protection of natural persons with regard to the processing of personal data by the Union institutions, bodies, offices and agencies and on the free movement of such data, and repealing Regulation (EC) No 45/2001 and Decision No 1247/2002/EC</source>. (<year>2018</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://eur-lex.europa.eu/eli/reg/2018/1725/oj">https://eur-lex.europa.eu/eli/reg/2018/1725/oj</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>D</given-names></name> <name><surname>Goicoechea-Salazar</surname> <given-names>J</given-names></name> <name><surname>Garc&#x000ED;a-Le&#x000F3;n</surname> <given-names>F</given-names></name> <name><surname>Laguna-Tellez</surname> <given-names>A</given-names></name> <name><surname>Larrocha-Mata</surname> <given-names>D</given-names></name> <name><surname>Cardero-Rivas</surname> <given-names>M</given-names></name></person-group>. <article-title>Health record linkage: Andalusian health population database</article-title>. <source>Gaceta Sanitaria.</source> (<year>2019</year>) <volume>34</volume>:<fpage>105</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.gaceta.2019.03.003</pub-id><pub-id pub-id-type="pmid">31133300</pub-id></citation></ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>Platform of Computational Medicine</collab></person-group>. <source>Platform for the secure generation of biomedical knowledge from clinical big data of the population health database</source>. (<year>2020</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.clinbioinfosspa.es/projects/iRWD/indexEsp.html">https://www.clinbioinfosspa.es/projects/iRWD/indexEsp.html</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>Python</collab></person-group> (<year>2024</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.python.org/">https://www.python.org/</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McKinney</surname> <given-names>W</given-names></name></person-group>. <article-title>Data structures for statistical computing in Python</article-title>. <source>Proc 9th Python Sci Conf.</source> (<year>2010</year>) <volume>445</volume>:<fpage>51</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.25080/Majora-92bf1922-00a</pub-id></citation>
</ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harris</surname> <given-names>CR</given-names></name> <name><surname>Millman</surname> <given-names>KJ</given-names></name> <name><surname>van der Walt</surname> <given-names>SJ</given-names></name> <name><surname>Gommers</surname> <given-names>R</given-names></name> <name><surname>Virtanen</surname> <given-names>P</given-names></name> <name><surname>Cournapeau</surname> <given-names>D</given-names></name> <etal/></person-group>. <article-title>Array programming with NumPy</article-title>. <source>Nature.</source> (<year>2020</year>) <volume>585</volume>:<fpage>357</fpage>&#x02013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2649-2</pub-id><pub-id pub-id-type="pmid">32939066</pub-id></citation></ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Virtanen</surname> <given-names>P</given-names></name> <name><surname>Gommers</surname> <given-names>R</given-names></name> <name><surname>Oliphant</surname> <given-names>TE</given-names></name> <name><surname>Haberland</surname> <given-names>M</given-names></name> <name><surname>Reddy</surname> <given-names>T</given-names></name> <name><surname>Cournapeau</surname> <given-names>D</given-names></name> <etal/></person-group>. <article-title>SciPy 1.0: fundamental algorithms for scientific computing in Python</article-title>. <source>Nat Methods.</source> (<year>2020</year>) <volume>17</volume>:<fpage>261</fpage>&#x02013;<lpage>72</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0686-2</pub-id><pub-id pub-id-type="pmid">32015543</pub-id></citation></ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Seabold</surname> <given-names>S</given-names></name> <name><surname>Perktold</surname> <given-names>J</given-names></name></person-group>. <article-title>Statsmodels: econometric and statistical modeling with Python</article-title>. <source>Proc 9th Python Sci Conf.</source> (<year>2010</year>) <volume>57</volume>:<fpage>92</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.25080/Majora-92bf1922-011</pub-id></citation>
</ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>R Core Team</collab></person-group>. <source>R: A Language and Environment for Statistical Computing</source>. <publisher-loc>Vienna, Austria</publisher-loc> (<year>2021</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="http://www.R-project.org">http://www.R-project.org</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B21">
<label>21.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gentleman</surname> <given-names>RC</given-names></name> <name><surname>Carey</surname> <given-names>VJ</given-names></name> <name><surname>Bates</surname> <given-names>DM</given-names></name> <name><surname>Bolstad</surname> <given-names>B</given-names></name> <name><surname>Dettling</surname> <given-names>M</given-names></name> <name><surname>Dudoit</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Bioconductor: open software development for computational biology and bioinformatics</article-title>. <source>Genome Biol.</source> (<year>2004</year>) <volume>5</volume>:<fpage>1</fpage>&#x02013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1186/gb-2004-5-10-r80</pub-id><pub-id pub-id-type="pmid">15461798</pub-id></citation></ref>
<ref id="B22">
<label>22.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pedregosa</surname> <given-names>F</given-names></name> <name><surname>Varoquaux</surname> <given-names>G</given-names></name> <name><surname>Gramfort</surname> <given-names>A</given-names></name> <name><surname>Michel</surname> <given-names>V</given-names></name> <name><surname>Thirion</surname> <given-names>B</given-names></name> <name><surname>Grisel</surname> <given-names>O</given-names></name> <etal/></person-group>. <article-title>Scikit-learn: machine learning in python</article-title>. <source>J Mach Learn Res</source>. (<year>2011</year>) <volume>12</volume>:<fpage>2825</fpage>&#x02013;<lpage>30</lpage>.</citation>
</ref>
<ref id="B23">
<label>23.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Abadi</surname> <given-names>M</given-names></name> <name><surname>Barham</surname> <given-names>P</given-names></name> <name><surname>Chen</surname> <given-names>J</given-names></name> <name><surname>Chen</surname> <given-names>Z</given-names></name> <name><surname>Davis</surname> <given-names>A</given-names></name> <name><surname>Dean</surname> <given-names>J</given-names></name> <etal/></person-group>., editors. <source>TensorFlow: A System for Large-Scale Machine Learning</source>. <publisher-loc>Savannah, GA</publisher-loc>: <publisher-name>OSDI</publisher-name> (<year>2016</year>).</citation>
</ref>
<ref id="B24">
<label>24.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>Conda-Forge Community</collab></person-group>. <source>The Conda-Forge Project: Community-Based Software Distribution Built on the Conda Package Format and Ecosystem</source>. <publisher-loc>Zenodo</publisher-loc> (<year>2015</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/records/4774217">https://zenodo.org/records/4774217</ext-link></citation>
</ref>
<ref id="B25">
<label>25.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gr&#x000FC;ning</surname> <given-names>B</given-names></name> <name><surname>Dale</surname> <given-names>R</given-names></name> <name><surname>Sj&#x000F6;din</surname> <given-names>A</given-names></name> <name><surname>Chapman</surname> <given-names>BA</given-names></name> <name><surname>Rowe</surname> <given-names>J</given-names></name> <name><surname>Tomkins-Tinch</surname> <given-names>CH</given-names></name> <etal/></person-group>. <article-title>Bioconda: sustainable and comprehensive software distribution for the life sciences</article-title>. <source>Nat Methods.</source> (<year>2018</year>) <volume>15</volume>:<fpage>475</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-018-0046-7</pub-id><pub-id pub-id-type="pmid">29967506</pub-id></citation></ref>
<ref id="B26">
<label>26.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boettiger</surname> <given-names>C</given-names></name></person-group>. <article-title>An introduction to docker for reproducible research</article-title>. <source>ACM SIGOPS Operating Systems Review.</source> (<year>2015</year>) <volume>49</volume>:<fpage>71</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1145/2723872.2723882</pub-id></citation>
</ref>
<ref id="B27">
<label>27.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>H</given-names></name></person-group>. <article-title>Interactive notebooks: sharing the code</article-title>. <source>Nature</source>. (<year>2014</year>) <volume>515</volume>:<fpage>151</fpage>&#x02013;<lpage>2</lpage>. <pub-id pub-id-type="doi">10.1038/515151a</pub-id></citation>
</ref>
<ref id="B28">
<label>28.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>European Commission</collab></person-group>. <source>Active and Healthy Living in the Digital World</source>. (<year>2020</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://futurium.ec.europa.eu/en/active-and-healthy-living-digital-world">https://futurium.ec.europa.eu/en/active-and-healthy-living-digital-world</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B29">
<label>29.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Salazar</surname> <given-names>JAG</given-names></name> <name><surname>Garc&#x000ED;a</surname> <given-names>MAN</given-names></name> <name><surname>T&#x000E9;llez</surname> <given-names>AL</given-names></name> <name><surname>Casasola</surname> <given-names>VDC</given-names></name> <name><surname>Herrera</surname> <given-names>JR</given-names></name> <name><surname>Cabezas</surname> <given-names>FM</given-names></name></person-group>. <article-title>Desarrollo de un sistema de codificaci&#x000F3;n autom&#x000E1;tica para recuperar y analizar textos diagn&#x000F3;sticos de los registros de servicios de urgencias hospitalarios</article-title>. <source>Emergencias.</source> (<year>2013</year>) <volume>25</volume>:<fpage>430</fpage>&#x02013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B30">
<label>30.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>World Health Organization</collab></person-group>. <source>Anatomical Therapeutic Chemical (ATC) Classification</source>. (<year>2025</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.who.int/tools/atc-ddd-toolkit/atc-classification">https://www.who.int/tools/atc-ddd-toolkit/atc-classification</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B31">
<label>31.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>Junta de Andaluc&#x000ED;a</collab></person-group>. <source>Joint Resolution 1/2021 of the General Secretariat for Health Research, Development and Innovation of the Regional Ministry of Health and Families and the Management Directorate of the Andalusian Health Service</source>. (<year>2021</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.sspa.juntadeandalucia.es/servicioandaluzdesalud/sites/default/files/sincfiles/wsas-media-sas_normativa_mediafile/2021/resolucion_conjunta_acceso_a_datos_investigacion_def_20211201%28F%29.pdf">https://www.sspa.juntadeandalucia.es/servicioandaluzdesalud/sites/default/files/sincfiles/wsas-media-sas_normativa_mediafile/2021/resolucion_conjunta_acceso_a_datos_investigacion_def_20211201%28F%29.pdf</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B32">
<label>32.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garc&#x000ED;a-Le&#x000F3;n</surname> <given-names>F</given-names></name> <name><surname>Villegas-Portero</surname> <given-names>R</given-names></name> <name><surname>Goicoechea-Salazar</surname> <given-names>J</given-names></name> <name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>D</given-names></name> <name><surname>Dopazo</surname> <given-names>J</given-names></name></person-group>. <article-title>Impact assessment on data protection in research projects</article-title>. <source>Gaceta sanitaria.</source> (<year>2020</year>) <volume>34</volume>:<fpage>521</fpage>&#x02013;<lpage>3</lpage>. <pub-id pub-id-type="doi">10.1016/j.gaceta.2019.10.006</pub-id><pub-id pub-id-type="pmid">31980148</pub-id></citation></ref>
<ref id="B33">
<label>33.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>Coordinating Committee on Biomedical Research Ethics of Andaluc&#x000ED;a (CCEIBA)</collab></person-group> (<year>2021</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="http://si.easp.es/eticaysalud/content/comite-coordinador-etica-investigacion-biomedica-andalucia">http://si.easp.es/eticaysalud/content/comite-coordinador-etica-investigacion-biomedica-andalucia</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B34">
<label>34.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>Horizon 2020 Programme</collab></person-group>. <source>Guidance. How to complete your ethics self-assessment</source>. (<year>2019</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://ec.europa.eu/research/participants/data/ref/h2020/grants_manual/hi/ethics/h2020_hi_ethics-self-assess_en.pdf">https://ec.europa.eu/research/participants/data/ref/h2020/grants_manual/hi/ethics/h2020_hi_ethics-self-assess_en.pdf</ext-link><pub-id pub-id-type="pmid">40597200</pub-id></citation></ref>
<ref id="B35">
<label>35.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>BOE</collab></person-group>. <article-title>Ley Org&#x000E1;nica 3/2018, de 5 de diciembre, de Protecci&#x000F3;n de Datos Personales y garant&#x000ED;a de los derechos digitales</article-title>. &#x000AB;BOE&#x000BB; n&#x000FA;m. 294, de 6 de diciembre de 2018, p&#x000E1;ginas 119788 a 119857. (<year>2018</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.boe.es/eli/es/lo/2018/12/05/3">https://www.boe.es/eli/es/lo/2018/12/05/3</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B36">
<label>36.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>EMA</collab></person-group>. <source>Data Analysis and Real World Interrogation Network (DARWIN EU)</source>. (<year>2021</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.darwin-eu.org/">https://www.darwin-eu.org/</ext-link></citation>
</ref>
<ref id="B37">
<label>37.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>European Health Data &#x00026; Evidence Network (EHDEN)</collab></person-group> (<year>2019</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.ehden.eu/">https://www.ehden.eu/</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B38">
<label>38.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Emmert-Streib</surname> <given-names>F</given-names></name> <name><surname>Dehmer</surname> <given-names>M</given-names></name></person-group>. <article-title>Introduction to survival analysis in practice</article-title>. <source>Mach Learn Knowl Extr.</source> (<year>2019</year>) <volume>1</volume>:<fpage>1013</fpage>&#x02013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.3390/make1030058</pub-id></citation>
</ref>
<ref id="B39">
<label>39.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bauskis</surname> <given-names>A</given-names></name> <name><surname>Strange</surname> <given-names>C</given-names></name> <name><surname>Molster</surname> <given-names>C</given-names></name> <name><surname>Fisher</surname> <given-names>C</given-names></name></person-group>. <article-title>The diagnostic odyssey: insights from parents of children living with an undiagnosed condition</article-title>. <source>Orphanet J Rare Dis.</source> (<year>2022</year>) <volume>17</volume>:<fpage>233</fpage>. <pub-id pub-id-type="doi">10.1186/s13023-022-02358-x</pub-id><pub-id pub-id-type="pmid">35717227</pub-id></citation></ref>
<ref id="B40">
<label>40.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thomas</surname> <given-names>DL</given-names></name></person-group>. <article-title>Global control of hepatitis C: where challenge meets opportunity</article-title>. <source>Nat Med.</source> (<year>2013</year>) <volume>19</volume>:<fpage>850</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1038/nm.3184</pub-id><pub-id pub-id-type="pmid">23836235</pub-id></citation></ref>
<ref id="B41">
<label>41.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chiodo</surname> <given-names>GT</given-names></name> <name><surname>Tolle</surname> <given-names>SW</given-names></name> <name><surname>Bevan</surname> <given-names>L</given-names></name></person-group>. <article-title>Placebo-controlled trials: good science or medical neglect?</article-title> <source>West J Med.</source> (<year>2000</year>) <volume>172</volume>:<fpage>271</fpage>. <pub-id pub-id-type="doi">10.1136/ewjm.172.4.271</pub-id><pub-id pub-id-type="pmid">10778385</pub-id></citation></ref>
<ref id="B42">
<label>42.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thorlund</surname> <given-names>K</given-names></name> <name><surname>Dron</surname> <given-names>L</given-names></name> <name><surname>Park</surname> <given-names>JJ</given-names></name> <name><surname>Mills</surname> <given-names>EJ</given-names></name></person-group>. <article-title>Synthetic and external controls in clinical trials&#x02013;a primer for researchers</article-title>. <source>Clin Epidemiol</source>. (<year>2020</year>) <volume>12</volume>:<fpage>457</fpage>&#x02013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.2147/CLEP.S242097</pub-id><pub-id pub-id-type="pmid">32440224</pub-id></citation></ref>
<ref id="B43">
<label>43.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>RJ</given-names></name> <name><surname>Lu</surname> <given-names>MY</given-names></name> <name><surname>Chen</surname> <given-names>TY</given-names></name> <name><surname>Williamson</surname> <given-names>DF</given-names></name> <name><surname>Mahmood</surname> <given-names>F</given-names></name></person-group>. <article-title>Synthetic data in machine learning for medicine and healthcare</article-title>. <source>Nat Biomed Eng.</source> (<year>2021</year>) <volume>5</volume>:<fpage>493</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1038/s41551-021-00751-8</pub-id><pub-id pub-id-type="pmid">34131324</pub-id></citation></ref>
<ref id="B44">
<label>44.</label>
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Choi</surname> <given-names>E</given-names></name> <name><surname>Biswal</surname> <given-names>S</given-names></name> <name><surname>Malin</surname> <given-names>B</given-names></name> <name><surname>Duke</surname> <given-names>J</given-names></name> <name><surname>Stewart</surname> <given-names>WF</given-names></name> <name><surname>Sun</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Generating multi-label discrete patient records using generative adversarial networks</article-title>. In: Machine learning for healthcare conference. PMLR (<year>2017</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://proceedings.mlr.press/v68/choi17a.html">https://proceedings.mlr.press/v68/choi17a.html</ext-link></citation>
</ref>
<ref id="B45">
<label>45.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Beaulieu-Jones</surname> <given-names>BK</given-names></name> <name><surname>Wu</surname> <given-names>ZS</given-names></name> <name><surname>Williams</surname> <given-names>C</given-names></name> <name><surname>Lee</surname> <given-names>R</given-names></name> <name><surname>Bhavnani</surname> <given-names>SP</given-names></name> <name><surname>Byrd</surname> <given-names>JB</given-names></name> <etal/></person-group>. <article-title>Privacy-preserving generative deep neural networks support clinical data sharing</article-title>. <source>Circ Cardiovasc Qual Outcomes.</source> (<year>2019</year>) <volume>12</volume>:<fpage>e005122</fpage>. <pub-id pub-id-type="doi">10.1161/CIRCOUTCOMES.118.005122</pub-id><pub-id pub-id-type="pmid">31284738</pub-id></citation></ref>
<ref id="B46">
<label>46.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baowaly</surname> <given-names>MK</given-names></name> <name><surname>Lin</surname> <given-names>C-C</given-names></name> <name><surname>Liu</surname> <given-names>C-L</given-names></name> <name><surname>Chen</surname> <given-names>K-T</given-names></name></person-group>. <article-title>Synthesizing electronic health records using improved generative adversarial networks</article-title>. <source>J Am Med Inform Assoc.</source> (<year>2019</year>) <volume>26</volume>:<fpage>228</fpage>&#x02013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1093/jamia/ocy142</pub-id><pub-id pub-id-type="pmid">30535151</pub-id></citation></ref>
<ref id="B47">
<label>47.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shumailov</surname> <given-names>I</given-names></name> <name><surname>Shumaylov</surname> <given-names>Z</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Papernot</surname> <given-names>N</given-names></name> <name><surname>Anderson</surname> <given-names>R</given-names></name> <name><surname>Gal</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>AI models collapse when trained on recursively generated data</article-title>. <source>Nature.</source> (<year>2024</year>) <volume>631</volume>:<fpage>755</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-024-07566-y</pub-id><pub-id pub-id-type="pmid">39048682</pub-id></citation></ref>
<ref id="B48">
<label>48.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Loucera</surname> <given-names>C</given-names></name> <name><surname>Carmona</surname> <given-names>R</given-names></name> <name><surname>Bostelmann</surname> <given-names>G</given-names></name> <name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>D</given-names></name> <name><surname>Villegas</surname> <given-names>R</given-names></name> <name><surname>Gonzalez-Manzanares</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Evidence of the association between increased use of direct oral anticoagulants and a reduction in the rate of atrial fibrillation-related stroke and major bleeding at the population level (2012-2019)</article-title>. <source>Med Clin.</source> (<year>2024</year>) <volume>162</volume>:<fpage>220</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1016/j.medcli.2023.10.008</pub-id><pub-id pub-id-type="pmid">37989706</pub-id></citation></ref>
<ref id="B49">
<label>49.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Casimiro-Soriguer</surname> <given-names>CS</given-names></name> <name><surname>P&#x000E9;rez-Florido</surname> <given-names>J</given-names></name> <name><surname>Robles</surname> <given-names>EA</given-names></name> <name><surname>Lara</surname> <given-names>M</given-names></name> <name><surname>Aguado</surname> <given-names>A</given-names></name> <name><surname>Rodr&#x000ED;guez Iglesias</surname> <given-names>MA</given-names></name> <etal/></person-group>. <article-title>The integrated genomic surveillance system of Andalusia (SIEGA) provides a one health regional resource connected with the clinic</article-title>. <source>Sci Rep.</source> (<year>2024</year>) <volume>14</volume>:<fpage>19200</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-70107-0</pub-id><pub-id pub-id-type="pmid">39160186</pub-id></citation></ref>
<ref id="B50">
<label>50.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dopazo</surname> <given-names>J</given-names></name> <name><surname>Maya-Miles</surname> <given-names>D</given-names></name> <name><surname>Garc&#x000ED;a</surname> <given-names>F</given-names></name> <name><surname>Lorusso</surname> <given-names>N</given-names></name> <name><surname>Calleja</surname> <given-names>M</given-names></name> <name><surname>Pareja</surname> <given-names>MJ</given-names></name> <etal/></person-group>. <article-title>Implementing personalized medicine in COVID-19 in Andalusia: an opportunity to transform the healthcare system</article-title>. <source>J Pers Med</source>. (<year>2021</year>) <volume>11</volume>:<fpage>475</fpage>. <pub-id pub-id-type="doi">10.3390/jpm11060475</pub-id><pub-id pub-id-type="pmid">34073493</pub-id></citation></ref>
<ref id="B51">
<label>51.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Loucera</surname> <given-names>C</given-names></name> <name><surname>Perez-Florido</surname> <given-names>J</given-names></name> <name><surname>Casimiro-Soriguer</surname> <given-names>CS</given-names></name> <name><surname>Ortu&#x000F1;o</surname> <given-names>FM</given-names></name> <name><surname>Carmona</surname> <given-names>R</given-names></name> <name><surname>Bostelmann</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>Assessing the impact of SARS-CoV-2 lineages and mutations on patient survival</article-title>. <source>Viruses</source>. (<year>2022</year>) <volume>14</volume>:<fpage>1893</fpage>. <pub-id pub-id-type="doi">10.3390/v14091893</pub-id><pub-id pub-id-type="pmid">36146700</pub-id></citation></ref>
<ref id="B52">
<label>52.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Loucera</surname> <given-names>C</given-names></name> <name><surname>Pe&#x000F1;a-Chilet</surname> <given-names>M</given-names></name> <name><surname>Esteban-Medina</surname> <given-names>M</given-names></name> <name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>D</given-names></name> <name><surname>Villegas</surname> <given-names>R</given-names></name> <name><surname>Lopez-Miranda</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Real world evidence of calcifediol or vitamin D prescription and mortality rate of COVID-19 in a retrospective cohort of hospitalized Andalusian patients</article-title>. <source>Sci Rep.</source> (<year>2021</year>) <volume>11</volume>:<fpage>23380</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-02701-5</pub-id><pub-id pub-id-type="pmid">34862422</pub-id></citation></ref>
<ref id="B53">
<label>53.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Loucera-Mu&#x000F1;ecas</surname> <given-names>C</given-names></name> <name><surname>Canal-Rivero</surname> <given-names>M</given-names></name> <name><surname>Ruiz-Veguilla</surname> <given-names>M</given-names></name> <name><surname>Carmona</surname> <given-names>R</given-names></name> <name><surname>Bostelmann</surname> <given-names>G</given-names></name> <name><surname>Garrido-Torres</surname> <given-names>N</given-names></name> <etal/></person-group>. <article-title>Aripiprazole as protector against COVID-19 mortality</article-title>. <source>Sci Rep.</source> (<year>2024</year>) <volume>14</volume>:<fpage>12362</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-60297-y</pub-id><pub-id pub-id-type="pmid">38811612</pub-id></citation></ref>
<ref id="B54">
<label>54.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carmona-Pirez</surname> <given-names>J</given-names></name> <name><surname>Ioakeim-Skoufa</surname> <given-names>I</given-names></name> <name><surname>Gimeno-Miguel</surname> <given-names>A</given-names></name> <name><surname>Poblador-Plou</surname> <given-names>B</given-names></name> <name><surname>Gonz&#x000E1;lez-Rubio</surname> <given-names>F</given-names></name> <name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>D</given-names></name> <etal/></person-group>. <article-title>Multimorbidity profiles and infection severity in COVID-19 population using network analysis in the Andalusian health population database</article-title>. <source>Int J Environ Res Public Health.</source> (<year>2022</year>) <volume>19</volume>:<fpage>3808</fpage>. <pub-id pub-id-type="doi">10.3390/ijerph19073808</pub-id><pub-id pub-id-type="pmid">35409489</pub-id></citation></ref>
<ref id="B55">
<label>55.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>de la Oliva</surname> <given-names>V</given-names></name> <name><surname>Esteban-Medina</surname> <given-names>A</given-names></name> <name><surname>Alejos</surname> <given-names>L</given-names></name> <name><surname>Mu&#x000F1;oyerro-Mu&#x000F1;iz</surname> <given-names>D</given-names></name> <name><surname>Villegas</surname> <given-names>R</given-names></name> <name><surname>Dopazo</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Early prediction of ovarian cancer risk based on real world data</article-title>. <source>medRxiv.</source> (<year>2024</year>). <pub-id pub-id-type="doi">10.1101/2024.07.26.24310994</pub-id></citation>
</ref>
<ref id="B56">
<label>56.</label>
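<citation citation-type="journal"><person-group person-group-type="author"><name><surname>de la Oliva-Roque</surname> <given-names>VM</given-names></name> <name><surname>Kreil</surname> <given-names>DP</given-names></name> <name><surname>Dopazo</surname> <given-names>J</given-names></name> <name><surname>Ortu&#x000F1;o</surname> <given-names>F</given-names></name> <name><surname>Loucera</surname> <given-names>C</given-names></name></person-group>. <article-title>High-fidelity synthetic data replicates clinical prediction performance in a million-patient diabetes cohort</article-title>. <source>medRxiv</source>. (<year>2025</year>). <pub-id pub-id-type="doi">10.1101/2025.07.20.25331852</pub-id></citation>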
</ref>
<ref id="B57">
<label>57.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bathelt</surname> <given-names>F</given-names></name></person-group>. <article-title>The usage of OHDSI OMOP&#x02013;a scoping review</article-title>. In: <source>Proceedings of the German Medical Data Sciences (GMDS)</source>. <publisher-loc>Kiel</publisher-loc> (<year>2021</year>).<pub-id pub-id-type="pmid">34545824</pub-id></citation></ref>
<ref id="B58">
<label>58.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>Department of Health &#x00026; Social Care</collab></person-group>. <source>Data saves lives: reshaping health and social care with data</source>. (<year>2022</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.gov.uk/government/publications/data-saves-lives-reshaping-health-and-social-care-with-data/data-saves-lives-reshaping-health-and-social-care-with-data">https://www.gov.uk/government/publications/data-saves-lives-reshaping-health-and-social-care-with-data/data-saves-lives-reshaping-health-and-social-care-with-data</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B59">
<label>59.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>US Department of Health and Human Services</collab></person-group>. <source>The Health Insurance Portability and Accountability Privacy Rule</source>. (<year>2006</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.hhs.gov/hipaa/for-professionals/index.html">https://www.hhs.gov/hipaa/for-professionals/index.html</ext-link> (Accessed July 28, 2025).</citation>
</ref>
<ref id="B60">
<label>60.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ginsburg</surname> <given-names>GS</given-names></name> <name><surname>Phillips</surname> <given-names>KA</given-names></name></person-group>. <article-title>Precision medicine: from science to value</article-title>. <source>Health Aff.</source> (<year>2018</year>) <volume>37</volume>:<fpage>694</fpage>&#x02013;<lpage>701</lpage>. <pub-id pub-id-type="doi">10.1377/hlthaff.2017.1624</pub-id><pub-id pub-id-type="pmid">29733705</pub-id></citation></ref>
<ref id="B61">
<label>61.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ambale-Venkatesh</surname> <given-names>B</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Wu</surname> <given-names>CO</given-names></name> <name><surname>Liu</surname> <given-names>K</given-names></name> <name><surname>Hundley</surname> <given-names>WG</given-names></name> <name><surname>McClelland</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Cardiovascular event prediction by machine learning: the multi-ethnic study of atherosclerosis</article-title>. <source>Circ Res.</source> (<year>2017</year>) <volume>121</volume>:<fpage>1092</fpage>&#x02013;<lpage>101</lpage>. <pub-id pub-id-type="doi">10.1161/CIRCRESAHA.117.311312</pub-id><pub-id pub-id-type="pmid">28794054</pub-id></citation></ref>
<ref id="B62">
<label>62.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Hao</surname> <given-names>S</given-names></name> <name><surname>Zheng</surname> <given-names>L</given-names></name> <name><surname>Liao</surname> <given-names>J</given-names></name> <name><surname>Ye</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>Prediction of the 1-year risk of incident lung cancer: prospective study using electronic health records from the state of Maine</article-title>. <source>J Med Internet Res.</source> (<year>2019</year>) <volume>21</volume>:<fpage>e13260</fpage>. <pub-id pub-id-type="doi">10.2196/13260</pub-id><pub-id pub-id-type="pmid">31099339</pub-id></citation></ref>
<ref id="B63">
<label>63.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ye</surname> <given-names>B</given-names></name> <name><surname>Gagnon</surname> <given-names>A</given-names></name> <name><surname>Mok</surname> <given-names>SC</given-names></name></person-group>. <article-title>Recent technical strategies to identify diagnostic biomarkers for ovarian cancer</article-title>. <source>Expert Rev Proteomics.</source> (<year>2007</year>) <volume>4</volume>:<fpage>121</fpage>&#x02013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1586/14789450.4.1.121</pub-id><pub-id pub-id-type="pmid">17288520</pub-id></citation></ref>
<ref id="B64">
<label>64.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Beauchamp</surname> <given-names>TL</given-names></name> <name><surname>Childress</surname> <given-names>JF</given-names></name></person-group>. <source>Principles of Biomedical Ethics.</source> <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Oxford University Press</publisher-name> (<year>1994</year>).</citation>
</ref>
</ref-list>
</back>
</article>