<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Chem.</journal-id>
<journal-title>Frontiers in Chemistry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Chem.</abbrev-journal-title>
<issn pub-type="epub">2296-2646</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1480887</article-id>
<article-id pub-id-type="doi">10.3389/fchem.2024.1480887</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Chemistry</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Predicting the microalgae lipid profile obtained by supercritical fluid extraction using a machine learning model</article-title>
<alt-title alt-title-type="left-running-head">Rangel Pinto et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fchem.2024.1480887">10.3389/fchem.2024.1480887</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Rangel Pinto</surname>
<given-names>Juan David</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2718663/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Guerrero</surname>
<given-names>Jose L.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2438362/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rivera</surname>
<given-names>Lorena</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2875042/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Parada-Pinilla</surname>
<given-names>Mar&#xed;a Paula</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2841003/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cala</surname>
<given-names>M&#xf3;nica P.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1227339/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>L&#xf3;pez</surname>
<given-names>Gina</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2874935/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Gonz&#xe1;lez Barrios</surname>
<given-names>Andr&#xe9;s Fernando</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/300513/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Grupo de Dise&#xf1;o de Productos Y Procesos (GDPP)</institution>, <institution>Department of Chemical and Food Engineering</institution>, <institution>Universidad de los Andes</institution>, <addr-line>Bogot&#xe1;</addr-line>, <country>Colombia</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Metabolomics Core Facility&#x2014;MetCore</institution>, <institution>Vice-Presidency for Research</institution>, <institution>Universidad de los Andes</institution>, <addr-line>Bogot&#xe1;</addr-line>, <country>Colombia</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Unidad de Saneamiento y Biotecnolog&#xed;a Ambiental (USBA)</institution>, <institution>Departamento de Biolog&#xed;a</institution>, <institution>Facultad de Ciencias</institution>, <institution>Pontificia Universidad Javeriana (PUJ)</institution>, <addr-line>Bogot&#xe1;</addr-line>, <country>Colombia</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2548084/overview">Wojciech Smulek</ext-link>, Pozna&#x144; University of Technology, Poland</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1974199/overview">Filipe Hobi Bordon Sosa</ext-link>, University of Aveiro, Portugal</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2234644/overview">Abel Z&#xfa;&#xf1;iga-Moreno</ext-link>, National Polytechnic Institute (IPN), Mexico</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Juan David Rangel Pinto, <email>jd.rangel10@uniandes.edu.co</email>; Andr&#xe9;s Fernando Gonz&#xe1;lez Barrios, <email>andgonza@uniandes.edu.co</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>10</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1480887</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>08</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Rangel Pinto, Guerrero, Rivera, Parada-Pinilla, Cala, L&#xf3;pez and Gonz&#xe1;lez Barrios.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Rangel Pinto, Guerrero, Rivera, Parada-Pinilla, Cala, L&#xf3;pez and Gonz&#xe1;lez Barrios</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>In this study a Machine Learning model was employed to predict the lipid profile from supercritical fluid extraction (SFE) of microalgae <italic>Galdieria</italic> sp. USBA-GBX-832 under different temperature (40, 50, 60&#xb0;C), pressure (150, 250&#xa0;bar), and ethanol flow (0.6, 0.9&#xa0;mL&#xa0;min<sup>-1</sup>) conditions. Six machine learning regression models were trained using 33 independent variables: 29 from RD-Kit molecular descriptors, three from the extraction conditions, and the infinite dilution activity coefficient (IDAC). The lipidomic characterization analysis identified 139 features, annotating 89 lipids used as the entries of the model, primarily glycerophospholipids and glycerolipids. A methodology was proposed for selecting the representative lipids from the lipidomic analysis using an unsupervised learning method; these results were compared with Tanimoto scores and IDAC calculations using the COSMO-SAC-HB2 model. The models based on decision trees, particularly XGBoost, outperformed others (RMSE: 0.035, 0.095, 0.065 and coefficient of determination (R<sup>2</sup>): 0.971, 0.933, 0.946 for train, test and experimental validation, respectively), accurately predicting lipid profiles for unseen conditions. Machine Learning methods provide a cost-effective way to optimize SFE conditions and are applicable to other biological samples.</p>
</abstract>
<kwd-group>
<kwd>supercritical fluid extraction</kwd>
<kwd>regression models</kwd>
<kwd>lipidomic</kwd>
<kwd>COSMO-SAC</kwd>
<kwd>extremophile microalgae</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Green and Sustainable Chemistry</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<sec id="s1-1">
<title>1.1 Lipids extraction techniques</title>
<p>Lipids are a diverse group of biomolecules, generally classified into eight categories (fatty acyls, glycerolipids, glycerophospholipids, sphingolipids, sterol lipids, prenol lipids, saccharolipids and polyketides), based on their hydrophobic or amphipathic properties and chemically functional backbones (<xref ref-type="bibr" rid="B19">Fahy et al., 2005</xref>; <xref ref-type="bibr" rid="B37">Liebisch et al., 2020</xref>). Traditionally, oleaginous plants and seeds have been the primary sources of lipids for biofuels production. In recent years, microalgae have gained attention for their potential to provide a diverse range of bioactive molecules. In particular, extremophilic microalgae have the ability to grow under extreme conditions such as acidic or alkaline pH, high temperatures, light and heavy metal concentrations. Some microalgae lipids, such as polyketides and prenol lipids, are reported to possess antioxidant, anti-inflammatory, cytotoxic, and even anticancer properties (<xref ref-type="bibr" rid="B16">De Luca et al., 2021</xref>; <xref ref-type="bibr" rid="B32">Khan et al., 2018</xref>; <xref ref-type="bibr" rid="B10">Castro et al., 2023</xref>). Furthermore, glycerophospholipids, known for their amphiphilicity, are effective emulsifying agents, stabilizing oil-water emulsions in delivery systems for cosmetic and pharmaceutical industries (<xref ref-type="bibr" rid="B36">Li et al., 2019</xref>). This shift towards microalgae is due to their rapid growth rates, high lipid content, and adaptability to various environments (<xref ref-type="bibr" rid="B16">De Luca et al., 2021</xref>; <xref ref-type="bibr" rid="B32">Khan et al., 2018</xref>; <xref ref-type="bibr" rid="B10">Castro et al., 2023</xref>).</p>
<p>Obtaining lipids involves different standard methodologies that include mechanical cell disruption and solvent extraction. Currently, several techniques use solvents; one of the most widely used is the Bligh and Dyer (B&#x26;D) method for lipid quantitation at the analytical level (<xref ref-type="bibr" rid="B7">Bligh and Dyer, 1959</xref>; <xref ref-type="bibr" rid="B5">Azmin et al., 2016</xref>). However, the reliance of the B&#x26;D method on methanol and chloroform presents environmental and health risks unsuitable for industrial applications (<xref ref-type="bibr" rid="B57">Santoro et al., 2019</xref>). Other organic solvents like ethanol, dichloromethane, dimethyl ether, and hexane have been studied but often yield lower results compared to the B&#x26;D method, and some of these solvents may be toxic and hazardous pollutants, unsuitable for cosmetic, pharmaceutical and food industries (<xref ref-type="bibr" rid="B15">de Jesus et al., 2019</xref>; <xref ref-type="bibr" rid="B11">Cauchie et al., 2021</xref>; <xref ref-type="bibr" rid="B69">Xiao et al., 2012</xref>).</p>
<p>Soxhlet extraction offers improved extraction yields, however, large volumes of solvents required can be expensive to remove, and thermal degradation may also occur caused by the extraction performed at the boiling point of the solvent for extended periods of time (<xref ref-type="bibr" rid="B1">Akyil et al., 2018</xref>). Alternative methods such as microwave-assisted extraction, ultrasound-assisted extraction, and supercritical fluid extraction (SFE) are efficient, fast and sustainable. However, their application has been limited due to the higher capital investment for complex equipment (<xref ref-type="bibr" rid="B7">Bligh and Dyer, 1959</xref>; <xref ref-type="bibr" rid="B12">Chang et al., 2017</xref>; <xref ref-type="bibr" rid="B18">Desgrouas et al., 2014</xref>; <xref ref-type="bibr" rid="B71">Zekovi&#x107; et al., 2017</xref>; <xref ref-type="bibr" rid="B49">Orio et al., 2012</xref>).</p>
</sec>
<sec id="s1-2">
<title>1.2 Extraction of lipids employing supercritical fluid extraction (SFE)</title>
<p>Supercritical fluid extraction (SFE) is a green technology of growing interest for obtaining bioactive compounds because it is capable of solubilizing lipophilic substances in a shorter process time, and the solvent can be easily removed from the final extract: this ensures minimal alteration of the bioactive metabolites and preserves their biological functional properties. It achieves high selectivity by tuning pressure and temperature conditions. Its main disadvantage is the high cost of equipment compared to other extraction techniques (<xref ref-type="bibr" rid="B14">Crampon et al., 2013</xref>).</p>
<p>Over 90% of SFE processes use supercritical carbon dioxide (scCO2) due to its low critical temperature (31&#xb0;C) and pressure (74&#xa0;bar), non-flammability, non-toxicity and low cost (<xref ref-type="bibr" rid="B9">Capuzzo et al., 2013</xref>; <xref ref-type="bibr" rid="B54">Reid et al., 1988</xref>). Besides, CO2 is a gas in atmospheric conditions, achieving almost complete CO2 removal in extracts and resulting in solvent-free extract (<xref ref-type="bibr" rid="B45">Molino et al., 2020</xref>). scCO2 exhibits high diffusivity and low viscosity, similar to gasses, which allows the solvent phase to penetrate into the biological matrix, while its high density, like liquids, provides good solvating power. Together these properties enhance the penetration in the biological matrix and the solubilization of the intracellular compounds. However, CO2&#x2019;s non-polarity limits its solvent effectiveness, showing affinity only to non-polar compounds (<xref ref-type="bibr" rid="B17">de Melo et al., 2014</xref>). Cosolvents such as ethanol or isopropanol are used to modify the solvent polarity (<xref ref-type="bibr" rid="B70">Yousefi et al., 2019</xref>).</p>
<p>Extraction temperature and pressure significantly affect the compounds solubility in the solvent phase, depending on the chemical properties of the target compounds. In SFE, efficiency increases with both pressure and temperature. However, higher temperature and pressure can increase solubility of all compounds, even unwanted by-products, such as waxes or chlorophylls. This reduces extraction specificity and necessitates additional purification steps. Morcelli et al. reported reduced target compound yields due to increased chlorophyll concentrations when extracting carotenoids from <italic>Chlorella sorokiniana</italic> at higher pressure and temperature (<xref ref-type="bibr" rid="B46">Morcelli et al., 2021</xref>).</p>
<p>Additionally, higher temperatures may cause thermal degradation of compounds, while higher pressure can increase fluid density and obstruct diffusivity into the biomass, decreasing extraction yields (<xref ref-type="bibr" rid="B45">Molino et al., 2020</xref>; <xref ref-type="bibr" rid="B17">de Melo et al., 2014</xref>; <xref ref-type="bibr" rid="B70">Yousefi et al., 2019</xref>). This thermal degradation and reduced yield at higher pressures were reported by Sanzo et al. when extracting astaxanthin and lutein from <italic>Haematococcus pluvialis</italic> (<xref ref-type="bibr" rid="B59">Sanzo et al., 2018</xref>). Thus, many researchers aim to find optimal extraction conditions to maximize the yield and bioactivity of extracts (<xref ref-type="bibr" rid="B59">Sanzo et al., 2018</xref>; <xref ref-type="bibr" rid="B42">Mac&#xed;as-S&#xe1;nchez et al., 2010</xref>; <xref ref-type="bibr" rid="B48">Nobre et al., 2006</xref>; <xref ref-type="bibr" rid="B43">Macias Sanchez et al., 2009</xref>; <xref ref-type="bibr" rid="B41">Machmudah et al., 2006</xref>; <xref ref-type="bibr" rid="B58">Santoyo et al., 2006</xref>).</p>
</sec>
<sec id="s1-3">
<title>1.3 Thermodynamics-based methods for modeling supercritical fluid extraction</title>
<p>Developing an experimental design to identify optimal extraction conditions considering all variables involves significant time and resource investment. Researchers have formulated accurate and reliable models considering thermodynamics and kinetic constraints, equilibrium relationships, and mass transfer mechanisms across a spectrum of temperatures, pressures, and phase compositions (<xref ref-type="bibr" rid="B30">Izadifar and Abdolahi, 2006</xref>). These models are classified into three categories: empirical equations, analogical models drawing parallels between heat and mass transfer, and models derived from integrated differential mass balances (<xref ref-type="bibr" rid="B63">Sodeifian et al., 2016</xref>).</p>
<p>Empirical equation-based models fit for specific and limited cases, while heat and mass transfer models aim to describe extraction process robustly but are constrained by highly idealized assumptions, such as isothermal processes or homogeneous mixtures. These assumptions often overlook factors like particle size effects or cell wall rupture dynamics (<xref ref-type="bibr" rid="B52">Rai et al., 2014</xref>).</p>
<p>Thermodynamics-based models, such as those using the activity coefficient, describe non-ideal mixtures (<xref ref-type="bibr" rid="B4">Atkins, 2006</xref>). The activity coefficient indicates solvent-solute affinity and extraction efficiency. Models like UNIFAC use group-contribution methods to estimate interaction parameters by breaking molecules into functional groups, facilitating broader generalization and reducing experimental workload (<xref ref-type="bibr" rid="B21">Fredenslund et al., 1977</xref>). However, these models have some inherent disadvantages: they require extensive experimental data for accurate fragmentation, struggle with nonadditive molecular effects, and offer limited insight into solute-solvent interactions, which hinders their practical utility (<xref ref-type="bibr" rid="B33">Klamt, 1995</xref>).</p>
<p>An alternative to group-contribution models is the conductor-like screening model (COSMO), which relies on computational quantum mechanics. Unlike UNIFAC-Modified (2002), which uses 612 fitting parameters related to size, shape, and functional group interactions, COSMO models require only four universal parameters. These models predict thermo-physical properties without experimental data and calculate the chemical potential of any molecule in any mixture (<xref ref-type="bibr" rid="B23">Gerber and Soares, 2010</xref>; <xref ref-type="bibr" rid="B38">Lin and Sandler, 2001</xref>). <xref ref-type="fig" rid="F1">Figure 1</xref> presents the step-by-step calculation with a COSMO-based model, starting with the 3D molecular structures, and finishing with the calculation of thermodynamic properties under temperatures and compositions in the extraction system. COSMO-based models have been used successfully for predicting the optimal temperature and ethanol composition for SFE to obtain carotenoids. However, those calculations are based on individual lipids against CO2-ethanol mixtures and cannot account for solute competition or positive synergies that may enhance the extraction yields (<xref ref-type="bibr" rid="B46">Morcelli et al., 2021</xref>). To address these limitations, a comprehensive model is needed, incorporating not only COSMO calculations but also other chemoinformatics tools to accurately describe these effects.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Step calculation with a COSMO-based model. <bold>(A)</bold> 3D molecular structure of the molecules in the mixture: carbon dioxide, ethanol and lipid lucidenic acid L. <bold>(B)</bold> molecular surface charge distribution calculation. In red a high electronic density, in blue a low electronic density. This is the most time-consuming step due to quantum mechanical calculations. <bold>(C)</bold> Bidimensional projection of the surface charge distributions into sigma-profiles. <bold>(D)</bold> Determining the thermodynamic property, infinite dilution activity coefficient (IDAC) of the lipid calculated at 50&#xb0;C, varying ethanol mole fraction. Figures A, B, and C were obtained using JCOSMO-2.9.12.</p>
</caption>
<graphic xlink:href="fchem-12-1480887-g001.tif"/>
</fig>
</sec>
<sec id="s1-4">
<title>1.4 Molecular descriptors</title>
<p>For decades, researchers have sought to translate the encoded information in chemical structures into numerical representations that computers can understand and manipulate (<xref ref-type="bibr" rid="B66">Wang et al., 2021</xref>). This effort led to the development of Quantitative Structure-Activity Relationship (QSAR) approaches, a powerful <italic>in silico</italic> method. QSAR establishes quantitative relationships between a molecule&#x2019;s structure (represented by molecular descriptors) and its properties, including biological activities, reaction mechanisms, and physicochemical properties, such as solubility (<xref ref-type="bibr" rid="B67">Willighagen, 2010</xref>).</p>
<p>Over 5,000 molecular descriptors have been proposed, capturing various aspects of a molecule&#x2019;s structure (<xref ref-type="bibr" rid="B13">Consonni and Todeschini, 2010</xref>). These descriptors range from basic features like the number and types of atoms to more detailed information such as connectivity, geometry, charge distribution, and hydrogen bonding potential (<xref ref-type="bibr" rid="B26">Grisoni et al., 2018</xref>).</p>
</sec>
<sec id="s1-5">
<title>1.5 Machine learning in SFE</title>
<p>The proliferation of Artificial Intelligence (AI) in recent years has been remarkable, permeating various sectors and becoming an integral part of daily activities (<xref ref-type="bibr" rid="B51">Prezhdo, 2020</xref>). AI applications are now used as personal assistants, customer preference predictors, and creators of images and natural language (<xref ref-type="bibr" rid="B44">Mistry et al., 2021</xref>). The success of machine learning in the technology sector is anticipated to be similar in science. The exponential increase in computational power over the past 2&#xa0;decades has enabled <italic>in silico</italic> investigations previously deemed unfeasible due to limited time and experimental resources.</p>
<p>Physics-driven tools have emerged, facilitating high-throughput computational screening for drug discovery, predicting molecular properties based on Quantitative Structure-Property Relationships (QSPRs), and calculating activity coefficients for thermodynamic systems using quantum mechanics models (<xref ref-type="bibr" rid="B68">Winter et al., 2023</xref>). In contrast, machine learning operates without relying on an understanding of underlying physics, leveraging vast datasets to make predictions. This paradigm shift from physics-driven to data-driven modeling has seen various machine learning algorithms implemented across diverse scientific disciplines, including chemistry, biology, fluid dynamics, and material science (<xref ref-type="bibr" rid="B8">Butler et al., 2018</xref>).</p>
<p>Research in supercritical fluids has also embraced machine learning, from molecular simulation to estimation of solubilities in supercritical conditions (<xref ref-type="bibr" rid="B56">Roach et al., 2023</xref>). In the domain of SFE, there is significant interest in optimizing processes. Much of the analysis has focused on predicting extraction yield under various conditions, employing complex algorithms such as artificial neural networks (ANN), adaptive neuro fuzzy inference system (ANFIS) or cascade-forward back-propagation network (CFBPN) to address an optimization problem (<xref ref-type="bibr" rid="B25">Ghoreishi and Heidari, 2013</xref>; <xref ref-type="bibr" rid="B27">Heidari and Ghoreishi, 2013</xref>; <xref ref-type="bibr" rid="B35">Lashkarbolooki et al., 2013</xref>; <xref ref-type="bibr" rid="B24">Ghoreishi et al., 2016</xref>; <xref ref-type="bibr" rid="B29">Idris et al., 2022</xref>; <xref ref-type="bibr" rid="B65">Valim et al., 2018</xref>). Studies have also investigated the solubility of different organic compounds in scCO2, but these have often focused on individual molecules or a limited set of compounds (<xref ref-type="bibr" rid="B31">Kamali and Mousavi, 2008</xref>; <xref ref-type="bibr" rid="B47">Nguyen et al., 2022</xref>; <xref ref-type="bibr" rid="B28">Huwaimel and Alobaida, 2022</xref>; <xref ref-type="bibr" rid="B34">Kostyrin et al., 2022</xref>; <xref ref-type="bibr" rid="B2">Aminian and ZareNezhad, 2020</xref>). There is a noticeable absence of studies aiming to generalize the solubility of hundreds of organic compounds in a solvent or to elucidate changes in lipid profile composition based on SFE variables (<xref ref-type="bibr" rid="B56">Roach et al., 2023</xref>).</p>
<p>Consequently, in the present study, six Machine Learning models were tested to predict the microalgae lipid profile obtained by SFE at different pressure, temperature and ethanol flow conditions. The lipid profile of the extracts was elucidated using RP-LC-ESI(&#x2b;/&#x2212;)-QTOF-MS platform, and K-Medoids, an unsupervised learning method, was used for systematic lipid selection.</p>
</sec>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Dataset compilation</title>
<p>The data flow for building the models is presented in <xref ref-type="fig" rid="F2">Figure 2</xref>. A single dataset consolidated all the information for training and testing the models. The defined extraction conditions, the cleaned molecular descriptors and the results from IDAC calculations served as independent variables, while the lipid recovery, measured in the lipidomic characterization analysis, was the dependent variable. Some experiments were performed to collect all this information, and some intermediate steps were necessary for preprocessing the collected data. The data, files and codes used along the methodology have been made available in a GitHub repository (<ext-link ext-link-type="uri" xlink:href="https://github.com/Grupo-de-Diseno-de-Productos-y-Procesos/Lipids-SFE">https://github.com/Grupo-de-Diseno-de-Productos-y-Procesos/Lipids-SFE</ext-link>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Data Flow Diagram of Lipid Extraction and Lipidomic Characterization from <italic>Galdieria</italic> sp. and Machine Learning Regression Models Training for Recovery Prediction.</p>
</caption>
<graphic xlink:href="fchem-12-1480887-g002.tif"/>
</fig>
<sec id="s2-1-1">
<title>2.1.1 Conditions for supercritical fluid extraction (SFE) of microalgae <italic>Galdieria</italic> sp. USBA-GBX-832</title>
<p>The algal strain <italic>Galdieria</italic> sp. USBA-GBX-832 in lyophilized pellets was obtained from culture cultivation at Pontificia Universidad Javeriana, Colombia (CMPUJ U832). This biomass was cultured under mixotrophic conditions in MG911 for 8 days at 43&#xb1;2&#xb0;C, with a constant agitation speed of 170&#xa0;rpm, a light intensity of 20&#xa0;&#xb5;mol&#xa0;m<sup>&#x2212;2</sup>&#xa0;s<sup>&#x2212;1</sup>, and an aeration rate of 0.2 vvm (<xref ref-type="bibr" rid="B40">L&#xf3;pez et al., 2019</xref>; <xref ref-type="bibr" rid="B55">Rivera, 2024</xref>). The biomass of <italic>Galdieria</italic> sp. USBA-GBX-832 was frozen for 24&#xa0;h at &#x2212;80&#xb0;C and freeze-dried (Alpha 1-2 LDPlus, Martin Christ, Germany) at a pressure of 4 &#xd7; 10<sup>&#x2212;4</sup> and temperature of &#x2212;40&#xb0;C for 48&#xa0;h. To ensure uniformity, biomass underwent homogenization before the extraction process. SFE experiments employed carbon dioxide (99.99% purity, Messer, Colombia) and ethanol (99.8%, ITW Reagents, Germany) as solvents.</p>
<p>SFE extractions were performed using the MV-10 ASFE System (Waters, United States) following the manufacturer&#x2019;s recommendations. Freeze-dried biomass was powdered with a mortar and pestle and sieved, selecting particle size between 180 and 500&#xa0;&#x3bc;m, and dried at 45&#xb0;C for 12&#xa0;h to eliminate moisture. Samples of 1.0&#xa0;g of microalgae biomass were wrapped with filter paper (7&#x2013;10&#xa0;&#xb5;m pore size) and placed in the extraction vessels. Extraction conditions (pressure, temperature, CO2 flow, cosolvent flow, and extraction time) were controlled <italic>via</italic> the panel, with a CO2 flow rate of 5&#xa0;mL/min for 75&#xa0;min. Pressure (150 and 250&#xa0;bar &#xb1;1&#xa0;bar), temperature (40, 50, and 60&#xb0;C &#xb1; 0.5&#xb0;C), and cosolvent flow (0.6 and 0.9&#xa0;mL/min of ethanol &#xb1;0.1&#xa0;mL/min) were varied, based on literature reports (<xref ref-type="bibr" rid="B17">de Melo et al., 2014</xref>). Extracts were collected in amber flasks to prevent daylight degradation, concentrated in a vacuum concentrator (Vacufuge<sup>&#xae;</sup> Plus, Eppendorf) at 40&#xb0;C for 3&#xa0;h, and freeze-dried (Alpha 1-2 LDPlus, Martin Christ) at &#x2212;20&#xb0;C, 1&#xa0;mbar for 26&#xa0;h. Lipidomic analysis was conducted on 10&#xa0;&#xb5;g samples of each extraction (see <xref ref-type="table" rid="T1">Table 1</xref> for experimental design).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Experimental conditions defined for supercritical fluid extraction.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Temperature ethanol flow pressure (bar)</th>
<th colspan="2" align="center">40&#xb0;C</th>
<th colspan="2" align="center">50&#xb0;C</th>
<th colspan="2" align="center">60&#xb0;C</th>
</tr>
<tr>
<th align="center">0.6 mLmin<sup>-1</sup>
</th>
<th align="center">0.9 mLmin<sup>-1</sup>
</th>
<th align="center">0.6 mLmin<sup>-1</sup>
</th>
<th align="center">0.9 mLmin<sup>-1</sup>
</th>
<th align="center">0.6 mLmin<sup>-1</sup>
</th>
<th align="center">0.9 mLmin<sup>-1</sup>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">150</td>
<td align="center">SC1</td>
<td align="center">SC2</td>
<td align="center">SC3</td>
<td align="center">SC4</td>
<td align="center">SC5</td>
<td align="center">SC6</td>
</tr>
<tr>
<td align="center">250</td>
<td align="center">SC7</td>
<td align="center">SC8</td>
<td align="center">SC9</td>
<td align="center">SC10</td>
<td align="center">SC11</td>
<td align="center">SC12</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-1-2">
<title>2.1.2 Lipidomic characterization analysis and representative lipid selection</title>
<p>Lipidomic characterization was conducted using RP-LC-ESI (&#x2b;/&#x2212;)-QTOF-MS. Supercritical extracts were dissolved in MeOH:MTBE (1:1) until obtaining a solution at 200&#xa0;ppm. Samples were vortexed and centrifuged at 13,000&#xa0;rpm for 10&#xa0;min at 4&#xb0;C. Chromatographic elution was achieved by injecting 2&#xa0;&#xb5;L of sample into an InfinityLab Poroshell C18 column (3.0 &#xd7; 100&#xa0;mm, 2.7&#xa0;&#xb5;m) at a flow rate of 0.6 mLmin<sup>-1</sup>, with a column temperature of 60&#xb0;C. Mobile phases consisted of 10&#xa0;mM ammonium formate, ACN:H2O (60:40) and 0.1% of formic acid for phase A and 10&#xa0;mM ammonium formate, IPA:ACN (90:10) and 0.1% of formic acid for phase B, with the following gradient elution: 0&#x2013;2&#xa0;min, 15%&#x2013;30% B; 2&#x2013;2.5&#xa0;min, 30%&#x2013;48% B; 2.5&#x2013;11&#xa0;min, 48%&#x2013;82% B; 11&#x2013;11.5&#xa0;min, 82%&#x2013;99% B; 11.5&#x2013;12&#xa0;min, 99% B; 12&#x2013;12.1&#xa0;min, 99%&#x2013;15% B; and 12.1&#x2013;18&#xa0;min, 15% B. The mass spectrometer was operated in both positive and negative modes (ESI &#x2b;/&#x2212;) with a range of 65&#x2013;1700&#xa0;m/z. Capillary voltage was set to 3,000&#xa0;V, the drying gas flow rate was 12&#xa0;L&#xa0;min<sup>-1</sup> at 250&#xb0;C, gas nebulizer 3.59 bar (52 psi), fragmentor voltage 175&#xa0;V, skimmer 65&#xa0;V and octopole radio frequency voltage (OCT RF vpp) 750&#xa0;V. Data were collected in centroid mode at a scan rate of 1.02 spectra per second. For electrospray ionization in positive mode, two reference masses were used: m/z 121.0509 [C5H4N4&#x2b;H]&#x2b; and m/z 922.0098 [C18H18O6N3P3F24 &#x2b; H]&#x2b;. For electrospray ionization in negative mode, two reference masses were used: m/z 112.9856 [C2O2F3 (NH4)] and m/z 1,033.9881 (C18H18O6N3P3F24).</p>
<p>The lipidomic characterization process is limited in identifying all lipids at the highest level of detail, and several lipids share the same shorthand notation. Full structural information is required for further calculations, needing a detailed description of the identified lipids. To address this, a methodology was developed for selecting a representative lipid from the available reported lipids. First, candidate names and structural information in isomeric SMILES format were obtained from Lipid MAPS (<xref ref-type="bibr" rid="B39">Lipid Maps, 2024</xref>). Next, molecular descriptors were calculated using the RDKit 2023.9.4 library; from the 210 descriptors available in RDKit, 29 were selected following the methodology explained in <xref ref-type="sec" rid="s2-2">section 2.2</xref>. The K-Medoids clustering algorithm, an unsupervised learning method, grouped the candidate lipids, and for each group, a centroid was calculated using the cleaned molecular descriptors data. The lipid closest to this centroid was selected as the representative lipid.</p>
<p>The results of this methodology were compared and analyzed against those obtained through Tanimoto similarity scores and IDAC calculations. Tanimoto scores were computed for each pair of candidates, and the mean score for each candidate relative to the others in the group was calculated. The highest-scoring candidate (closest to 1.0) was considered the most structurally similar lipid within the group. The IDAC calculation methodology (further details in the next section) involved evaluating each candidate&#x2019;s activity coefficients under the SFE conditions. The candidate exhibiting the lowest squared error against the mean results of the group was identified as possessing the representative physical and thermodynamic behavior under SFE conditions.</p>
</sec>
<sec id="s2-1-3">
<title>2.1.3 Infinite dilution activity coefficient (IDAC) evaluation</title>
<p>The calculation of IDAC requires information about the electronic charge distribution of the molecules involved in the CO2-ethanol-lipid thermodynamic system (see <xref ref-type="fig" rid="F1">Figure 1</xref>). The electronic charge distribution of lipids was determined using GAMESS software (Mark Gordon&#x2019;s Quantum Theory Group, Iowa State University, United States) (<xref ref-type="bibr" rid="B6">Barca et al., 2020</xref>), with support from the COSPRT patch routine developed by The Virtual Laboratory for Properties Prediction (LVPP, UFRGS, Brazil) (<xref ref-type="bibr" rid="B62">Soares et al., 2020</xref>). For these calculations, 3D-structural information in MOL file format is necessary. The resulting files, in GOUT format, were integrated into the compounds&#x2019; library of JCOSMO 2.9.12 (LVPP, UFRGS, Brazil) (<xref ref-type="bibr" rid="B20">Ferrarini et al., 2018</xref>). This software was used to calculate IDAC, at the same SFE conditions, temperature and ethanol mole fraction. The lipids were set at a mole fraction of 1 &#xd7; 10<sup>&#x2212;5</sup> to ensure infinite dilution conditions.</p>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 RDKit molecular descriptors selection</title>
<p>A set of 210 molecular descriptors was calculated using the RDKit 2023.9.4 library. Data preprocessing involved a Python 3.10 script that removed descriptors with significant missing or unique values. Pearson correlation analysis was then performed with a threshold of 0.75 to reduce redundancy. A final set of 29 descriptors was selected for training the regression models. The descriptors selected by this methodology are specified in <xref ref-type="sec" rid="s10">Supplementary Data 1</xref>.</p>
</sec>
<sec id="s2-3">
<title>2.3 Machine learning models description</title>
<p>Six Machine Learning regression models were trained and tested: Lasso (<xref ref-type="bibr" rid="B64">Tibshirani, 1996</xref>), Gaussian Regression (GR) (<xref ref-type="bibr" rid="B53">Rasmussen and Williams, 2006</xref>), Support Vector Machines (SVR) (<xref ref-type="bibr" rid="B61">Smola and Sch&#xf6;lkopf, 2004</xref>), Random Forest (RFR), Gradient-Boosted Trees (XGBoost) (<xref ref-type="bibr" rid="B22">Freund and Schapire, 1997</xref>), and Artificial Neural Network (ANN) (<xref ref-type="bibr" rid="B3">Atienza, 2018</xref>). These algorithms were implemented using Python 3.10.12, Keras library was used for ANN, and Scikit-Learn library for the other methods.</p>
</sec>
<sec id="s2-4">
<title>2.4 Data splitting</title>
<p>The dataset, comprising the three extraction conditions, 29 molecular descriptors, IDAC results, and the recovery of the lipids on a logarithmic scale, was split randomly into training and testing sets with an 80:20 ratio. Data from the extraction condition SC5 were set aside from the beginning and excluded from the training and testing data sets. This information was used to validate the models&#x2019; capacity to predict the lipid profile under new, unseen extraction conditions.</p>
</sec>
<sec id="s2-5">
<title>2.5 Hyperparameter tuning and evaluation of the models</title>
<p>The models were trained and evaluated both including and excluding the IDAC calculations to assess the influence of this variable in the performance of the regression models. A hyperparameter tuning was performed for every model using Grid Search with 5-fold Cross-Validation (See <xref ref-type="sec" rid="s10">Supplementary Data 2</xref>). After training, the following metrics were calculated: Root Mean Squared Error (RMSE), Mean Absolute Error (MAE), and coefficient of determination (R<sup>2</sup>) (<xref ref-type="bibr" rid="B60">Scikit Learn, 2024</xref>). Hyperparameter tuning process, training and tests calculations were performed using the Python library Scikit-Learn (<xref ref-type="bibr" rid="B50">Pedregosa et al., 2012</xref>).</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<title>3 Results and discussion</title>
<sec id="s3-1">
<title>3.1 Lipid profile of the supercritical extracts</title>
<p>A total of 139 features were identified from the supercritical extracts, 89 could be annotated while 50 remain unknown. <xref ref-type="sec" rid="s10">Supplementary Data 3</xref> provides a comprehensive list of all the identified lipids along with their recovery under the 12 extraction conditions. The lipidomic characterization revealed the primary components extracted from microalgae <italic>Galdieria</italic> sp. USBA-GBX-832 were lipids with glycerol backbone: glycerophospholipids and glycerolipids; followed by sphingolipids, prenols and fatty acyls (<xref ref-type="fig" rid="F3">Figure 3</xref>). Although triglycerides had previously been identified in the microalgae under the cultivation conditions from which the biomass was obtained, none were detected in the supercritical extracts (<xref ref-type="bibr" rid="B55">Rivera, 2024</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Relative abundance of different lipid classes in the twelve supercritical extracts.</p>
</caption>
<graphic xlink:href="fchem-12-1480887-g003.tif"/>
</fig>
<p>Most of the lipids were identified in all the extracts; however, it is observed that, depending on the condition employed, their abundances were different. <xref ref-type="fig" rid="F3">Figure 3</xref> shows the differences in lipid class profile for each extract. For instance, at lower pressure (150&#xa0;bar), more fatty acyls are extracted when the ethanol flow is lower (0.6 mLmin<sup>-1</sup>) compared to higher flow (0.9 mLmin<sup>-1</sup>). This suggests that fatty acyls are less attracted to the solvent when the polarity increases. Interestingly, this difference is less noticeable at higher pressure (250&#xa0;bar), indicating that pressure helps dissolve fatty acyls, making the CO2-ethanol mixture a more effective solvent. In contrast, the abundance of glycerophospholipids increases as all three variables increase. The lowest abundance is observed at SC1, while the highest abundance is at SC12. At low pressure, the cosolvent has a stronger effect on glycerolipids than that observed for fatty acyls.</p>
</sec>
<sec id="s3-2">
<title>3.2 Representative lipid selection using an unsupervised method</title>
<p>
<xref ref-type="sec" rid="s10">Supplementary Data 4</xref> shows all the lipids identified in the lipidomic characterization analysis, with their corresponding lipid annotated. In cases where more than one lipid was reported with the same shorthand notation, the selection was performed using the unsupervised algorithm K-Medoids as was explained above.</p>
<p>The representative lipid selection results were compared with Tanimoto scores and IDAC calculations. <xref ref-type="fig" rid="F4">Figure 4</xref> shows the results of the Tanimoto score calculations. High similarity scores (&#x3e;0.80, and in some cases &#x3e;0.95) were observed when comparing the candidates. This high similarity can be attributed to the minimal structural differences between the candidates, primarily involving the location of double bonds. Furthermore, candidates were ranked based on their average score, revealing that no candidate stood out significantly as all had nearly identical values.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Tanimoto Score for pairs of PC 16:0_18:1 <bold>(A)</bold> and DG 36:2 <bold>(B)</bold> candidates.</p>
</caption>
<graphic xlink:href="fchem-12-1480887-g004.tif"/>
</fig>
<p>Additionally, all candidates for each lipid exhibit the same trend and order of magnitude when calculating IDAC (See <xref ref-type="fig" rid="F5">Figure 5</xref>). These findings, combined with the Tanimoto Score results, suggest that while the representative lipid selection through the unsupervised learning method may introduce uncertainty, the physical and thermodynamic behavior of any candidate would correspond to the behavior observed experimentally in the context of extraction (Complementary results in <xref ref-type="sec" rid="s10">Supplementary Data 5</xref>). K-Medoids and Tanimoto Score give a quick result, while IDAC calculation is highly time-consuming, calculating a single molecule&#x2019;s surface-charge distribution can take several days of computer processing. It is important to mention that the only way to validate the selected lipid is experimentally, either through standard solutions or by enhancing the detection capacity of the instruments. However, both options are unfeasible for this work, and generally for most research endeavors.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>IDAC calculations for PC 16:0_18:1 <bold>(A)</bold>, DG 36:2 <bold>(B)</bold> candidates at 50&#xb0;C as a function of the mole fraction of ethanol.</p>
</caption>
<graphic xlink:href="fchem-12-1480887-g005.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 Model performance and prediction over the experimental dataset</title>
<p>The lipidomic characterization produced a dataset of 1,056 entries. Additionally, 210 molecular descriptors were calculated. The cleaning data and dimension reduction process was performed by removing variables with missing or unique values, and high correlations. This step aimed to reduce the computational cost and noise, prevent overfitting and improve generalization. The final set of 29 molecular descriptors, combined with the extraction conditions (pressure, temperature, and ethanol flow rate), serve as input for training the selected Machine Learning algorithms for predicting lipid concentration under the given extraction conditions. <xref ref-type="table" rid="T2">Table 2</xref> shows the regression metrics for all the assessed models.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Performance metrics of the assessed Machine Learning algorithms.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">MSE train</th>
<th align="left">MSE test</th>
<th align="left">MSE validation</th>
<th align="left">RMSE train</th>
<th align="left">RMSE test</th>
<th align="left">RMSE validation</th>
<th align="left">R2 train</th>
<th align="left">R2 test</th>
<th align="left">R2 validation</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Lasso-</td>
<td align="left">0.805</td>
<td align="left">0.953</td>
<td align="left">0.808</td>
<td align="left">0.648</td>
<td align="left">0.908</td>
<td align="left">0.653</td>
<td align="left">0.463</td>
<td align="left">0.355</td>
<td align="left">0.453</td>
</tr>
<tr>
<td align="left">Lasso&#x2b;</td>
<td align="left">0.624</td>
<td align="left">0.657</td>
<td align="left">0.597</td>
<td align="left">0.389</td>
<td align="left">0.432</td>
<td align="left">0.356</td>
<td align="left">0.702</td>
<td align="left">0.679</td>
<td align="left">0.723</td>
</tr>
<tr>
<td align="left">GR-</td>
<td align="left">0.243</td>
<td align="left">0.388</td>
<td align="left">0.329</td>
<td align="left">0.059</td>
<td align="left">0.150</td>
<td align="left">0.108</td>
<td align="left">0.951</td>
<td align="left">0.894</td>
<td align="left">0.910</td>
</tr>
<tr>
<td align="left">GR&#x2b;</td>
<td align="left">0.231</td>
<td align="left">0.426</td>
<td align="left">0.325</td>
<td align="left">0.054</td>
<td align="left">0.181</td>
<td align="left">0.105</td>
<td align="left">0.959</td>
<td align="left">0.865</td>
<td align="left">0.918</td>
</tr>
<tr>
<td align="left">XGB-</td>
<td align="left">0.186</td>
<td align="left">0.308</td>
<td align="left">0.254</td>
<td align="left">0.035</td>
<td align="left">0.095</td>
<td align="left">0.065</td>
<td align="left">0.971</td>
<td align="left">0.933</td>
<td align="left">0.946</td>
</tr>
<tr>
<td align="left">XGB&#x2b;</td>
<td align="left">0.179</td>
<td align="left">0.367</td>
<td align="left">0.288</td>
<td align="left">0.032</td>
<td align="left">0.135</td>
<td align="left">0.083</td>
<td align="left">0.992</td>
<td align="left">0.917</td>
<td align="left">0.914</td>
</tr>
<tr>
<td align="left">RF-</td>
<td align="left">0.134</td>
<td align="left">0.310</td>
<td align="left">0.291</td>
<td align="left">0.018</td>
<td align="left">0.096</td>
<td align="left">0.084</td>
<td align="left">0.985</td>
<td align="left">0.933</td>
<td align="left">0.927</td>
</tr>
<tr>
<td align="left">RF&#x2b;</td>
<td align="left">0.137</td>
<td align="left">0.394</td>
<td align="left">0.324</td>
<td align="left">0.019</td>
<td align="left">0.155</td>
<td align="left">0.105</td>
<td align="left">0.986</td>
<td align="left">0.884</td>
<td align="left">0.918</td>
</tr>
<tr>
<td align="left">SVR-</td>
<td align="left">0.273</td>
<td align="left">0.378</td>
<td align="left">0.251</td>
<td align="left">0.074</td>
<td align="left">0.143</td>
<td align="left">0.063</td>
<td align="left">0.937</td>
<td align="left">0.905</td>
<td align="left">0.953</td>
</tr>
<tr>
<td align="left">SVR&#x2b;</td>
<td align="left">0.233</td>
<td align="left">0.400</td>
<td align="left">0.290</td>
<td align="left">0.054</td>
<td align="left">0.160</td>
<td align="left">0.084</td>
<td align="left">0.958</td>
<td align="left">0.881</td>
<td align="left">0.934</td>
</tr>
<tr>
<td align="left">ANN-</td>
<td align="left">0.271</td>
<td align="left">0.381</td>
<td align="left">0.260</td>
<td align="left">0.074</td>
<td align="left">0.145</td>
<td align="left">0.067</td>
<td align="left">0.939</td>
<td align="left">0.897</td>
<td align="left">0.944</td>
</tr>
<tr>
<td align="left">ANN&#x2b;</td>
<td align="left">0.210</td>
<td align="left">0.534</td>
<td align="left">0.309</td>
<td align="left">0.044</td>
<td align="left">0.285</td>
<td align="left">0.095</td>
<td align="left">0.966</td>
<td align="left">0.788</td>
<td align="left">0.926</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Abbreviations: &#x2b;, including IDAC as variable; -, excluding IDAC as variable; GR, Gaussian regression; XGB, XGBoost; RF, random forest; SVR, support vector regressor; ANN, artificial neural network.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Testing the predictiveness of these models on unseen data revealed some limitations. The Lasso displayed the worst performance due to its reliance on linear regression. For instance, while Gaussian Regression exhibited excellent performance on the training set (R<sup>2</sup> &#x2248; 0.998), it showed a notable drop when tested on unseen data (R<sup>2</sup> &#x3c; 0.85). This overfitting was reduced with manual hyperparameter tuning, raising the test performance to around R<sup>2</sup> &#x2248; 0.90 (see <xref ref-type="sec" rid="s10">Supplementary Data 2</xref>). Models based on decision tree architectures, such as Random Forest and XG Boost, consistently demonstrated better performance and generalization. While XGBoost showed promising results with low MSE and RMSE values on training, test, and experimental validation data, it is essential to note that some models, including ANN and SVR, struggled to achieve R<sup>2</sup> &#x3e; 0.90 and RMSE &#x3c;0.1 on the test data.</p>
<p>When comparing the performance of the models with and without IDAC, the Lasso model with IDAC notably achieved a consistent coefficient of determination of around 0.7 for training, test and validation data. This suggests a strong correlation between solubility and activity coefficient, but it is still insufficient for training an accurate model based on linear regression. For the other models, overfitting is observed. While training performance improved when including IDAC, this did not translate to test and validation data. This drop was especially significant for Random Forest and ANN. These results indicate that although IDAC is related to solubility, it is not essential for building a robust model that predicts the lipid profile of the extracts.</p>
<p>Further analysis of the variables identified two molecular descriptors, Quantitative Estimation of Drug-likeness (qed) and Minimum Electrotopological State Index (MinEStateIndex), that were highly correlated with IDAC (Pearson correlation coefficients of 0.76 and 0.78, respectively). This redundancy between features might be causing the overfitting. The qed measure reflects the underlying distribution of molecular properties, including molecular weight, logP, and topological polar surface area, whereas the MinEStateIndex is the minimum electro-topological state value across all atoms in a molecule. This value can help assess the overall electron-withdrawing character of the molecule. Both descriptors are related to the activity coefficient calculation.</p>
<p>One limitation in calculating IDAC using COSMO-SAC-HB2 is that it does not account for pressure as a variable. The newer COSMO-SAC-Phi model addresses this by incorporating pressure into the IDAC calculations [72]. However, to do so, saturation data is required to compute the parameters involved in the activity coefficient under varying pressures. Unfortunately, this saturation data is currently unavailable for the identified lipids, as these complex molecules lack sufficient experimental data in existing databases.</p>
<p>
<xref ref-type="fig" rid="F6">Figure 6</xref> presents the regression results using the best-performing model, XGBoost, applied to the training, test, and experimental validation data. The low MSE and RMSE values, along with the high coefficient of determination score for the validation data, indicate that the model can accurately predict a complete lipid profile for unseen extraction conditions. Moreover, the model maintained accuracy when predicting for intermediate experimental conditions. Graphical results for the other models are available in <xref ref-type="sec" rid="s10">Supplementary Data 6</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Regression results for XG Boost model using <bold>(A)</bold> Training data, <bold>(B)</bold> Test data, and <bold>(C)</bold> Validation data from SC5.</p>
</caption>
<graphic xlink:href="fchem-12-1480887-g006.tif"/>
</fig>
<p>All models exhibited high uncertainty when predicting no lipid recovery under specific extraction conditions, particularly for lipids recovered only in SC1. To retain valuable information, lipids with no recovery (relative abundance of 0.00 in <xref ref-type="sec" rid="s10">Supplementary Data 3</xref>) were assigned an arbitrary Log [x] value of &#x2212;7 during the logarithmic transformation. This value, chosen to be lower than the smallest detected abundance, represented a lipid quantity too low for detection by the instruments. XGBoost outperformed the other models in handling these low-recovery lipids, although predicted values still remained very low (Log [x] &#x3c; &#x2212;6).</p>
<p>The relatively small dataset of 1,056 entries, coupled with the specific experimental conditions under which it was generated, may limit the model&#x2019;s ability to generalize beyond its current scope. Despite this, the model demonstrated strong predictive performance by accurately forecasting the complete lipid profile concentration under a combination of conditions unseen by the model during training. This experimental validation suggests the model&#x2019;s reliability within the dataset&#x2019;s context, even though the validation data originated from the same experimental design that fed the training process.</p>
<p>Although the proposed methodology could be extended to different biological samples, including other microalgae species beyond <italic>Galdieria</italic> sp. USBA-GBX-832, the current model is specifically trained on data unique to this species. As a result, its ability to generalize to other microalgae remains uncertain, with predicted lipid profiles closely tied to the biological and cultivation characteristics of <italic>Galdieria</italic> sp. To enhance generalization, additional data reflecting the distinct biological properties and environmental conditions of other microalgae species would be required. Future research should prioritize testing the model on a broader range of species to assess its adaptability and refine it for improved cross-species prediction.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>In this work, a Machine Learning approach was used to build the first model capable of accurately predicting the complete lipid profile during supercritical fluid extraction across a range of temperatures and cosolvent flow conditions. Additionally, a systematic approach for representative lipid selection was developed, demonstrating that, in an extraction context, the chosen lipids will exhibit the physical and thermodynamic behavior observed experimentally.</p>
<p>The Lasso model with IDAC demonstrated the strong correlation between solubility and the activity coefficient, although the other models that include IDAC suffered overfitting. The best performing model for predicting the lipid profile of the extract was XG Boost without IDAC. IDAC results were limited to the thermodynamic model used, COSMO-SAC-HB2, which does not consider pressure effects. A COSMO-based model that does consider pressure, COSMO-SAC-Phi, was not used because the necessary saturation information was unavailable.</p>
<p>Although the built model is restricted to predicting the lipid profile of the microalgae, this methodology allows researchers to reduce the cost and time needed to identify the desired extraction conditions, whether to achieve the highest extraction yield or to optimize the recovery of specific lipids or lipid groups. For instance, the model can help pinpoint conditions that maximize the extraction of valuable lipids like phosphoglycerolipids or reduce the presence of undesired compounds like chlorophylls.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://github.com/Grupo-de-Diseno-de-Productos-y-Procesos/Lipids-SFE">https://github.com/Grupo-de-Diseno-de-Productos-y-Procesos/Lipids-SFE</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>JR: Data curation, Formal Analysis, Investigation, Methodology, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing, Validation. JG: Formal Analysis, Methodology, Data curation, Investigation, Writing&#x2013;original draft. LR: Funding acquisition, Methodology, Formal Analysis, Investigation, Writing&#x2013;original draft. MP-P: Formal Analysis, Investigation, Methodology, Data curation, Writing&#x2013;review and editing. MC: Methodology, Conceptualization, Funding acquisition, Supervision, Writing&#x2013;review and editing. GL: Formal Analysis, Methodology, Writing&#x2013;review and editing, Conceptualization, Funding acquisition, Project administration, Supervision. AG: Conceptualization, Formal Analysis, Funding acquisition, Methodology, Supervision, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The resources of this project were provided by Sistema General de Regal&#xed;as (SGR) Asignaci&#xf3;n para la Ciencia, Tecnolog&#xed;a e Innovaci&#xf3;n. BPIN 2020000100356. Bogot&#xe1;, 2019.</p>
</sec>
<ack>
<p>This work was carried out under MAVDT Contract No. 212, 20188 for access to genetic resources RGE 0287-8.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fchem.2024.1480887/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fchem.2024.1480887/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet4.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet6.docx" id="SM2" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet5.docx" id="SM3" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet3.pdf" id="SM4" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet2.docx" id="SM5" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.docx" id="SM6" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Akyil</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>&#x130;lter</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Ko&#xe7;</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ertekin</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Recent trends in extraction techniques for high value compounds from algae as food additives</article-title>. <source>Turk. JAF. Sci. Tech.</source> <volume>6</volume>, <fpage>1008</fpage>&#x2013;<lpage>1014</lpage>. <pub-id pub-id-type="doi">10.24925/turjaf.v6i8.1008-1014.1895</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aminian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>ZareNezhad</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A generalized neural network model for the VLE of supercritical carbon dioxide fluid extraction of fatty oils</article-title>. <source>Fuel (Lond).</source> <volume>282</volume>, <fpage>118823</fpage>. <pub-id pub-id-type="doi">10.1016/j.fuel.2020.118823</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Atienza</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Advanced Deep Learning with Keras: applying GANs and other new deep learning algorithms to the real world</source>. <publisher-name>Packt Publishing</publisher-name>.</citation>
</ref>
<ref id="B4">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Atkins</surname>
<given-names>P. W.</given-names>
</name>
</person-group> (<year>2006</year>). <source>Atkins&#x27; physical chemistry</source>. <edition>8th ed</edition>. <publisher-name>Oxford University Press</publisher-name>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Azmin</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Abdul Manan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wan Alwi</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Chua</surname>
<given-names>L. S.</given-names>
</name>
<name>
<surname>Mustaffa</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Yunus</surname>
<given-names>N. A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Herbal processing and extraction technologies</article-title>. <source>Sep. Purif. Rev.</source> <volume>45</volume>, <fpage>305</fpage>&#x2013;<lpage>320</lpage>. <pub-id pub-id-type="doi">10.1080/15422119.2016.1145395</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barca</surname>
<given-names>G. M. J.</given-names>
</name>
<name>
<surname>Bertoni</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Carrington</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Datta</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>De Silva</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Deustua</surname>
<given-names>J. E.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Recent developments in the general atomic and molecular electronic structure system</article-title>. <source>J. Chem. Phys.</source> <volume>152</volume>, <fpage>154102</fpage>. <pub-id pub-id-type="doi">10.1063/5.0005188</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bligh</surname>
<given-names>E. G.</given-names>
</name>
<name>
<surname>Dyer</surname>
<given-names>W. J.</given-names>
</name>
</person-group> (<year>1959</year>). <article-title>A rapid method of total lipid extraction and purification</article-title>. <source>Can. J. Biochem. Physiol.</source> <volume>37</volume>, <fpage>911</fpage>&#x2013;<lpage>917</lpage>. <pub-id pub-id-type="doi">10.1139/o59-099</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Butler</surname>
<given-names>K. T.</given-names>
</name>
<name>
<surname>Davies</surname>
<given-names>D. W.</given-names>
</name>
<name>
<surname>Cartwright</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Isayev</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Walsh</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Machine learning for molecular and materials science</article-title>. <source>Nature</source> <volume>559</volume>, <fpage>547</fpage>&#x2013;<lpage>555</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-018-0337-2</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Capuzzo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Maffei</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Occhipinti</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Supercritical fluid extraction of plant flavors and fragrances</article-title>. <source>Molecules</source> <volume>18</volume>, <fpage>7194</fpage>&#x2013;<lpage>7238</lpage>. <pub-id pub-id-type="doi">10.3390/molecules18067194</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Castro</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Oliveira</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Dias</surname>
<given-names>A. C. P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Microalgae and cyanobacteria as sources of bioactive compounds for cosmetic applications: a systematic review</article-title>. <source>Algal Res.</source> <volume>76</volume>, <fpage>103287</fpage>. <pub-id pub-id-type="doi">10.1016/j.algal.2023.103287</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cauchie</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Delfau-Bonnet</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Caulier</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hantson</surname>
<given-names>A.-L.</given-names>
</name>
<name>
<surname>Renault</surname>
<given-names>J.-H.</given-names>
</name>
<name>
<surname>Gerbaux</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Comprehensive lipid profiling of Microchloropsis gaditana by liquid chromatography - (tandem) mass spectrometry: bead milling and extraction solvent effects</article-title>. <source>Algal Res.</source> <volume>58</volume>, <fpage>102388</fpage>. <pub-id pub-id-type="doi">10.1016/j.algal.2021.102388</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname>
<given-names>C.-W.</given-names>
</name>
<name>
<surname>Yen</surname>
<given-names>C.-C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>M.-T.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>M.-C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.-T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Microwave-assisted extraction of cannabinoids in hemp nut using response surface methodology: optimization and comparative study</article-title>. <source>Molecules</source> <volume>22</volume>, <fpage>1894</fpage>. <pub-id pub-id-type="doi">10.3390/molecules22111894</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Consonni</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Todeschini</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Molecular descriptors</article-title>,&#x201d; in <source>Recent advances in QSAR studies: methods and applications</source>. <publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Springer</publisher-name>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crampon</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mouahid</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Toudji</surname>
<given-names>S.-A. A.</given-names>
</name>
<name>
<surname>L&#xe9;pine</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Badens</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Influence of pretreatment on supercritical CO2 extraction from Nannochloropsis oculata</article-title>. <source>J. Supercrit. Fluids</source> <volume>79</volume>, <fpage>337</fpage>&#x2013;<lpage>344</lpage>. <pub-id pub-id-type="doi">10.1016/j.supflu.2012.12.022</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Jesus</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Ferreira</surname>
<given-names>G. F.</given-names>
</name>
<name>
<surname>Moreira</surname>
<given-names>L. S.</given-names>
</name>
<name>
<surname>Wolf Maciel</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Maciel Filho</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Comparison of several methods for effective lipid extraction from wet microalgae using green solvents</article-title>. <source>Renew. Energy</source> <volume>143</volume>, <fpage>130</fpage>&#x2013;<lpage>141</lpage>. <pub-id pub-id-type="doi">10.1016/j.renene.2019.04.168</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Luca</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pappalardo</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Limongi</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Viviano</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Radice</surname>
<given-names>R. P.</given-names>
</name>
<name>
<surname>Todisco</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Lipids from microalgae for cosmetic applications</article-title>. <source>Cosmetics</source> <volume>8</volume>, <fpage>52</fpage>. <pub-id pub-id-type="doi">10.3390/cosmetics8020052</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Melo</surname>
<given-names>M. M. R.</given-names>
</name>
<name>
<surname>Silvestre</surname>
<given-names>A. J. D.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>C. M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Supercritical fluid extraction of vegetable matrices: applications, trends and future perspectives of a convincing green technology</article-title>. <source>J. Supercrit. Fluids</source> <volume>92</volume>, <fpage>115</fpage>&#x2013;<lpage>176</lpage>. <pub-id pub-id-type="doi">10.1016/j.supflu.2014.04.007</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Desgrouas</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Baghdikian</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Mabrouki</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Bory</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Taudon</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Parzy</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Rapid and green extraction, assisted by microwave and ultrasound of cepharanthine from Stephania rotunda Lour</article-title>. <source>Sep. Purif. Technol.</source> <volume>123</volume>, <fpage>9</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1016/j.seppur.2013.12.016</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fahy</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Subramaniam</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>H. A.</given-names>
</name>
<name>
<surname>Glass</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Merrill</surname>
<given-names>A. H.</given-names>
<suffix>Jr</suffix>
</name>
<name>
<surname>Murphy</surname>
<given-names>R. C.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>A comprehensive classification system for lipids</article-title>. <source>J. Lipid Res.</source> <volume>46</volume>, <fpage>839</fpage>&#x2013;<lpage>861</lpage>. <pub-id pub-id-type="doi">10.1194/jlr.E400004-JLR200</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ferrarini</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Fl&#xf4;res</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>Muniz</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>de Soares</surname>
<given-names>R. P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>An open and extensible sigma&#x2010;profile database for COSMO&#x2010;based models</article-title>. <source>AIChE J.</source> <volume>64</volume>, <fpage>3443</fpage>&#x2013;<lpage>3455</lpage>. <pub-id pub-id-type="doi">10.1002/aic.16194</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Fredenslund</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gmehling</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rasmussen</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>1977</year>). <source>Vapor liquid equilibria using UNIFAC</source>. <publisher-loc>Amsterdam</publisher-loc>: <publisher-name>Elsevier</publisher-name>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Freund</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Schapire</surname>
<given-names>R. E.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>A decision-theoretic generalization of on-line learning and an application to boosting</article-title>. <source>J. Comput. Syst. Sci.</source> <volume>55</volume>, <fpage>119</fpage>&#x2013;<lpage>139</lpage>. <pub-id pub-id-type="doi">10.1006/jcss.1997.1504</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gerber</surname>
<given-names>R. P.</given-names>
</name>
<name>
<surname>Soares</surname>
<given-names>R. de P.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Prediction of infinite-dilution activity coefficients using UNIFAC and COSMO-SAC variants</article-title>. <source>Ind. Eng. Chem. Res.</source> <volume>49</volume>, <fpage>7488</fpage>&#x2013;<lpage>7496</lpage>. <pub-id pub-id-type="doi">10.1021/ie901947m</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghoreishi</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Hedayati</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mousavi</surname>
<given-names>S. O.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Quercetin extraction from Rosa damascena Mill via supercritical CO2: neural network and adaptive neuro fuzzy interface system modeling and response surface optimization</article-title>. <source>J. Supercrit. Fluids</source> <volume>112</volume>, <fpage>57</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1016/j.supflu.2016.02.006</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghoreishi</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Heidari</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Extraction of Epigallocatechin-3-gallate from green tea via supercritical fluid technology: neural network modeling and response surface optimization</article-title>. <source>J. Supercrit. Fluids</source> <volume>74</volume>, <fpage>128</fpage>&#x2013;<lpage>136</lpage>. <pub-id pub-id-type="doi">10.1016/j.Supflu.2012.12.009</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grisoni</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ballabio</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Todeschini</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Consonni</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Molecular descriptors for structure&#x2013;activity applications: a hands-on approach</article-title>. <source>Methods Mol. Biol.</source> <volume>1800</volume>, <fpage>3</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4939-7899-1_1</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heidari</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ghoreishi</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Prediction of supercritical extraction recovery of EGCG using hybrid of Adaptive Neuro-Fuzzy Inference System and mathematical model</article-title>. <source>J. Supercrit. Fluids</source> <volume>82</volume>, <fpage>158</fpage>&#x2013;<lpage>167</lpage>. <pub-id pub-id-type="doi">10.1016/j.Supflu.2013.07.006</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huwaimel</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Alobaida</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Anti-cancer drug solubility development within a green solvent: design of novel and robust mathematical models based on artificial intelligence</article-title>. <source>Molecules</source> <volume>27</volume>, <fpage>5140</fpage>. <pub-id pub-id-type="doi">10.3390/molecules27165140</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Idris</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Markom</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Abd Rahman</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Prediction of overall yield of Gynura procumbens from ethanol-water &#x2b; supercritical CO2 extraction using artificial neural network model</article-title>. <source>Case Stud. Chem. Environ. Eng.</source> <volume>5</volume>, <fpage>100175</fpage>. <pub-id pub-id-type="doi">10.1016/j.cscee.2021.100175</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Izadifar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Abdolahi</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Comparison between neural network and mathematical modeling of supercritical CO2 extraction of black pepper essential oil</article-title>. <source>J. Supercrit. Fluids</source> <volume>38</volume>, <fpage>37</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1016/j.supflu.2005.11.012</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kamali</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Mousavi</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Analytic, neural network, and hybrid modeling of supercritical extraction of &#x3b1;-pinene</article-title>. <source>J. Supercrit. Fluids</source> <volume>47</volume>, <fpage>168</fpage>&#x2013;<lpage>173</lpage>. <pub-id pub-id-type="doi">10.1016/j.supflu.2008.08.005</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The promising future of microalgae: current status, challenges, and optimization of a sustainable and renewable industry for biofuels, feed, and other products</article-title>. <source>Microb. Cell Fact.</source> <volume>17</volume>, <fpage>36</fpage>. <pub-id pub-id-type="doi">10.1186/s12934-018-0879-x</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Klamt</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Conductor-like screening model for real solvents: a new approach to the quantitative calculation of solvation phenomena</article-title>. <source>J. Phys. Chem.</source> <volume>99</volume>, <fpage>2224</fpage>&#x2013;<lpage>2235</lpage>. <pub-id pub-id-type="doi">10.1021/j100007a062</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kostyrin</surname>
<given-names>E. V.</given-names>
</name>
<name>
<surname>Ponkratov</surname>
<given-names>V. V.</given-names>
</name>
<name>
<surname>Al-Shati</surname>
<given-names>A. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Development of machine learning model and analysis study of drug solubility in supercritical solvent for green technology development</article-title>. <source>Arab. J. Chem.</source> <volume>15</volume>, <fpage>104346</fpage>. <pub-id pub-id-type="doi">10.1016/j.arabjc.2022.104346</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lashkarbolooki</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shafipour</surname>
<given-names>Z. S.</given-names>
</name>
<name>
<surname>Hezave</surname>
<given-names>A. Z.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Trainable cascade-forward back-propagation network modeling of spearmint oil extraction in a packed bed using SC-CO2</article-title>. <source>J. Supercrit. Fluids</source> <volume>73</volume>, <fpage>108</fpage>&#x2013;<lpage>115</lpage>. <pub-id pub-id-type="doi">10.1016/j.Supflu.2012.10.013</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Anankanbil</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Phospholipid-based surfactants</article-title>,&#x201d; in <source>Biobased surfactants</source> (<publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>Elsevier Inc.</publisher-name>), <fpage>243</fpage>&#x2013;<lpage>286</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liebisch</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Fahy</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Aoki</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dennis</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Durand</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ejsing</surname>
<given-names>C. S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Update on LIPID MAPS classification, nomenclature, and shorthand notation for MS-derived lipid structures</article-title>. <source>J. Lipid Res.</source> <volume>61</volume>, <fpage>1539</fpage>&#x2013;<lpage>1555</lpage>. <pub-id pub-id-type="doi">10.1194/jlr.S120001025</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>S.-T.</given-names>
</name>
<name>
<surname>Sandler</surname>
<given-names>S. I.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>
<italic>A priori</italic> phase equilibrium prediction from a segment contribution solvation model</article-title>. <source>Ind. Eng. Chem. Res.</source> <volume>41</volume>, <fpage>899</fpage>&#x2013;<lpage>913</lpage>. <pub-id pub-id-type="doi">10.1021/ie001047w</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="web">
<collab>LIPID MAPS</collab> (<year>2024</year>). <article-title>A free, open access lipidomics resource</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://lipidmaps.org">https://lipidmaps.org</ext-link> (Accessed May 4, 2024)</comment>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>L&#xf3;pez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yate</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ramos</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Cala</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>Restrepo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Baena</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Production of polyunsaturated fatty acids and lipids from autotrophic, mixotrophic and heterotrophic cultivation of Galdieria sp. strain USBA-GBX-832</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>10791</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-46645-3</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Machmudah</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shotipruk</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Goto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sasaki</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hirose</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Extraction of astaxanthin from Haematococcus pluvialis using supercritical CO2 and ethanol as entrainer</article-title>. <source>Ind. Eng. Chem. Res.</source> <volume>45</volume>, <fpage>3652</fpage>&#x2013;<lpage>3657</lpage>. <pub-id pub-id-type="doi">10.1021/ie051357k</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mac&#xed;as-S&#xe1;nchez</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Fernandez-Sevilla</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez</surname>
<given-names>F. G. A.</given-names>
</name>
<name>
<surname>Garc&#xed;a</surname>
<given-names>M. C. C.</given-names>
</name>
<name>
<surname>Grima</surname>
<given-names>E. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Supercritical fluid extraction of carotenoids from Scenedesmus almeriensis</article-title>. <source>Food Chem.</source> <volume>123</volume>, <fpage>928</fpage>&#x2013;<lpage>935</lpage>. <pub-id pub-id-type="doi">10.1016/j.foodchem.2010.04.076</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Macias Sanchez</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mantell</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rodriguez</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Martinez de la Ossa</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Lubian</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Montero</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Comparison of supercritical fluid and ultrasound-assisted extraction of carotenoids and chlorophyll a from Dunaliella salina</article-title>. <source>Talanta</source> <volume>77</volume>, <fpage>948</fpage>&#x2013;<lpage>952</lpage>. <pub-id pub-id-type="doi">10.1016/j.talanta.2008.07.032</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mistry</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Franco</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Cooper</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Roberts</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Viswanathan</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>How machine learning will revolutionize electrochemical sciences</article-title>. <source>ACS Energy Lett.</source> <volume>6</volume>, <fpage>1422</fpage>&#x2013;<lpage>1431</lpage>. <pub-id pub-id-type="doi">10.1021/acsenergylett.1c00194</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Molino</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mehariya</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Di Sanzo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Larocca</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Martino</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Leone</surname>
<given-names>G. P.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Recent developments in supercritical fluid extraction of bioactive compounds from microalgae: role of key parameters, technological achievements and challenges</article-title>. <source>J. CO2 Util.</source> <volume>36</volume>, <fpage>196</fpage>&#x2013;<lpage>209</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcou.2019.11.014</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morcelli</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cassel</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Vargas</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rech</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Marc&#xed;lio</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Supercritical fluid (CO2&#x2b;ethanol) extraction of chlorophylls and carotenoids from Chlorella sorokiniana: COSMO-SAC assisted prediction of properties and experimental approach</article-title>. <source>J. CO2 Util.</source> <volume>51</volume>, <fpage>101649</fpage>. <pub-id pub-id-type="doi">10.1016/j.jcou.2021.101649</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nguyen</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Alamray</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kamal</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Diana</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Mohamed</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Algarni</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Computational prediction of drug solubility in supercritical carbon dioxide: thermodynamic and artificial intelligence modeling</article-title>. <source>J. Mol. Liq.</source> <volume>354</volume>, <fpage>118888</fpage>. <pub-id pub-id-type="doi">10.1016/j.molliq.2022.118888</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nobre</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Marcelo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Passos</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Beir&#xe3;o</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Palavra</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gouveia</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>Supercritical carbon dioxide extraction of astaxanthin and other carotenoids from the microalga Haematococcus pluvialis</article-title>. <source>Eur. Food Res. Technol.</source> <volume>223</volume>, <fpage>787</fpage>&#x2013;<lpage>790</lpage>. <pub-id pub-id-type="doi">10.1007/s00217-006-0270-8</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Orio</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Alexandru</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cravotto</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Mantegna</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Barge</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>UAE, MAE, SFE-CO2 and classical methods for the extraction of Mitragyna speciosa leaves</article-title>. <source>Ultrason. Sonochem.</source> <volume>19</volume>, <fpage>591</fpage>&#x2013;<lpage>595</lpage>. <pub-id pub-id-type="doi">10.1016/j.ultsonch.2011.10.001</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Scikit-learn: machine learning in Python</article-title>. <source>arXiv</source>. <pub-id pub-id-type="doi">10.48550/ARXIV.1201.0490</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prezhdo</surname>
<given-names>O. V.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Advancing physical chemistry with machine learning</article-title>. <source>J. Phys. Chem. Lett.</source> <volume>11</volume>, <fpage>9656</fpage>&#x2013;<lpage>9658</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jpclett.0c03130</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rai</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Punase</surname>
<given-names>K. D.</given-names>
</name>
<name>
<surname>Mohanty</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bhargava</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Evaluation of models for supercritical fluid extraction</article-title>. <source>Int. J. Heat Mass Transf.</source> <volume>72</volume>, <fpage>274</fpage>&#x2013;<lpage>287</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijheatmasstransfer.2014.01.011</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rasmussen</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>C. K. I.</given-names>
</name>
</person-group> (<year>2006</year>). <source>Gaussian processes for machine learning</source>. <publisher-name>MIT Press</publisher-name>.</citation>
</ref>
<ref id="B54">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Reid</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Prausnitz</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Poling</surname>
<given-names>B. E.</given-names>
</name>
</person-group> (<year>1988</year>). <source>The properties of gases and liquids</source>. <edition>4th Edition</edition>. <publisher-name>McGraw-Hill</publisher-name>. <pub-id pub-id-type="doi">10.1036/0070116822</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rivera</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2024</year>). <source>Evaluaci&#xf3;n del efecto de tres condiciones de cultivo en Galdieria sp. USBA-GBX-832 a escala de fotobiorreactor</source>. <publisher-loc>Bogot&#xe1;, Colombia</publisher-loc>: <publisher-name>Pontificia Universidad Javeriana</publisher-name>.</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roach</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Rignanese</surname>
<given-names>G.-M.</given-names>
</name>
<name>
<surname>Erriguible</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Aymonier</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Applications of machine learning in supercritical fluids research</article-title>. <source>J. Supercrit. Fluids</source> <volume>202</volume>, <fpage>106051</fpage>. <pub-id pub-id-type="doi">10.1016/j.supflu.2023.106051</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santoro</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Nardi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Benincasa</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Costanzo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Giordano</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Procopio</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Sustainable and selective extraction of lipids and bioactive compounds from microalgae</article-title>. <source>Molecules</source> <volume>24</volume>, <fpage>4347</fpage>. <pub-id pub-id-type="doi">10.3390/molecules24234347</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santoyo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cavero</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jaime</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Iba&#xf1;ez</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Se&#xf1;or&#xe1;ns</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Reglero</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Supercritical carbon dioxide extraction of compounds with antimicrobial activity from Origanum vulgare L.: determination of optimal extraction parameters</article-title>. <source>J. Food Prot.</source> <volume>69</volume>, <fpage>369</fpage>&#x2013;<lpage>375</lpage>. <pub-id pub-id-type="doi">10.4315/0362-028x-69.2.369</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sanzo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Mehariya</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Martino</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Larocca</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Casella</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chianese</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Supercritical carbon dioxide extraction of astaxanthin, lutein, and fatty acids from Haematococcus pluvialis microalgae</article-title>. <source>Mar. Drugs</source> <volume>16</volume>, <fpage>334</fpage>. <pub-id pub-id-type="doi">10.3390/md16090334</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="web">
<collab>Scikit Learn</collab> (<year>2024</year>). <article-title>Metrics and scoring: quantifying the quality of predictions</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://scikit-learn.org/stable/modules/model_evaluation.html">https://scikit-learn.org/stable/modules/model_evaluation.html</ext-link> (Accessed May 27, 2024)</comment>.</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smola</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Sch&#xf6;lkopf</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>A tutorial on support vector regression</article-title>. <source>Stat. Comput.</source> <volume>14</volume>, <fpage>199</fpage>&#x2013;<lpage>222</lpage>. <pub-id pub-id-type="doi">10.1023/b:stco.0000035301.49549.88</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Soares</surname>
<given-names>R. D. P.</given-names>
</name>
<name>
<surname>Fl&#xf4;res</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>Dudapelisser</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Ferrarini</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>GabrielPastorello</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <source>lvpp/sigma: LVPP sigma-profile database (20.06)</source>. <publisher-name>Zenodo</publisher-name>. <pub-id pub-id-type="doi">10.5281/ZENODO.3924076</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sodeifian</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ghorbandoost</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sajadian</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Ardestani</surname>
<given-names>N. S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Extraction of oil from Pistacia khinjuk using supercritical carbon dioxide: experimental and modeling</article-title>. <source>J. Supercrit. Fluids</source> <volume>110</volume>, <fpage>265</fpage>&#x2013;<lpage>274</lpage>. <pub-id pub-id-type="doi">10.1016/j.supflu.2015.12.004</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tibshirani</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Regression shrinkage and selection via the lasso</article-title>. <source>J. R. Stat. Soc. Ser. B Methodol.</source> <volume>58</volume> (<issue>1</issue>), <fpage>267</fpage>&#x2013;<lpage>288</lpage>. <pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Valim</surname>
<given-names>I. C.</given-names>
</name>
<name>
<surname>Rego</surname>
<given-names>A. S. C.</given-names>
</name>
<name>
<surname>Queiroz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Brant</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Neto</surname>
<given-names>A. A. F.</given-names>
</name>
<name>
<surname>Vilani</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Use of artificial intelligence to experimental conditions identification in the process of delignification of sugarcane bagasse from supercritical carbon dioxide</article-title>. <source>Comput. Aided Chem. Eng.</source> <volume>43</volume>, <fpage>1469</fpage>&#x2013;<lpage>1474</lpage>. <pub-id pub-id-type="doi">10.1016/B978-0-444-64235-6.50256-4</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Quantum chemical descriptors in quantitative structure&#x2013;activity relationship models and their applications</article-title>. <source>Chemom. Intell. Lab. Syst.</source> <volume>217</volume>, <fpage>104384</fpage>. <pub-id pub-id-type="doi">10.1016/j.chemolab.2021.104384</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Willighagen</surname>
<given-names>E. L.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Three-dimensional (3D) molecular representations</article-title>,&#x201d; in <source>Handbook of chemoinformatics algorithms</source> (<publisher-loc>Boca Raton, FL</publisher-loc>: <publisher-name>Chapman and Hall/CRC</publisher-name>).</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Winter</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Winter</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Esper</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Schilling</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bardow</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>SPT-NRTL: a physics-guided machine learning model to predict thermodynamically consistent activity coefficients</article-title>. <source>Fluid Phase Equilib.</source> <volume>568</volume>, <fpage>113731</fpage>. <pub-id pub-id-type="doi">10.1016/j.fluid.2023.113731</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Mj&#xf8;s</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Haugsgjerd</surname>
<given-names>B. O.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Efficiencies of three common lipid extraction methods evaluated by calculating mass balances of the fatty acids</article-title>. <source>J. Food Compost. Anal.</source> <volume>25</volume>, <fpage>198</fpage>&#x2013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1016/j.jfca.2011.08.003</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yousefi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rahimi-Nasrabadi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pourmortazavi</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Wysokowski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jesionowski</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ehrlich</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Supercritical fluid extraction of essential oils</article-title>. <source>TrAC Trends Anal. Chem.</source> <volume>118</volume>, <fpage>182</fpage>&#x2013;<lpage>193</lpage>. <pub-id pub-id-type="doi">10.1016/j.trac.2019.05.038</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zekovi&#x107;</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Pinta&#x107;</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Majki&#x107;</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Vidovi&#x107;</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mimica-Duki&#x107;</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Tesli&#x107;</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Utilization of sage by-products as raw material for antioxidants recovery&#x2014;ultrasound versus microwave-assisted extraction</article-title>. <source>Ind. Crops Prod.</source> <volume>99</volume>, <fpage>49</fpage>&#x2013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1016/j.indcrop.2017.01.028</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>