<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Environ. Sci.</journal-id>
<journal-title>Frontiers in Environmental Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Environ. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-665X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1107393</article-id>
<article-id pub-id-type="doi">10.3389/fenvs.2023.1107393</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Environmental Science</subject>
<subj-group>
<subject>Mini Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>The foundations of big data sharing: A CGIAR international research organization perspective</article-title>
<alt-title alt-title-type="left-running-head">Basel et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenvs.2023.1107393">10.3389/fenvs.2023.1107393</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Basel</surname>
<given-names>Ashleigh M.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1901835/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nguyen</surname>
<given-names>Kien Tri</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2152152/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Arnaud</surname>
<given-names>Elizabeth</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/51341/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Craparo</surname>
<given-names>Alessandro C. W.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2150241/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>International Centre for Tropical Agriculture</institution>, <institution>Centro Internacional de Agricultura Tropical (CIAT)</institution>, <addr-line>Cali</addr-line>, <country>Colombia</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>African Institute for Mathematical Sciences</institution>, <addr-line>Cape Town</addr-line>, <country>South Africa</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Mathematical Biosciences Hub</institution>, <institution>Department of Mathematical Sciences</institution>, <institution>Stellenbosch University</institution>, <addr-line>Matieland</addr-line>, <country>South Africa</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>International Centre for Tropical Agriculture</institution>, <institution>CIAT</institution>, <addr-line>Hanoi</addr-line>, <country>Vietnam</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Bioversity International</institution>, <addr-line>Montpellier</addr-line>, <country>France</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1373797/overview">Vernon Visser</ext-link>, University of Cape Town, South Africa</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/624535/overview">Polina Lemenkova</ext-link>, Universit&#xe9; libre de Bruxelles, Belgium</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/418582/overview">Andrii Shelestov</ext-link>, Kyiv Polytechnic Institute, Ukraine</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1720057/overview">Ebrahim Jahanshiri</ext-link>, Crops For the Future UK, United Kingdom</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Ashleigh M. Basel, <email>A.Basel@cgiar.org</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Environmental Informatics and Remote Sensing, a section of the journal Frontiers in Environmental Science</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>07</day>
<month>03</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>11</volume>
<elocation-id>1107393</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>22</day>
<month>02</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Basel, Nguyen, Arnaud and Craparo.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Basel, Nguyen, Arnaud and Craparo</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The potential of big data capabilities to transform and understand global agricultural and biological systems often relies on data from different sources that must be considered together or aggregated to provide insights. The value of data is, however, not only in its collection and storage, but largely in its re-use. Big data storage repositories are not enough when we consider a world brimming with escalating volumes of data; here we need to consider innovative systems and tools which address data harmonization and standardization and, importantly, ones that can bridge the gap between science and end users. In this paper, we will demonstrate how CGIAR (including the Alliance of Bioversity International and CIAT) develops a culture of co-operation and collaboration among custodians of agrobiodiversity data, as well as new directions for big data. CGIAR first launched the Platform for Big Data in Agriculture to enhance the development and maintenance of its data. This helped establish workflows of cross-platform synthesis, annotate and apply the lessons learnt. The Platform then built GARDIAN (Global Agricultural Research Data Innovation and Acceleration Network)&#x2014;a digital tool that harvests from &#x223c;40 separate open data and publication repositories that 15 CGIAR centres have used for data synthesis. While there have been significant advances in big data management and storage, we also identify the gaps to improve use, and the re-use of data in order to reveal its added value in decision making.</p>
</abstract>
<kwd-group>
<kwd>big data</kwd>
<kwd>data management</kwd>
<kwd>data storage</kwd>
<kwd>agrobiodiversity</kwd>
<kwd>ecology</kwd>
<kwd>dashboards</kwd>
<kwd>open source</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>The concept of &#x201c;big data&#x201d; has been gradually gaining popularity since its first emergence in the mid-1990s, and has seen a surge in research publications since 2008 (<xref ref-type="bibr" rid="B15">Li et al., 2016</xref>). Big data has been suggested as a predominant source of innovation and has caused a paradigm shift to data-driven research. The rapid growth of big data originating from expanding social systems in addition to traditional measurement and observation systems offers great potential to revolutionize our approaches to research. We are getting better at discovering knowledge from data and acquiring intelligence from information. Organisations involved in big data research are better harnessing the associated opportunities and more effectively addressing the corresponding challenges (<xref ref-type="bibr" rid="B10">Wang, 2016</xref>).</p>
<p>While big data is still not a clearly defined term, it often refers to a wide range of larger datasets that are difficult to store, manage, and process using traditional processing tools, due to their size, but also their complexity (<xref ref-type="bibr" rid="B16">Liu, 2015</xref>). The development of open-source frameworks has been essential for the growth of big data because these frameworks make data easier to access and work with. Platforms such as Google Earth Engine (GEE) have been in the remote sensing big data spotlight. GEE is a cloud-based platform that enables parallelized processing of geospatial data on a global scale, using Google&#x2019;s cloud (<xref ref-type="bibr" rid="B23">Tamiminia et al., 2020</xref>). Remote-sensing systems have been collecting large volumes of data for decades, but managing and analysing these data are not practical using common desktop computing resources (<xref ref-type="bibr" rid="B1">Amani et al., 2020</xref>). This platform addressed a major problem for scientists, which was how to best access increasing amounts of satellite data while enabling an easy place for researchers to start searching, processing and analysing relevant data (<xref ref-type="bibr" rid="B21">Shelestov et al., 2017</xref>).</p>
<p>Big data and big data platforms to support research are necessary for agile and hyper-local responses to current challenges (<xref ref-type="bibr" rid="B11">Himesh et al., 2018</xref>; <xref ref-type="bibr" rid="B19">Rao, 2018</xref>). Considering the rising risks from changing climate and the increasing focus on food security and biodiversity, we need to target productive and sustainable agriculture (<xref ref-type="bibr" rid="B18">Mbow et al., 2019</xref>). The Global Biodiversity Information Facility (GBIF) makes biodiversity data accessible and open access (<xref ref-type="bibr" rid="B20">Robertson et al., 2022</xref>), and platforms such as GEE as well as Copernicus, USGS Earth Explorer, NASA open data portal, Natural Earth and OpenStreetMap provide access to extensive volumes of geospatial data. Tools have also been initiated such as the Earth System Data Lab (ESDL) which aim to address issues such as data standardization and harmonization. ESDL combines data from the atmosphere, terrestrial biosphere, hydrosphere, pedosphere and oceans into an easy-to-use analysis-ready format. ESDL produces products that are able to overcome various obstacles such as formatting inconsistencies, incompatible spatiotemporal resolutions and access restrictions. Research institutes and academics alike have access to these large stores of data and so too are they continuing to add to their own data sources through primary research; however, the overlap of biodiversity data platforms with agroecology and agrobiodiversity to target issues such as food security is still limited (<xref ref-type="bibr" rid="B2">Arnaud et al., 2016</xref>).</p>
<p>Finding value in big data is an entire process, and while managing big data has evolved significantly, its value is only now being realised. How do we continue to handle and add value to the escalating volumes of data? CGIAR (including the Alliance of Bioversity International and CIAT) is a consortium of Research Centres that works with partners in six major global regions to jointly address challenges to food, land, and water systems (FLWs) (see <ext-link ext-link-type="uri" xlink:href="http://www.cgiar.org/">http://www.cgiar.org</ext-link> for more details). To assess the status of these FLWs and associated trends requires a vast amount of relevant information across varying spatial and temporal scales. The increasing volume and varying format of big data presents challenges for successful storage, management, analysis and sharing of high quality data for both science and end users. Consequently, large research institutions like CGIAR need to establish multidisciplinary collaboration and operational flows of big data.</p>
</sec>
<sec id="s2">
<title>2 CGIAR and big data</title>
<sec id="s2-1">
<title>2.1 CGIAR&#x2019;s data assets</title>
<p>CGIAR is a consortium that unites an array of international organizations aiming to reduce rural poverty, improve human health and nutrition, introduce sustainable management of natural resources and strengthen food security, primarily in developing countries. In close collaboration with national research institutions, almost 10,000 CGIAR scientists, researchers and technicians are collecting, analysing and synthesizing data on agricultural and biological systems across Asia, Africa, Latin America and the Pacific. With its 15 international research centres, 11 genebanks (that safeguard a unique global resource of crop and tree diversity and respond to thousands of requests for samples per year in more than 100 countries worldwide), 12 Global Research Programs as well as various research platforms, the CGIAR has collected and generated data through a variety of avenues and these data include but are not limited to: Long-term trials, baseline data collections, genomic data, value-added secondary datasets, spatial data and data collected in public-private partnerships. A key objective of CGIAR is to integrate the work of the centres and their partners, avoiding fragmentation and duplication of effort, specifically around these data resources.</p>
<p>In addition to data and databases, there are also other types of information products such as reports, books and book chapters, data analysis and collection tools, video, audio and images and the metadata associated with the information products listed above. In the CGIAR, these information products are stored and preserved in a joint repository called CGSpace (<ext-link ext-link-type="uri" xlink:href="https://cgspace.cgiar.org/">https://cgspace.cgiar.org</ext-link>). CGSpace hosts research outputs and knowledge products for several CGIAR centres and research programs (<xref ref-type="fig" rid="F1">Figure 1</xref>). For storing and publishing data and databases, other centres also created a range of data portals and also disseminate data through repositories such as the Harvard Dataverse. Even though these portals and repositories have stimulated open science and made research more transparent, the increase in number and size of these tools has signalled a coordination issue between individual CGIAR centres.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>CGSpace model (<xref ref-type="bibr" rid="B3">Ballantyne et al., 2022</xref>).</p>
</caption>
<graphic xlink:href="fenvs-11-1107393-g001.tif"/>
</fig>
</sec>
<sec id="s2-2">
<title>2.2 CGIAR&#x2019;s data as international public goods</title>
<p>A fundamental step towards open science is to offer open access data. The CGIAR therefore developed a clear mandate to recognize data as important information assets and project deliverables. In 2012, the CGIAR approved its Intellectual Assets Principles (CGIAR IA Principles) (<xref ref-type="bibr" rid="B7">CGIAR&#x2014;IEA, 2018</xref>). A year later, in 2013, the CGIAR approved its Open Access and Data Management Policy (OA/DM Policy), which expanded on CGIAR IA Principles. OA/DM Policy officially recognized CGIAR&#x2019;s information products as international public goods. This stated that all research data, generated as a result of research funded by CGIAR programmes, must, subject to confidentiality of respondents, be deposited in a suitable repository and made publicly available following a pre-determined timeline. OA/DM Policy also acted as a guiding framework for data producers and publishers to implement FAIR principles (Findability, Accessibility, Interoperability, and Reusability) (<xref ref-type="bibr" rid="B24">Wilkinson et al., 2016</xref>). The FAIR data principles have rapidly become a standard for assessing responsible and reproducible research (<xref ref-type="bibr" rid="B4">Bezuidenhout and Shanahan, 2022</xref>).</p>
<p>CGIAR, being a global network of agricultural research centres, recognized the importance of standardisation and data interchange in building a global network of data and service providers. In 2017, the CGIAR launched the Platform for Big Data in Agriculture as a coordinating mechanism to deliver CGIAR&#x2019;s strategy and to align CGIAR&#x2019;s Research Programs and data management processes (<xref ref-type="bibr" rid="B8">CGIAR Advisory Services Shared Secretariat, 2021</xref>). Prior to the Platform for Big Data in Agriculture, CGIAR had some 40 separate open data and publication repositories. For the first time, these intellectual assets became discoverable in one place <italic>via</italic> GARDIAN (<ext-link ext-link-type="uri" xlink:href="https://gardian.bigdata.cgiar.org/">https://gardian.bigdata.cgiar.org</ext-link>). GARDIAN (Global Agricultural Research Data Innovation and Acceleration Network) is the first pan-CGIAR search engine for agricultural data. This platform harvests from separate open data and publication repositories that 15 CGIAR centres have used for data synthesis. GARDIAN also incorporated metadata standards and quality control measures to ensure the accuracy and reliability of the data. Metadata sharing is a critical way to ensure that data is discoverable (<xref ref-type="bibr" rid="B9">Contaxis et al., 2022</xref>).</p>
<p>There are however also many other actors in the big data realm which makes this space highly fragmented. Private companies, government institutions, and university research institutes have also been generating immense amounts of data. Access to these data might be restricted due to protection of the privacy of human subjects, compliance with policies and regulations, and following intellectual property rights. The CGIAR has partly addressed this by updating and refining an open Guideline for Responsible Data (<ext-link ext-link-type="uri" xlink:href="https://bigdata.cgiar.org/responsible-data-guidelines">https://bigdata.cgiar.org/responsible-data-guidelines</ext-link>). However, to further tackle the issue, there is a strong need to cultivate strategic partnerships and to build appropriate policies and business models (<xref ref-type="bibr" rid="B13">King et al., 2021</xref>). Furthermore, CGIAR needs to enable interoperability and shared infrastructure that allows data to flow seamlessly beyond CGIAR systems, and allows partnerships with other organisations to promote the overlap of biodiversity data platforms with agroecology and agrobiodiversity.</p>
</sec>
<sec id="s2-3">
<title>2.3 The gap&#x2014;Data paralysis</title>
<p>Enabling data discoverability is not quite enough. In addition to ensuring that data is discoverable, it is essential to evaluate the impact and value of data on end users as well as the ability of available data to influence scientific research and development. An evaluation by the <xref ref-type="bibr" rid="B8">CGIAR Advisory Services Shared Secretariat (2021)</xref>, an independent panel of experts tasked with evaluating CGIAR&#x2019;s programs, pointed out the lack of analysis on the Platform as well as the scarcity of data downloads. The focus had been on data uploads and not a lot on data re-use and the scientific application of the data. While data uploads are a necessary first step in the platform, the lack of subsequent data re-use was considered a missed opportunity to help bridge the gap between science and potential data end-users.</p>
<p>A wealth of information may mean a lack of attention on the part of its recipients, since attention should be allocated wisely, given the overabundance of information stimuli (<xref ref-type="bibr" rid="B12">Kambatla et al., 2014</xref>; <xref ref-type="bibr" rid="B17">Madison et al., 2022</xref>). The rise of data brings with it two problems. Firstly, data users are overwhelmed with information as well as sources of information and have a shortage of attention for a full screening of all available data. Secondly, with the exponential growth of the data being produced daily, the volumes of data available for exploitation in this Big Data era do not just offer answers to a specific set of questions, they also proffer yet-to-be-asked questions, which means data exploration and not just discovery is also important.</p>
<p>At the later development phase of the Platform, GARDIAN introduced a mapping and analytics tool where users can better visualise, explore, and understand the data (<ext-link ext-link-type="uri" xlink:href="https://gardian.bigdata.cgiar.org/&#x23;/tools">https://gardian.bigdata.cgiar.org/&#x23;/tools</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://gardian.bigdata.cgiar.org/&#x23;/maps">https://gardian.bigdata.cgiar.org/&#x23;/maps</ext-link>). A lack of analysis on the platform may however indicate how these exploration tools should have been co-developed with current and potential users, especially researchers and policymakers, to allow generating clear added value of the datasets. Perhaps however this is also an indication of the overabundance of data availability leading users to data paralysis. Indeed, there are numerous other high-level interfaces for data retrieval such as DataOne and Globus, and if CGIAR aims to be part of these global data providers it is crucial that CGIAR ensures effective data interchange in this community of repositories and big data services. Infrastructure, tools, and approaches to make CGIAR data more visible, interoperable and reusable continue to be further refined.</p>
<p>Lastly, in the current era of research, in order to support better diagnosis of problems or monitoring of interventions such as those related to agroecology, conservation ecology and climate change, biodiversity informatics must embrace real time, near real time or high frequency data streams. Real time high frequency data streams are data sets that are collected in near-instant intervals. These datasets are typically collected through sensors or other automated processes and are used in many projects in CGIAR to gain insights into the complex systems that shape our world. As the Platform for Big Data has now come to the end of its cycle, CGIAR must consider new ways to reuse data to generate and answer common research questions and integrate research across domains, which is fundamental for making progress on solutions. We also need to consider ways to combine real time data within the classical collection of data stores we have available.</p>
</sec>
<sec id="s2-4">
<title>2.4 Enter the era of the dashboard (dashboard 2.0)</title>
<p>The concept of interoperability was popularized by peer-to-peer systems such as Napster, BitTorrent and Gnutella, which emphasized the &#x201c;data as a resource&#x201d; concept with aggregation, resource sharing and cost reduction (<xref ref-type="bibr" rid="B12">Kambatla et al., 2014</xref>). However, while the integration across research domains increases the use and accessibility of data, it does not necessarily reveal its value (Hai et al., 2016). Metrics such as number of open access items may measure openness and accessibility of data and information products, but they do not reflect the impact of data on research and development. Big data needs decision support. From this perspective, there are several tools and techniques that have been applied to big data for decision making (<xref ref-type="bibr" rid="B5">Casado and Younas, 2015</xref>). Enter the era of the dashboard. Dashboards or platforms utilize one or several components of optimization methods, statistics, data mining, machine learning and numerous visualization approaches. In terms of data interchange, dashboards can be designed to allow users to easily export and share data in a variety of formats. Although these may be far from perfect, they allow data discovery and integrated data exploration as a coherent toolkit (<xref ref-type="bibr" rid="B5">Casado and Younas, 2015</xref>). More importantly, they enable the data to be used for its fundamental purpose&#x2014;enabling decision making. It has been shown that reduced cognitive load is vital for information processing, and ease-of-use is a crucial factor influencing human interaction with technology (<xref ref-type="bibr" rid="B14">Lah et al., 2020</xref>; <xref ref-type="bibr" rid="B6">Castro-Alonso et al., 2021</xref>).</p>
<p>The utilization of dashboards also presents a significant benefit in that they can be easily designed to conform to data standards and APIs, which ensures that data is interoperable and can be shared across different systems or platforms. This is particularly important when working with global initiatives and programs. Dashboards will also allow users from academia to the private sector and government bodies to understand what notable information from different sources is available, democratizing data discovery and exploration. Furthermore, these systems derive information from the data, leading to knowledge and then achieve wisdom from knowledge, leading to intelligent decision making (Hai et al., 2016). Lastly, while GARDIAN was designed to handle historic and processed data, with the new stream of &#x201c;real time&#x201d; data, dashboards seem better equipped to address such needs. Real time FLWs data such as sensor data, satellite data, remote sensing data and weather/climate data can be easily integrated into dashboards by connecting the data stream to a cloud based storage platform for users to more easily access real time data produced by CGIAR and integrate data in agroecology and agrobiodiversity to target issues such as food security and other global challenges.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s3">
<title>3 Discussion</title>
<p>The current volume of data and information collected, stored, shared and used is almost unlimited. This data possesses a high value, and calls for smart logistic and cognitive management of the complex data that is available in order to exploit the data while serving a multiplicity of interests from stakeholders and policymakers to diverse research groups. CGIAR is a major producer of agricultural data and information and has paved the way for open access and FAIR data through its principles, policies, frameworks and platforms. While CGIAR has learnt many lessons from these processes, it must apply these lessons in its future workings.</p>
<p>As data becomes more and more pervasive, users can become intimidated or overwhelmed by its sheer volume, even before any analysis is performed. If we apply the principle of &#x201c;less is more&#x201d;, the CGIAR is arguably its own worst enemy with regard to effective data sharing and consumption. Given the abundant data flow, the next challenge is not how to acquire more data, but how to re-use and translate it into something meaningful. User-friendly dashboards or tools that allow data discovery to flow into data exploration, analytics and data interchange offer the means for this. CGIAR has already implemented several dashboards to help facilitate research and collection (<ext-link ext-link-type="uri" xlink:href="https://www.cgiar.org/dashboards/">https://www.cgiar.org/dashboards/</ext-link>) and these need to continually be refined. While dashboards are an attractive way to bridge this gap we must also not overlook the human aspect of big data. Upstream processing for data curation and data governance is required and so sharing data learnings and pipelines such as those used and developed by CGIAR and the Platform is essential. Using this knowledge we can cultivate an open access ecosystem, and improve our knowledge on how to store, use, and re-use data more effectively.</p>
</sec>
</body>
<back>
<sec id="s4">
<title>Author contributions</title>
<p>AB took the lead in writing the manuscript in consultation with KN. AC helped shape the direction of the paper, and AC and EA provided critical feedback and review.</p>
</sec>
<ack>
<p>The authors acknowledge the editorial input of the Bioversity-CIAT Alliance Science Writing Service, provided by Vincent Johnson. The authors also thank Leroy Mwanzia for his knowledge input.</p>
</ack>
<sec sec-type="COI-statement" id="s5">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s6">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amani</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ghorbanian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ahmadi</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Kakooei</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Moghimi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mirmazloumi</surname>
<given-names>S. M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Google Earth engine cloud computing platform for remote sensing big data applications: A comprehensive review</article-title>. <source>IEEE J. Sel. Top. Appl. Earth Observations Remote Sens.</source> <volume>13</volume>, <fpage>5326</fpage>&#x2013;<lpage>5350</lpage>. <pub-id pub-id-type="doi">10.1109/jstars.2020.3021052</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arnaud</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Casta&#xf1;eda &#xc1;lvarez</surname>
<given-names>N. P.</given-names>
</name>
<name>
<surname>Ganglo Cossi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Endresen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jahanshiri</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Final report of the Task Group on GBIF data fitness for use in agrobiodiversity</article-title>.</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ballantyne</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Yabowork</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Victor</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Orth</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <source>CGSpace - an open access knowledge and information repository for CGIAR research. Figure</source>. <publisher-loc>Nairobi, Kenya</publisher-loc>: <publisher-name>ILRI</publisher-name>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bezuidenhout</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shanahan</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Rethinking the A in FAIR data: Issues of data access and accessibility in research</article-title>. <source>Front. Res. Metrics Anal.</source> <volume>7</volume>, <fpage>912456</fpage>. <pub-id pub-id-type="doi">10.3389/frma.2022.912456</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Casado</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Younas</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Emerging trends and technologies in big data processing</article-title>. <source>Concurrency Comput. Pract. Exp.</source> <volume>27</volume> (<issue>8</issue>), <fpage>2078</fpage>&#x2013;<lpage>2091</lpage>. <pub-id pub-id-type="doi">10.1002/cpe.3398</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Castro-Alonso</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>de Koning</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>Fiorella</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Paas</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Five strategies for optimizing instructional materials: Instructor- and learner-managed cognitive load</article-title>. <source>Educ. Psychol. Rev.</source> <volume>33</volume> (<issue>4</issue>), <fpage>1379</fpage>&#x2013;<lpage>1407</lpage>. <pub-id pub-id-type="doi">10.1007/s10648-021-09606-9</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<collab>CGIAR - IEA</collab> (<year>2018</year>). <source>Review of CGIAR&#x2019;s open access/open data policy and implementation support</source>. <publisher-loc>Rome, Italy</publisher-loc>: <publisher-name>Independent Evaluation Arrangement (IEA) of CGIAR</publisher-name>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://iea.cgiar.org/">http://iea.cgiar.org/</ext-link>
</comment>.</citation>
</ref>
<ref id="B8">
<citation citation-type="book">
<collab>CGIAR Advisory Services Shared Secretariat</collab> (<year>2021</year>). <source>Evaluation of CGIAR platform for big data in agriculture</source>. Report. <publisher-loc>Rome</publisher-loc>: <publisher-name>CAS Secretariat Evaluation Function</publisher-name>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://cas.cgiar.org/">https://cas.cgiar.org/</ext-link>
</comment>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Contaxis</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Clark</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dellureficio</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gonzales</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mannheimer</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Oxley</surname>
<given-names>P. R.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Ten simple rules for improving research data discovery</article-title>. <source>PLoS Comput. Biol.</source> <volume>18</volume> (<issue>2</issue>), <fpage>e1009768</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1009768</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Himesh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>E. P.</given-names>
</name>
<name>
<surname>Gouda</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Ramesh</surname>
<given-names>K. V.</given-names>
</name>
<name>
<surname>Rakesh</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Mohapatra</surname>
<given-names>G. N.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Digital revolution and big data: A new revolution in agriculture</article-title>. <source>CABI Rev.</source> <volume>2018</volume>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1079/pavsnnr201813021</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kambatla</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kollias</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Grama</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Trends in big data analytics</article-title>. <source>J. Parallel Distributed Comput.</source> <volume>74</volume> (<issue>7</issue>), <fpage>2561</fpage>&#x2013;<lpage>2573</lpage>. <pub-id pub-id-type="doi">10.1016/j.jpdc.2014.01.003</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>King</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Devare</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Overduin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kropff</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Perez</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Toward a Digital One CGIAR. Strategic research on digital transformation in food, land, and water systems in a climate crisis</article-title>. <source>Int. Cent. Trop. Agric. (CIAT)</source>, <fpage>112</fpage>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://cgspace.cgiar.org/handle/10568/113555">https://cgspace.cgiar.org/handle/10568/113555</ext-link>
</comment>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lah</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Lewis</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>&#x160;umak</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Perceived usability and the modified technology acceptance model</article-title>. <source>Int. J. Human&#x2013;Computer Interact.</source> <volume>36</volume> (<issue>13</issue>), <fpage>1216</fpage>&#x2013;<lpage>1230</lpage>. <pub-id pub-id-type="doi">10.1080/10447318.2020.1727262</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dragicevic</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Castro</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Sester</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Winter</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Coltekin</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Geospatial big data handling theory and methods: A review and research challenges</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>115</volume>, <fpage>119</fpage>&#x2013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2015.10.012</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A survey of remote-sensing big data</article-title>. <source>Front. Environ. Sci.</source> <volume>3</volume>, <fpage>45</fpage>. <pub-id pub-id-type="doi">10.3389/fenvs.2015.00045</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Madison</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Frischmann</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Sanfilippo</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Strandburg</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Too much of a good thing? A governing knowledge commons review of abundance in context</article-title>. <source>Front. Res. Metrics Anal.</source> <volume>7</volume>, <fpage>959505</fpage>. <pub-id pub-id-type="doi">10.3389/frma.2022.959505</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mbow</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rosenzweig</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Barioni</surname>
<given-names>L. G.</given-names>
</name>
<name>
<surname>Benton</surname>
<given-names>T. G.</given-names>
</name>
<name>
<surname>Herrero</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Krishnapillai</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Food security</article-title>.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rao</surname>
<given-names>N. H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Big data and climate smart agriculture-status and implications for agricultural research and innovation in India</article-title>. <source>Proc. Indian Natl. Sci. Acad.</source> <volume>84</volume>, <fpage>625</fpage>&#x2013;<lpage>640</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Robertson</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wieczorek</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Raymond</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Diversifying the GBIF data model</article-title>. <source>Biodivers. Inf. Sci. Stand.</source> <volume>6</volume>, <fpage>e94420</fpage>. <pub-id pub-id-type="doi">10.3897/biss.6.94420</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shelestov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lavreniuk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kussul</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Novikov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Skakun</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Exploring Google Earth Engine platform for big data processing: Classification of multi-temporal satellite imagery for crop mapping</article-title>. <source>Front. Earth Sci.</source> <volume>5</volume>, <fpage>17</fpage>. <pub-id pub-id-type="doi">10.3389/feart.2017.00017</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tamiminia</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Salehi</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Mahdianpari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Quackenbush</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Adeli</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Brisco</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Google Earth Engine for geo-big data applications: A meta-analysis and systematic review</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>164</volume>, <fpage>152</fpage>&#x2013;<lpage>170</lpage>. <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2020.04.001</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilkinson</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Dumontier</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Aalbersberg</surname>
<given-names>I. J.</given-names>
</name>
<name>
<surname>Appleton</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Axton</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Baak</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The FAIR Guiding Principles for scientific data management and stewardship</article-title>. <source>Sci. Data</source> <volume>3</volume> (<issue>1</issue>), <fpage>160018</fpage>. <pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Fujita</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Towards felicitous decision making: An overview on challenges and trends of Big Data</article-title>. <source>Inf. Sci.</source> <volume>367-368</volume>, <fpage>747</fpage>&#x2013;<lpage>765</lpage>. <comment>ISSN 0020-0255</comment>. <pub-id pub-id-type="doi">10.1016/j.ins.2016.07.007</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>