<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="data-paper">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Robot. AI</journal-id>
<journal-title>Frontiers in Robotics and AI</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Robot. AI</abbrev-journal-title>
<issn pub-type="epub">2296-9144</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frobt.2017.00010</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Robotics and AI</subject>
<subj-group>
<subject>Code</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A Package for Measuring Emergence, Self-organization, and Complexity Based on Shannon Entropy</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Santamar&#x000ED;a-Bonfil</surname> <given-names>Guillermo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x0002A;</xref>
<uri xlink:href="http://frontiersin.org/people/u/186860"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Gershenson</surname> <given-names>Carlos</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x0002A;</xref>
<uri xlink:href="http://frontiersin.org/people/u/142875"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Fern&#x000E1;ndez</surname> <given-names>Nelson</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x0002A;</xref>
<uri xlink:href="http://frontiersin.org/people/u/278212"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>CONACYT-Instituto Nacional de Electricidad y Energ&#x000ED;as Limpias, Gerencia de Tecnolog&#x000ED;as de la Informaci&#x000F3;n</institution>, <addr-line>Cuernavaca, Morelos</addr-line>, <country>M&#x000E9;xico</country></aff>
<aff id="aff2"><sup>2</sup><institution>Instituto de Investigaciones en Matem&#x000E1;ticas Aplicadas y en Sistemas, Universidad Nacional Aut&#x000F3;noma de M&#x000E9;xico</institution>, <addr-line>Ciudad de M&#x000E9;xico</addr-line>, <country>M&#x000E9;xico</country></aff>
<aff id="aff3"><sup>3</sup><institution>Centro de Ciencias de la Complejidad, Universidad Nacional Aut&#x000F3;noma de M&#x000E9;xico</institution>, <addr-line>Mexico City, Distrito Federal</addr-line>, <country>M&#x000E9;xico</country></aff>
<aff id="aff4"><sup>4</sup><institution>SENSEable City Lab, Massachusetts Institute of Technology</institution>, <addr-line>Cambridge, MA</addr-line>, <country>USA</country></aff>
<aff id="aff5"><sup>5</sup><institution>ITMO University</institution>, <addr-line>St. Petersburg</addr-line>, <country>Russian Federation</country></aff>
<aff id="aff6"><sup>6</sup><institution>Laboratorio de Hidroinform&#x000E1;tica, Universidad de Pamplona</institution>, <addr-line>Pamplona</addr-line>, <country>Colombia</country></aff>
<aff id="aff7"><sup>7</sup><institution>Grupo de Investigaci&#x000F3;n en Ecolog&#x000ED;a y Biogeograf&#x000ED;a, Universidad de Pamplona</institution>, <addr-line>Pamplona</addr-line>, <country>Colombia</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Zbigniew R. Struzik, University of Tokyo, Japan</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Hector Zenil, Karolinska Institutet, Sweden; Sebastian Wallot, Max Planck Institute for Empirical Aesthetics (MPG), Germany; V&#x000ED;ctor M. Egu&#x000ED;luz, Instituto de F&#x000ED;sica Interdisciplinar y Sistemas Complejos IFISC (CSIC-UIB), Spain</p></fn>
<corresp content-type="corresp" id="cor1">&#x0002A;Correspondence: Guillermo Santamar&#x000ED;a-Bonfil, <email>gsantamaria&#x00040;conacyt.mx</email>, <email>guillermo.santamaira&#x00040;iie.org.mx</email>; Carlos Gershenson, <email>cgg&#x00040;unam.mx</email>, <email>cgg&#x00040;mit.edu</email>; Nelson Fern&#x000E1;ndez, <email>nfernandez&#x00040;unipamplona.edu.co</email></corresp>
<fn fn-type="other" id="fn002"><p>Specialty section: This article was submitted to Computational Intelligence, a section of the journal Frontiers in Robotics and AI</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>28</day>
<month>03</month>
<year>2017</year>
</pub-date>
<pub-date pub-type="collection">
<year>2017</year>
</pub-date>
<volume>4</volume>
<elocation-id>10</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>11</month>
<year>2016</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>03</month>
<year>2017</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2017 Santamar&#x000ED;a-Bonfil, Gershenson and Fern&#x000E1;ndez.</copyright-statement>
<copyright-year>2017</copyright-year>
<copyright-holder>Santamar&#x000ED;a-Bonfil, Gershenson and Fern&#x000E1;ndez</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) or licensor are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>We present a set of Matlab/Octave functions to compute measures of emergence, self-organization, and complexity applied to discrete and continuous data. These measures are based on Shannon&#x02019;s information and differential entropy. Examples from different datasets and probability distributions are provided to show how to use our proposed code.</p>
</abstract>
<kwd-group>
<kwd>emergence</kwd>
<kwd>self-organization</kwd>
<kwd>complexity</kwd>
<kwd>machine learning datasets</kwd>
<kwd>code:Octave/Matlab</kwd>
</kwd-group>
<counts>
<fig-count count="4"/>
<table-count count="1"/>
<equation-count count="5"/>
<ref-count count="20"/>
<page-count count="12"/>
<word-count count="6494"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1">
<label>1</label> <title>Description</title>
<p>Complexity has generated interest in recent years (Bar-Yam, <xref ref-type="bibr" rid="B2">1997</xref>; Mitchell, <xref ref-type="bibr" rid="B14">2009</xref>; Haken and Portugali, <xref ref-type="bibr" rid="B8">2017</xref>). A complex system can be understood as one composed of many elements, which acquire functional/spatial/temporal structures without <italic>a priori</italic> specifications (Haken and Portugali, <xref ref-type="bibr" rid="B8">2017</xref>). It has been studied in several disciplines, as one can try to measure the complexity of almost any phenomenon (Lopez-Ruiz et al., <xref ref-type="bibr" rid="B13">1995</xref>; Bandt and Pompe, <xref ref-type="bibr" rid="B1">2002</xref>; Prokopenko et al., <xref ref-type="bibr" rid="B15">2009</xref>; Lizier, <xref ref-type="bibr" rid="B12">2014</xref>; Soler-Toscano et al., <xref ref-type="bibr" rid="B18">2014</xref>; Haken and Portugali, <xref ref-type="bibr" rid="B8">2017</xref>). Thus, there exists a broad variety of measures of complexity where Shannon&#x02019;s entropy and its generalizations have played a crucial role (Haken and Portugali, <xref ref-type="bibr" rid="B8">2017</xref>). For instance, permutation entropy has been proposed for analyzing the complexity of time series in terms of its periodicity/chaoticity/randomness (Bandt and Pompe, <xref ref-type="bibr" rid="B1">2002</xref>), whereas the LMC Complexity describes it in terms of equilibrium and disequilibrium (Lopez-Ruiz et al., <xref ref-type="bibr" rid="B13">1995</xref>). Nevertheless, it should be noted that all measures of complexity have limits (Zenil and Kiani, <xref ref-type="bibr" rid="B19">2016</xref>). Thus, it is important to promote the quantitative study of complexity through mathematical frameworks to enhance the exchange of ideas, for instance:
<list list-type="order">
<list-item><p>The Java Information Dynamics Toolkit presents a multi-platform library to calculate complexity of dynamical systems using Shannon&#x02019;s entropy (e.g., information transfer) for discrete and continuous data (Lizier, <xref ref-type="bibr" rid="B12">2014</xref>).</p></list-item>
<list-item><p>The Online Algorithmic Complexity Calculator <italic>OACC</italic>, provides discrete estimations using Shannon entropy and the algorithmic complexity.</p></list-item>
<list-item><p>The Algorithmic Complexity for Short Strings (ACSS), for the R language, computes the Kolmogorov complexity for short strings (Soler-Toscano et al., <xref ref-type="bibr" rid="B18">2014</xref>; Gauvrit et al., <xref ref-type="bibr" rid="B5">2016</xref>; Zenil et al., <xref ref-type="bibr" rid="B20">2016</xref>).</p></list-item>
</list></p>
<p>In this manuscript, we present a package to calculate statistical measures of emergence E, self-organization S, and complexity C which are applicable to any dataset or probability distributions (Fern&#x000E1;ndez et al., <xref ref-type="bibr" rid="B4">2014</xref>; Santamar&#x000ED;a-Bonfil et al., <xref ref-type="bibr" rid="B17">2016</xref>). These are closely related to other Shannon-based measures (Lopez-Ruiz et al., <xref ref-type="bibr" rid="B13">1995</xref>; Jost, <xref ref-type="bibr" rid="B9">2006</xref>). In this regard, discrete measures are calculated employing the discrete Shannon&#x02019;s entropy. Rosetta Code website already provides code for Shannon&#x02019;s discrete entropy in 62 different programming languages (e.g., C, C&#x0002B;&#x0002B;, Java, Matlab/Octave, R, Python). On the other hand, continuous measures are calculated using Shannon&#x02019;s differential entropy.</p>
<p>A previous effort of Fern&#x000E1;ndez et al. (<xref ref-type="bibr" rid="B4">2014</xref>) is provided in COMIN; however, it is rather a prototype. Specifically, our main contributions are as follows: (a) a software bundle to compute discrete and continuous statistical complexity measures; (b) examples on how to use both measures to analyze probability distributions and different timescales; (c) code comments to enhance usability; (d) vector and matrix operations to improve the computation time. This package was coded in GNU Octave 4.0.3, checked for compatibility in Matlab 2013a, and is publicly available at the web repository Entropy-based Complexity<xref ref-type="fn" rid="fn1"><sup>1</sup></xref> (Santamar&#x000ED;a-Bonfil, <xref ref-type="bibr" rid="B16">2016</xref>). In Appendices <xref ref-type="sec" rid="A1">A</xref> and <xref ref-type="sec" rid="A2">B</xref>, code snippets of the functions are provided.</p>
<p>This paper is organized as follows. Section <xref ref-type="sec" rid="S2">2</xref> briefly describes theory on discrete and continuous complexity measures. Section <xref ref-type="sec" rid="S3">3</xref> provides an overview of the code functions, their inputs and outputs (summarized in Table <xref ref-type="table" rid="T1">1</xref>). It also provides guidelines in results interpretation, issues, and limitations. Section <xref ref-type="sec" rid="S4">4</xref> presents two examples (one per function) to introduce users to the basic usage of the developed functions. Finally, Section <xref ref-type="sec" rid="S5">5</xref> presents a discussion on future development topics around entropy-based complexity measures.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p><bold>Summary for discrete and continuous complexity Octave/Matlab functions</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Function or filename</th>
<th valign="top" align="left">Functionality</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">DiscreteComplexityMeasures (pmfSample, noOfStates)</td>
<td align="left" valign="top">This function calculates discrete entropy-based complexity measures for a univariate sample in accordance to the number of the sample&#x02019;s system states</td>
</tr>
<tr>
<td align="left" valign="top">ContinuousComplexityMeasures (pdfSample, minVal, maxVal, distSampleSize, noOfStates)</td>
<td align="left" valign="top">This function calculates continuous entropy-based complexity measures for a probability density distribution in accordance to the minimum and maximum values such distribution takes, the integration step, and the number of system&#x02019;s states</td>
</tr>
<tr>
<td align="left" valign="top">bar3DPlot (M, width, param1Labels, param2Labels)</td>
<td align="left" valign="top">This function makes a 3D bar to graphically display ESC Measures</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In Appendix <xref ref-type="sec" rid="A3">C</xref>, numeric results for the code examples are provided (three machine learning datasets<xref ref-type="fn" rid="fn2"><sup>2</sup></xref> (Fanaee-T and Gama, <xref ref-type="bibr" rid="B3">2013</xref>; Lichman, <xref ref-type="bibr" rid="B11">2013</xref>) and two probability distributions). Furthermore, in Appendix <xref ref-type="sec" rid="A4">D</xref>, we provide an example on the use of our complexity measures to analyze a system at different timescales.</p>
</sec>
<sec id="S2">
<label>2</label> <title>Method: Emergence, Self-Organization, and Complexity</title>
<p>In this section, we describe the statistical measures of E, S, and C. Discrete measures were defined in a previous study presented in Fern&#x000E1;ndez et al. (<xref ref-type="bibr" rid="B4">2014</xref>), later extended for continuous probability distributions (Santamar&#x000ED;a-Bonfil et al., <xref ref-type="bibr" rid="B17">2016</xref>). This package is limited to the aforementioned measures. Proofs, advantages, and limitations are defined and discussed in Fern&#x000E1;ndez et al. (<xref ref-type="bibr" rid="B4">2014</xref>) and Santamar&#x000ED;a-Bonfil et al. (<xref ref-type="bibr" rid="B17">2016</xref>). Furthermore, for simplicity, differences between the discrete and the continuous measures will be mentioned when necessary.</p>
<p>Many notions of <italic>Emergence</italic> describe it as novelty (between scales, in time, or within a process). E can be understood as new global patterns which are not present in the system&#x02019;s components. More precisely, for discrete probability distributions, E measures the average ratio of uncertainty a process produces by new information that is a consequence of changes in (a) dynamics or (b) scale. For continuous distributions, the interpretation of E is constrained to the average uncertainty a process produces <italic>under a specific set of the distribution parameters</italic> (e.g., the SD value for a Gaussian distribution) (Santamar&#x000ED;a-Bonfil et al., <xref ref-type="bibr" rid="B17">2016</xref>). Formally, the discrete and continuous E are defined as follows:
<disp-formula id="E1"><mml:math id="M1"><mml:mrow><mml:malignmark/><mml:msub><mml:mi>E</mml:mi><mml:mi>D</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x02212;</mml:mo><mml:mi>K</mml:mi><mml:mtext>&#x02009;</mml:mtext><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mtext>&#x02009;log</mml:mtext><mml:mn>2</mml:mn></mml:msub><mml:mtext>&#x02009;&#x02009;</mml:mtext><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:math></disp-formula></p>
<disp-formula id="E2"><label>(1)</label><mml:math id="M2"><mml:mrow><mml:malignmark/><mml:msub><mml:mi>E</mml:mi><mml:mi>C</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x02212;</mml:mo><mml:mi>K</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>lim</mml:mi><mml:mrow><mml:mn>&#x00394;</mml:mn><mml:mo>&#x02192;</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mtext>&#x02009;</mml:mtext><mml:mi>H</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msup><mml:mi>X</mml:mi><mml:mn>&#x00394;</mml:mn></mml:msup><mml:mo stretchy='false'>)</mml:mo><mml:mo>+</mml:mo><mml:mrow><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mn>&#x00394;</mml:mn><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:math></disp-formula>
<p><italic>E<sub>D</sub></italic> in equation (<xref ref-type="disp-formula" rid="E1">1</xref>) corresponds to the discrete E, where <italic>p<sub>i</sub></italic>&#x02009;&#x0003D;&#x02009;<italic>P</italic>(<italic>X</italic>&#x02009;&#x0003D;&#x02009;<italic>x</italic>) is the probability of the element <italic>i</italic>. <italic>E<sub>C</sub></italic> in equation (<xref ref-type="disp-formula" rid="E2">1</xref>) corresponds to the continuous E. Note that the latter is rather a quantized version of the differential entropy, where <italic>X</italic><sup>&#x00394;</sup> corresponds to a discretized version of X, and &#x00394; is the integration step. On the other hand, <italic>K</italic> is a normalizing constant that constrains E within the range 0&#x02009;&#x02264;&#x02009;<italic>E</italic>&#x02009;&#x02264;&#x02009;1. It is estimated as
<disp-formula id="E3"><label>(2)</label><mml:math id="M3"><mml:mrow><mml:mi>K</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:mi>b</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
where <italic>b</italic> corresponds to the system&#x02019;s alphabet size: the number of bins of a probability mass function, or, in the continuous case, to the number of states that satisfy <italic>P</italic>(<italic>x<sub>i</sub></italic>)&#x02009;&#x0003E;&#x02009;0. More importantly, the denominator of equation (<xref ref-type="disp-formula" rid="E3">2</xref>), log<sub>2</sub>(<italic>b</italic>), corresponds to the maximum entropy for a distribution function with alphabet size of <italic>b</italic>. Consequently, E can be understood as the ratio between the entropy for a given distribution <italic>H</italic>(<italic>X</italic>), and the maximum entropy for the same alphabet size <italic>H</italic>(<italic>U</italic>), <inline-formula><mml:math id="M4"><mml:mrow><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mstyle scriptlevel='+1'><mml:mfrac><mml:mrow><mml:mi>H</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>X</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mrow><mml:mi>H</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>U</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:math></inline-formula>.</p>
<p>It is also worth noting that, <italic>E<sub>D</sub></italic>&#x02009;&#x0003D;&#x02009;0 is only achievable when the entropy for a given probability distribution is such that <italic>H</italic>(<italic>X</italic>)&#x02009;&#x0003D;&#x02009;0, which corresponds to the entropy of a Dirac delta distribution. However, in the continuous case the differential entropy of a Dirac delta or a discrete value is &#x02212;&#x0221E;. Nonetheless, differential entropy only becomes negative when the probability distribution becomes extremely concentrated in very few states. Thus, when calculating our statistical continuous complexity measures, we set <italic>H</italic>(<italic>x<sub>i</sub></italic>)&#x02009;&#x0003D;&#x02009;0 iff <italic>H</italic>(<italic>x<sub>i</sub></italic>)&#x02009;&#x0003C;&#x02009;0.</p>
<p><italic>Self-organization</italic>, in its most general form, can be seen as a reduction of entropy (Gershenson and Heylighen, <xref ref-type="bibr" rid="B7">2003</xref>). S is the complement of E; thus, self-organization is related to order and regularity due to changes in the process dynamics or scale. In this sense, an entirely random process (e.g., uniform distribution) has the lowest organization and a completely deterministic one (Dirac delta distribution) has the highest. S is defined as
<disp-formula id="E4"><label>(3)</label><mml:math id="M5"><mml:mrow><mml:mi>S</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>H</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>P</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>X</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mrow><mml:mi>H</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>U</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
such that 0&#x02009;&#x02264;&#x02009;<italic>S</italic>&#x02009;&#x02264;&#x02009;1.</p>
<p><italic>Complexity</italic> comes from the Latin <italic>plexus</italic>, which means interwoven. Thus, something complex is difficult to separate. This means that its components are interdependent, i.e., their future is partly determined by their interactions. Complexity represents a balance between change and regularity (Kaufmann, <xref ref-type="bibr" rid="B10">1993</xref>), which allows systems to adapt in a robust fashion. Regularity ensures that information survives, while change allows the exploration of new possibilities, essential for adaptability. In this sense, complexity can also be used to characterize living systems or artificial adaptive systems, especially when comparing their complexity with that of their environment (Fern&#x000E1;ndez et al., <xref ref-type="bibr" rid="B4">2014</xref>). More precisely, this function describes a system&#x02019;s behavior in terms of the average uncertainty produced by emergent and regular global patterns as described by its probability distribution. Thus, the complexity measure is defined as
<disp-formula id="E5"><label>(4)</label><mml:math id="M6"><mml:mrow><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn><mml:mo>&#x022C5;</mml:mo><mml:mi>E</mml:mi><mml:mo>&#x022C5;</mml:mo><mml:mi>S</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
such that 0&#x02009;&#x02264;&#x02009;<italic>C</italic>&#x02009;&#x02264;&#x02009;1. C is only maximal when E and S are equal (i.e., <italic>E</italic>&#x02009;&#x0003D;&#x02009;<italic>S</italic>&#x02009;&#x0003D;&#x02009;0.5). Fern&#x000E1;ndez et al. (<xref ref-type="bibr" rid="B4">2014</xref>) showed that for a variable with only two states, the highest C is achieved when one of the states is highly probable, i.e., &#x02248;0.89. This implies that a system which concentrates its dynamics into few highly probable states with many less frequent states displays high complexity (e.g., a power-law distribution). C becomes 0 for equiprobable distributions.</p>
</sec>
<sec id="S3">
<label>3</label> <title>Functions of Complexity Measures</title>
<p>The complexity of different phenomena can be calculated using entropy-based measures. However, to obtain meaningful results, users must first determine the adequate function to be employed for their problem (e.g., should a raw sample or an estimated probability distribution function be used?). In this section, we describe two functions for complexity: <italic>DiscreteComplexityMeasures</italic>, and <italic>ContinuousComplexityMeasures</italic>. We provide details on the <italic>inputs</italic> and <italic>outputs</italic> required by these complexity functions. In addition, we also provide a graphical function to display emergence, self-organization, and complexity (ESC); we take no authorship of it since it is freely available on the internet<xref ref-type="fn" rid="fn3"><sup>3</sup></xref>; nonetheless, in the next section, we provide details of its functionality.</p>
<sec id="S3-1">
<label>3.1</label> <title>Functions Definition</title>
<p><italic>DiscreteComplexityMeasures</italic>, and <italic>ContinuousComplexityMeasures</italic> are briefly summarized at Table <xref ref-type="table" rid="T1">1</xref>. In the following, <italic>inputs</italic> and <italic>outputs</italic> are detailed.</p>
<sec id="S3-1-1">
<label>3.1.1</label> <title>Inputs</title>
<list list-type="order">
<list-item><p>DiscreteComplexityMeasures(pmfSample, noOfStates)</p>
<list list-type="simple">
<list-item><label>(a)</label> <p><italic>pmfSample</italic> is a vector of size <italic>n</italic>&#x02009;&#x000D7;&#x02009;1 which corresponds to <italic>n</italic> real values displayed by a given system, e.g., a time series.</p></list-item>
<list-item><label>(b)</label> <p><italic>noOfStates</italic> is an integer &#x02265;2 that defines the number of states to coarse grain the given sample. If it is empty, a heuristic is used to calculate the number of system states.</p></list-item>
</list>
</list-item>
<list-item><p>ContinuousComplexityMeasures(pdfSample, varargin)</p>
<list list-type="simple">
<list-item><label>(a)</label> <p><italic>pdfSample</italic> is a vector of size <italic>n</italic>&#x02009;&#x000D7;&#x02009;1, which contains the <italic>n</italic> probability values assigned by the probability distribution function (i.e., <italic>f</italic>(<italic>x</italic>)&#x02009;&#x0003D;&#x02009;<italic>P</italic>(<italic>x</italic>)).</p></list-item>
<list-item><label>(b)</label> <p>Additional parameters are:</p>
<list list-type="roman-lower">
<list-item><p><italic>minVal</italic> is a real value corresponding to the minimal value where the PDF will be evaluated.</p></list-item>
<list-item><p><italic>maxVal</italic> is a real value corresponding to the maximal value where the PDF will be evaluated. It is strictly necessary that <italic>minVal</italic>&#x02009;&#x0003C;&#x02009;<italic>maxVal</italic>.</p></list-item>
<list-item><p><italic>distSampleSize</italic>, is an integer value which corresponds to an approximate sample size. This value is used to estimate the integration step <inline-formula><mml:math id="M7"><mml:mrow><mml:mn>&#x00394;</mml:mn><mml:mo>=</mml:mo><mml:mstyle scriptlevel='+1'><mml:mfrac><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>S</mml:mi><mml:mi>a</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi><mml:mi>l</mml:mi><mml:mi>e</mml:mi><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:math></inline-formula>.</p></list-item>
<list-item><p><italic>noOfStates</italic> is an integer value used to define the number of possible states a system can take. As its discrete counterpart, it should satisfy that &#x02265;2. In particular, <italic>noOfStates</italic> should be large to satisfy 0&#x02009;&#x02264;&#x02009;<italic>E, S, C</italic>&#x02009;&#x02264;&#x02009;1. If not provided, a heuristic is employed to obtain it.</p></list-item></list></list-item>
</list>
</list-item>
<list-item><p>bar3DPlot(M, width, param1Labels, varargin)</p>
<list list-type="simple">
<list-item><label>(a)</label> <p><italic>M</italic> is <italic>n</italic>&#x02009;&#x000D7;&#x02009;3 or <italic>n</italic>&#x02009;&#x000D7;&#x02009;<italic>m</italic> matrix. For the former, rows correspond to a feature of a system whereas its columns are the corresponding E, S, C, respectively. For the latter, columns are rather a parameter of the system, thus, only one ESC measure can be displayed at a time.</p></list-item>
<list-item><label>(b)</label> <p><italic>width</italic> determines each bar size; this value must satisfy 0&#x02009;&#x0003C;&#x02009;<italic>width</italic>&#x02009;&#x02264;&#x02009;1.</p></list-item>
<list-item><label>(c)</label> <p><italic>param1Labels</italic> is an <italic>n</italic>&#x02009;&#x000D7;&#x02009;1 label matrix. It contains the corresponding labels for each row of <italic>M</italic>.</p></list-item>
<list-item><label>(d)</label> <p>When <italic>M</italic> is an <italic>n</italic>&#x02009;&#x000D7;&#x02009;<italic>m</italic> matrix, additional labels are required. <italic>param2Labels</italic> is an <italic>n</italic>&#x02009;&#x000D7;&#x02009;1 label matrix which contains the corresponding labels for each column of <italic>M</italic>.</p></list-item>
</list>
</list-item>
</list>
</sec>
<sec id="S3-1-2">
<label>3.1.2</label> <title>Outputs</title>
<p>Complexity measure functions return 4 elements: three mandatory outputs <italic>Emergence, Self-organization, Complexity</italic>, and an optional one, which corresponds to data&#x02019;s discrete or continuous entropy.</p>
</sec>
</sec>
<sec id="S3-2">
<label>3.2</label> <title>Results Interpretation</title>
<p>E, S, and C measures provide a <italic>big picture</italic> about the expected uncertainty that belongs to a system in terms of its probability distribution product of (a) a reduction/increase of system&#x02019;s states (Gershenson and Fern&#x000E1;ndez, <xref ref-type="bibr" rid="B6">2012</xref>), or (b) the concentration/homogenization of the probability distribution (Santamar&#x000ED;a-Bonfil et al., <xref ref-type="bibr" rid="B17">2016</xref>). E is able to measure the change in scale given a process that transforms information (Gershenson and Fern&#x000E1;ndez, <xref ref-type="bibr" rid="B6">2012</xref>). For instance, E can be expressed as <inline-formula><mml:math id="M8"><mml:mrow><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mstyle scriptlevel='+1'><mml:mfrac><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:math></inline-formula>, where <italic>H<sub>in</sub></italic> is the initial entropy for a system, and <italic>H<sub>out</sub></italic>&#x02009;&#x0003D;&#x02009;<italic>f</italic>(<italic>H<sub>in</sub></italic>) is <italic>H<sub>in</sub></italic> transformed by process <italic>f</italic>. On the other hand, for a given probability distribution either discrete or continuous, if E is close to 1, the system shows similar probability for most of its states. Otherwise, if <italic>E</italic>&#x02009;&#x02261;&#x02009;1, all states are equiprobable. 
Thus, if 0&#x02009;&#x0003C;&#x02009;<italic>E&#x02009;&#x0226A;</italic>&#x02009;1, then the system&#x02019;s state distribution has few states with a considerable amount of the probability, whereas if 0&#x02009;<italic>&#x0226A;&#x02009;E</italic>&#x02009;&#x0003C;&#x02009;1, then the states of the system are more evenly distributed. Since S is the complement of E, the above-mentioned descriptions apply conversely to S. In this context, if <italic>S</italic>&#x02009;&#x02261;&#x02009;1, the system can be considered to be predictable since a single state <italic>x<sub>j</sub></italic> has <italic>P</italic>(<italic>x<sub>j</sub></italic>)&#x02009;&#x02248;&#x02009;1. This interpretation of E and S is shared by other Shannon-based measures like LMC Complexity and statistical diversity (Lopez-Ruiz et al., <xref ref-type="bibr" rid="B13">1995</xref>; Jost, <xref ref-type="bibr" rid="B9">2006</xref>) (e.g., the disequilibrium of a crystal&#x02009;&#x0003D;&#x02009;the diversity of a population with exactly 1 species&#x02009;&#x0003D;&#x02009;<italic>S<sub>max</sub></italic>&#x02009;&#x0003D;&#x02009;1).</p>
<p>On the other hand, <italic>C</italic>&#x02009;&#x0003D;&#x02009;1 only when <italic>E, S</italic>&#x02009;&#x0003D;&#x02009;0.5. Such a scenario is given when a single or a few states are highly concentrated in terms of their probability, with many other states with lesser probabilities. In this regard, C becomes 0 when the distribution resembles a uniform distribution or a Dirac delta. Moreover, higher values of C require that the probability distribution remains between these two extremes. For instance, consider a system with 5 states: one state has <italic>p</italic>(<italic>s</italic><sub>1</sub>)&#x02009;&#x0003D;&#x02009;0.8 and the remaining 4 states have equal probability <italic>p</italic>(<italic>s</italic><sub>2, &#x02026;, 5</sub>)&#x02009;&#x0003D;&#x02009;0.05; hence, <italic>C</italic>&#x02009;&#x0003D;&#x02009;0.9988. This behaviour can be observed in the Gaussian distribution case discussed in Appendix <xref ref-type="sec" rid="A3">C</xref>.</p>
</sec>
<sec id="S3-3">
<label>3.3</label> <title>Issues and Limitations</title>
<p>Some of the known issues, considerations, and limitations of this package are as follows:
<list list-type="order">
<list-item><p>The statistical measures proposed are mainly based on Shannon&#x02019;s discrete and differential entropy (i.e., H(X)) per symbol.</p></list-item>
<list-item><p>Our proposed measures only consider I.I.D. random variables. Thus, conditional time relations or strings of size&#x02009;&#x0003E;&#x02009;1 are not considered. This limitation is particularly important when analyzing a distribution. For instance, if a discrete sequence of repeating points, e.g., 0, 1, 2, 0, 1, 2, &#x02026; is analyzed in terms of each number, the distribution will resemble a uniform distribution; hence, E&#x02009;&#x0003D;&#x02009;1. However, if the states of the system are strings of 3 elements, the distribution will be a Dirac delta; hence, S&#x02009;&#x0003D;&#x02009;1.</p></list-item>
<list-item><p>In order to obtain some preliminary results when calculating continuous complexity, the size of the integration step &#x00394; should be considered. In this context, if &#x00394;&#x02009;&#x02248;&#x02009;0 then <italic>H</italic>(<italic>x</italic>)&#x02009;&#x0003D;&#x02009;&#x02212;&#x0221E;, which could induce a spurious decay of <italic>E<sub>C</sub></italic> values (the interested reader may refer to Santamar&#x000ED;a-Bonfil et al. (<xref ref-type="bibr" rid="B17">2016</xref>) for more details).</p></list-item>
<list-item><p>Emergence value is understood as <italic>E</italic>&#x02009;&#x0003D;&#x02009;<italic>K</italic>&#x0002A;<italic>H</italic>(<italic>X</italic>) constraining it to 0&#x02009;&#x02264;&#x02009;<italic>E</italic>&#x02009;&#x02264;&#x02009;1 by the normalizing constant <italic>K</italic> (Fern&#x000E1;ndez et al., <xref ref-type="bibr" rid="B4">2014</xref>). This constant value is calculated as <italic>K</italic>&#x02009;&#x0003D;&#x02009;1/log(<italic>b</italic>) where <italic>b</italic> is the system&#x02019;s alphabet size. Since <italic>log</italic>(<italic>b</italic>) corresponds to the maximum entropy for any probability distribution with <italic>b</italic> symbols, <italic>E</italic> is the ratio between the entropy for a given distribution <italic>P</italic>(<italic>X</italic>), and the maximum entropy for the same alphabet size (Santamar&#x000ED;a-Bonfil et al., <xref ref-type="bibr" rid="B17">2016</xref>). Therefore, if <italic>b</italic> is not provided, a heuristic is employed with the aim to compute the total number of symbols from <italic>P</italic>(<italic>X</italic>) that satisfy <italic>p</italic>(<italic>x</italic>)&#x02009;&#x0003E;&#x02009;0 (both for discrete and continuous complexity measures).</p></list-item>
<list-item><p>These ESC measures are univariate.</p></list-item>
</list></p>
</sec>
</sec>
<sec id="S4">
<label>4</label> <title>Code Example: ExampleComplexityMeasures</title>
<p>In this section, we present an example that shows the functionality of our complexity measures (additional details are provided in Appendix <xref ref-type="sec" rid="A3">C</xref>). First, we present the overall functionality of the example and how it should be edited. Octave 4.0.3 or Matlab 2013a is required to run these complexity functions. We highly recommend that the reader use the examples and complexity measures from the publicly available Entropy-based Complexity repository as templates.</p>
<p>The example <italic>ExampleComplexityMeasures</italic> is basically divided into two sections: (1) discrete examples, and (2) continuous examples. In either case, ESC measures are simultaneously calculated and stored in variable <italic>ESC</italic> to make a 3D bar plot as follows:
<preformat>
<monospace>[Emrgnc, SlfRgnztn, Cmplxty]&#x02009;&#x0003D;&#x02009;&#x02026;</monospace>
<monospace>DiscreteComplexityMeasures(pmfSample, noOfStates);</monospace>
<monospace>ESC&#x02009;&#x0003D;&#x02009;[Emrgnc, SlfRgnztn, Cmplxty];</monospace>
<monospace>typeLabel&#x02009;&#x0003D;&#x02009;[<font color="#009A55">&#x02019;Feature1&#x02019;</font>;<font color="#009A55">&#x02019;Feature2&#x02019;</font>];</monospace>
<monospace><font color="#A4548A">figure</font> (1);</monospace>
<monospace>width&#x02009;&#x0003D;&#x02009;1;</monospace>
<monospace>bar3DPlot(ESC,width,typeLabel);</monospace>
</preformat></p>
<sec id="S4-1">
<label>4.1</label> <title>How to Modify the Example?</title>
<p>First, you must choose between discrete and continuous examples. Next, you need to specify the working directory and the dataset. Some datasets from the University of California Irvine (UCI) repository (Lichman, <xref ref-type="bibr" rid="B11">2013</xref>) are provided in advance in mat format: (a) <italic>frequency of three types of solar flares per 24&#x02009;h</italic>, (b) <italic>the bicycle rides made per day and hour for a station within a bicycle sharing system</italic>, and (c) <italic>household electric consumption per minute for a whole house metering and kitchen submetering</italic>. These must be downloaded to the working directory. For the example of continuous complexity measures, probability distribution data are generated on the fly. Any other dataset to work with must be in mat format.</p>
<p>The working directory is specified (a) via Matlab/Octave user interface, or (b) by setting the path via code as is shown.</p>
<preformat>
<monospace>filePath&#x02009;&#x0003D;&#x02009;<font color="#009A55">&#x02019;C:&#x0005C;HereSetYourPath&#x0005C;&#x02019;</font>;</monospace>
</preformat>
<p>Next, you must choose the type of complexity measure: <italic>1</italic> for discrete and <italic>2</italic> for differential.</p>
<preformat>
<monospace>complexityType&#x02009;&#x0003D;&#x02009;1; <font color="#0072BC">%Discrete complexity measures</font></monospace>
<monospace><font color="#0072BC">%complexityType&#x02009;&#x0003D;&#x02009;2;%Continuous complexity measures</font></monospace>
</preformat>
<p><bold>If discrete complexity function is chosen:</bold></p>
<p>Specify the dataset to be employed:</p>
<preformat>
<monospace>dataSet&#x02009;&#x0003D;&#x02009;1; <font color="#A4548A">load</font>([filePath SolarFlaresData]);</monospace>
<monospace><font color="#0072BC">% dataSet&#x02009;&#x0003D;&#x02009;2; load([filePath BikeSharingData]);</font></monospace>
<monospace><font color="#0072BC">% dataSet&#x02009;&#x0003D;&#x02009;3; load([filePath</font></monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02009;&#x02009;&#x02003;&#x02003;&#x02003;&#x02003;HouseElecCnsmptData]);</monospace>
</preformat>
<p>Also, you may specify the number of states (<italic>noOfStates</italic>) the system will have, 10 is an educated guess.</p>
<preformat>
<monospace>noOfStates&#x02009;&#x0003D;&#x02009;10;<font color="#0072BC">%Number of states of the system</font></monospace>
</preformat>
<p>Finally, calculate ESC measures as follows:</p>
<preformat>
<monospace>[Emrgnc, &#x02026;</monospace>
<monospace>SlfRgnztn, &#x02026;</monospace>
<monospace>Cmplxty]&#x02009;&#x0003D;&#x02009;DiscreteComplexityMeasures(&#x02026;</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02009;&#x02009;&#x02003;&#x02003;&#x02003;&#x02003;pmfSample,noOfStates);</monospace>
</preformat>
<p>For illustrative purposes, we chose as <italic>pmfSample</italic> the household electric consumption described in Appendix <xref ref-type="sec" rid="A3">C</xref>. The corresponding results are shown in Figure <xref ref-type="fig" rid="F1">1</xref>.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p><bold>Discrete E, S, and C for a single household electric consumption</bold>. These data correspond to time series of energy consumption per minute for a whole house and its kitchen. Note that the electricity consumption for the whole house has complexity near 1, while the kitchen is rather highly self-organized. In the former, the results imply that a single or few energy consumption states concentrate most of the probability (i.e., regular patterns) with many new emergent states of usage. In the latter, kitchen&#x02019;s energy consumption is more regular and more predictable. In fact most of the time kitchen will not consume electricity (91% of the probability is concentrated in the 0 energy consumption state). Kitchen also displays a <italic>C</italic>&#x02009;&#x02248;&#x02009;0.27, which is the result of its periodic usage (e.g., meals during workweeks).</p></caption>
<graphic xlink:href="frobt-04-00010-g001.tif"/>
</fig>
<p><bold>If continuous complexity function is chosen:</bold></p>
<p>Probability Density Functions are used to estimate Gaussian and Power-Law (PL) distributions. In the former, a pre-programmed language function is employed, whereas in the latter, we implemented our own probability function. In either case, some parameters are required: <italic>distSampleSize</italic> and <italic>distParamNum</italic>. The first determines the integration sampling step. The second is the number of parameters that our probability distribution will have (in the Normal distribution case, different <italic>&#x003C3;</italic> values are used, whereas, in the PL distribution, distinct <italic>x<sub>min</sub></italic> and <italic>&#x003B1;</italic> values are employed).</p>
<preformat>
<monospace>distSampleSize&#x02009;&#x0003D;&#x02009;100000;</monospace>
<monospace>distParamNum&#x02009;&#x0003D;&#x02009;10;</monospace>
</preformat>
<p>Next, specify variable <italic>pdfType</italic> to select either, <italic>1</italic> Gaussian, or <italic>2</italic> PL distribution.</p>
<preformat>
<monospace>pdfType&#x02009;&#x0003D;&#x02009;1;% <font color="#0072BC">Gaussian Distribution</font></monospace>
<monospace><font color="#0072BC">% pdfType&#x02009;&#x0003D;&#x02009;2;% Power-law Distribution</font></monospace>
</preformat>
<p>Also, you must specify the <italic>noOfStates</italic> as in the discrete case. Variable <italic>plotPDFOn</italic>&#x02009;&#x0003D;&#x02009;{0, 1} can be used to plot PDF&#x02019;s for the different parameters. Finally, calculate ESC measures for a PDF by calling the function as follows:
<preformat>
<monospace>[Emrgnc, &#x02026;</monospace>
<monospace>SlfRgnztn, &#x02026;</monospace>
<monospace>Cmplxty]&#x02009;&#x0003D;&#x02009;ContinuousComplexityMeasures(&#x02026;</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;pdfDist,minVal,maxVal, &#x02026;</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;distSampleSize,noOfStates);</monospace>
</preformat></p>
<p>For illustrative purposes we chose as the <italic>pdfSample</italic> a power-law with parameters <italic>x<sub>min</sub></italic>&#x02009;&#x0003D;&#x02009;3 and <italic>&#x003B1;</italic>&#x02009;&#x0003D;&#x02009;2, &#x02026;, 11. The corresponding results are shown in Figure <xref ref-type="fig" rid="F2">2</xref>.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p><bold>Continuous E, S, and C for a power-law with a fixed <italic>x<sub>min</sub></italic>&#x02009;&#x0003D;&#x02009;3 and scale exponents <italic>&#x003B1;</italic>&#x02009;&#x0003D;&#x02009;2, &#x02026;, 11</bold>. Note that as the scale exponent grows, <italic>E</italic> decays product of the concentration of distribution around the <italic>x<sub>min</sub></italic> value. However, even for <italic>&#x003B1;</italic>&#x02009;&#x0003D;&#x02009;11, a considerable amount of complexity is displayed <italic>C</italic>&#x02009;&#x02248;&#x02009;0.37. The latter is product of the heavy-tail of the distribution. Also note that C is high for 2&#x02009;&#x02264;&#x02009;<italic>&#x003B1;</italic>&#x02009;&#x02264;&#x02009;4 where <italic>C</italic>&#x02009;&#x02248;&#x02009;0.95, 0.99, 0.93, respectively. The max <italic>C</italic> can be shifted to lower or higher scale exponents by <italic>x<sub>min</sub></italic>, which may be convenient to describe real-world phenomena.</p></caption>
<graphic xlink:href="frobt-04-00010-g002.tif"/>
</fig>
</sec>
</sec>
<sec id="S5" sec-type="discussion">
<label>5</label> <title>Discussion</title>
<p>In this paper, we presented two functions to calculate entropy-based complexity measures: <italic>Emergence, Self-Organization</italic>, and <italic>Complexity</italic>. These measures can be employed for discrete samples or continuous probability distributions. The inputs and outputs for these two functions were described, and a code example for testing complexity functions was provided. Additionally, code snippets and dataset descriptions are provided in Appendixes <xref ref-type="sec" rid="A1">A</xref>, <xref ref-type="sec" rid="A2">B</xref>, and <xref ref-type="sec" rid="A3">C</xref>, respectively.</p>
<p>Additional notes need to be made. First, for pedagogical purposes these functions were developed using GNU Octave language. However, they can be easily extended to R or Python. Note that for a fast computation process, the implementation of these measures on other languages will require vector and matrix operations, loop usage is discouraged. Second, these functions only are designed to calculate discrete and continuous complexity of univariate systems. Thus, a measure for multivariate systems is required. A fast proxy for multivariate entropy calculation could be the summation of each feature entropy. Consequently, Emergence could be calculated as the ratio of <inline-formula><mml:math id="M9"><mml:mrow><mml:mstyle scriptlevel='+1'><mml:mfrac><mml:mrow><mml:mstyle displaystyle='true'><mml:msub><mml:mo>&#x02211;</mml:mo><mml:mi>i</mml:mi></mml:msub></mml:mstyle><mml:mtext>&#x02009;</mml:mtext><mml:mi>H</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mtext>&#x02009;</mml:mtext><mml:mstyle displaystyle='true'><mml:msub><mml:mo>&#x02211;</mml:mo><mml:mi>i</mml:mi></mml:msub></mml:mstyle><mml:mtext>&#x02009;</mml:mtext><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mi>g</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:math></inline-formula>, where <italic>N</italic> is the number of system variables, and <italic>b<sub>i</sub></italic> is the alphabet for each variable. However, further research about this issue is required. Third, a further extension of this research includes the usage of the continuous entropy to calculate discrete complexity measures to provide more sensible results for any given probability mass function. 
Also, because these measures only describe the complexity at the level of symbols in the alphabet rather than on strings, conditional entropy should be used in future work. Such function can provide the average entropy growth for both, IID random variables and stochastic processes. Particularly the latter feature would be convenient for analyzing the complexity of time series and dynamical processes with memory.</p>
</sec>
<sec id="S6" sec-type="author-contributor">
<title>Author Contributions</title>
<p>GS-B designed and coded ESC discrete and continuous Matlab/Octave functions and performed the experiments. GS-B, CG, and NF conceived and designed the experiments and wrote the paper.</p>
</sec>
<sec id="S7">
<title>Conflict of Interest Statement</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest. The reviewer HZ declared a past co-authorship with one of the authors (CG) to the handling Editor, who ensured that the process met the standards of a fair and objective review.</p>
</sec>
</body>
<back>
<ack>
<p>The authors would like to thank Carlos Pi&#x000F1;a Ph.D. for the help in editing and proofreading this manuscript. GS-B was supported by the Consejo Nacional de Ciencia y Tecnolog&#x000ED;a under the C&#x000E1;tedra-Conacyt contract 969.</p>
</ack>
<sec id="S8" sec-type="supplementary-material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at <uri xlink:href="http://journal.frontiersin.org/article/10.3389/frobt.2017.00010/full&#x00023;supplementary-material">http://journal.frontiersin.org/article/10.3389/frobt.2017.00010/full&#x00023;supplementary-material</uri>.</p>
<supplementary-material xlink:href="data_sheet_1.zip" id="SM1" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="code.zip" id="SM2" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bandt</surname> <given-names>C.</given-names></name> <name><surname>Pompe</surname> <given-names>B.</given-names></name></person-group> (<year>2002</year>). <article-title>Permutation entropy: a natural complexity measure for time series</article-title>. <source>Phys. Rev. Lett.</source> <volume>88</volume>, <fpage>174102</fpage>.<pub-id pub-id-type="doi">10.1103/PhysRevLett.88.174102</pub-id><pub-id pub-id-type="pmid">12005759</pub-id></citation></ref>
<ref id="B2"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Bar-Yam</surname> <given-names>Y.</given-names></name></person-group> (<year>1997</year>). <source>Dynamics of Complex Systems. Studies in Nonlinearity</source>. <publisher-loc>Boulder, CO, USA</publisher-loc>: <publisher-name>Westview Press</publisher-name>.</citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fanaee-T</surname> <given-names>H.</given-names></name> <name><surname>Gama</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>Event labeling combining ensemble detectors and background knowledge</article-title>. <source>Prog. Artif. Intel.</source> <volume>2</volume>, <fpage>1</fpage>&#x02013;<lpage>15</lpage>.<pub-id pub-id-type="doi">10.1007/s13748-013-0040-3</pub-id></citation></ref>
<ref id="B4"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Fern&#x000E1;ndez</surname> <given-names>N.</given-names></name> <name><surname>Maldonado</surname> <given-names>C.</given-names></name> <name><surname>Gershenson</surname> <given-names>C.</given-names></name></person-group> (<year>2014</year>). <article-title>&#x0201C;Information measures of complexity, emergence, self-organization, homeostasis, and autopoiesis,&#x0201D;</article-title> in <source>Guided Self-Organization: Inception, Volume 9 of Emergence, Complexity and Computation</source>, ed. <person-group person-group-type="editor"><name><surname>Prokopenko</surname> <given-names>M.</given-names></name></person-group> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>19</fpage>&#x02013;<lpage>51</lpage>.</citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gauvrit</surname> <given-names>N.</given-names></name> <name><surname>Singmann</surname> <given-names>H.</given-names></name> <name><surname>Soler-Toscano</surname> <given-names>F.</given-names></name> <name><surname>Zenil</surname> <given-names>H.</given-names></name></person-group> (<year>2016</year>). <article-title>Algorithmic complexity for psychology: a user-friendly implementation of the coding theorem method</article-title>. <source>Behav. Res. Methods</source> <volume>48</volume>, <fpage>314</fpage>&#x02013;<lpage>329</lpage>.<pub-id pub-id-type="doi">10.3758/s13428-015-0574-3</pub-id><pub-id pub-id-type="pmid">25761393</pub-id></citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gershenson</surname> <given-names>C.</given-names></name> <name><surname>Fern&#x000E1;ndez</surname> <given-names>N.</given-names></name></person-group> (<year>2012</year>). <article-title>Complexity and information: measuring emergence, self-organization, and homeostasis at multiple scales</article-title>. <source>Complexity</source> <volume>18</volume>, <fpage>29</fpage>&#x02013;<lpage>44</lpage>.<pub-id pub-id-type="doi">10.1002/cplx.21424</pub-id></citation></ref>
<ref id="B7"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Gershenson</surname> <given-names>C.</given-names></name> <name><surname>Heylighen</surname> <given-names>F.</given-names></name></person-group> (<year>2003</year>). <article-title>&#x0201C;When can we call a system self-organizing?,&#x0201D;</article-title> in <conf-name>Advances in Artificial Life, 7th European Conference, ECAL 2003 LNAI 2801</conf-name>, eds <person-group person-group-type="editor"><name><surname>Banzhaf</surname> <given-names>W.</given-names></name> <name><surname>Christaller</surname> <given-names>T.</given-names></name> <name><surname>Dittrich</surname> <given-names>P.</given-names></name> <name><surname>Kim</surname> <given-names>J. T.</given-names></name> <name><surname>Ziegler</surname> <given-names>J.</given-names></name></person-group> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>606</fpage>&#x02013;<lpage>614</lpage>.</citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haken</surname> <given-names>H.</given-names></name> <name><surname>Portugali</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Information and self-organization</article-title>. <source>Entropy</source> <volume>19</volume>, <fpage>18</fpage>.<pub-id pub-id-type="doi">10.3390/e19010018</pub-id></citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jost</surname> <given-names>L.</given-names></name></person-group> (<year>2006</year>). <article-title>Entropy and diversity</article-title>. <source>Oikos</source> <volume>113</volume>, <fpage>363</fpage>&#x02013;<lpage>375</lpage>.<pub-id pub-id-type="doi">10.1111/j.2006.0030-1299.14714.x</pub-id></citation></ref>
<ref id="B10"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Kauffman</surname> <given-names>S.</given-names></name></person-group> (<year>1993</year>). <source>The Origins of Order</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Oxford University Press, Inc</publisher-name>.</citation></ref>
<ref id="B11"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Lichman</surname> <given-names>M.</given-names></name></person-group> (<year>2013</year>). <source>UCI Machine Learning Repository</source>.</citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lizier</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>JIDT: an information-theoretic toolkit for studying the dynamics of complex systems</article-title>. <source>Front. Robot. AI</source> <volume>1</volume>:<fpage>11</fpage>.<pub-id pub-id-type="doi">10.3389/frobt.2014.00011</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lopez-Ruiz</surname> <given-names>R.</given-names></name> <name><surname>Mancini</surname> <given-names>H.</given-names></name> <name><surname>Calbet</surname> <given-names>X.</given-names></name></person-group> (<year>1995</year>). <article-title>A statistical measure of complexity</article-title>. <source>Phys. Lett. A</source> <volume>209</volume>, <fpage>321</fpage>&#x02013;<lpage>326</lpage>.<pub-id pub-id-type="doi">10.1016/0375-9601(95)00867-5</pub-id></citation></ref>
<ref id="B14"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Mitchell</surname> <given-names>M.</given-names></name></person-group> (<year>2009</year>). <source>Complexity: A Guided Tour</source>. <publisher-loc>Oxford, UK</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>.</citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Prokopenko</surname> <given-names>M.</given-names></name> <name><surname>Boschetti</surname> <given-names>F.</given-names></name> <name><surname>Ryan</surname> <given-names>A.</given-names></name></person-group> (<year>2009</year>). <article-title>An information-theoretic primer on complexity, self-organisation and emergence</article-title>. <source>Complexity</source> <volume>15</volume>, <fpage>11</fpage>&#x02013;<lpage>28</lpage>.<pub-id pub-id-type="doi">10.1002/cplx.20249</pub-id></citation></ref>
<ref id="B16"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Santamar&#x000ED;a-Bonfil</surname> <given-names>G.</given-names></name></person-group> (<year>2016</year>). <source>GitHub Repository: Complexity Measures Functions</source>.</citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Santamar&#x000ED;a-Bonfil</surname> <given-names>G.</given-names></name> <name><surname>Fern&#x000E1;ndez</surname> <given-names>N.</given-names></name> <name><surname>Gershenson</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>Measuring the complexity of continuous distributions</article-title>. <source>Entropy</source> <volume>18</volume>, <fpage>72</fpage>.<pub-id pub-id-type="doi">10.3390/e18030072</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Soler-Toscano</surname> <given-names>F.</given-names></name> <name><surname>Zenil</surname> <given-names>H.</given-names></name> <name><surname>Delahaye</surname> <given-names>J.</given-names></name> <name><surname>Gauvrit</surname> <given-names>N.</given-names></name></person-group> (<year>2014</year>). <article-title>Calculating Kolmogorov complexity from the output frequency distributions of small Turing machines</article-title>. <source>PLoS ONE</source> <volume>9</volume>:<fpage>e96223</fpage>.<pub-id pub-id-type="doi">10.1371/journal.pone.0096223</pub-id><pub-id pub-id-type="pmid">24809449</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zenil</surname> <given-names>H.</given-names></name> <name><surname>Kiani</surname> <given-names>N. A.</given-names></name></person-group> (<year>2016</year>). <article-title>Low algorithmic complexity entropy-deceiving graphs</article-title>. <source>CoRR</source>, <fpage>1</fpage>&#x02013;<lpage>21</lpage>. abs/1608.05972. arXiv:1608.05972.</citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zenil</surname> <given-names>H.</given-names></name> <name><surname>Soler-Toscano</surname> <given-names>F.</given-names></name> <name><surname>Kiani</surname> <given-names>N. A.</given-names></name> <name><surname>Hern&#x000E1;ndez-Orozco</surname> <given-names>S.</given-names></name> <name><surname>Rueda-Toicen</surname> <given-names>A.</given-names></name></person-group> (<year>2016</year>). <article-title>A decomposition method for global evaluation of shannon entropy and local estimations of algorithmic complexity</article-title>. <volume>arXiv</volume>:<fpage>1609.00110</fpage>.</citation></ref>
</ref-list>
<app-group>
<app id="App1">
<title>Appendix</title>
<sec id="A1">
<title>A. Discrete Complexity Measures</title>
<preformat>
<monospace><font color="#A4548A">function</font> [</monospace>
<monospace>emergence, &#x02026;</monospace>
<monospace>selfOrganization, &#x02026;</monospace>
<monospace>complexity, &#x02026;</monospace>
<monospace>varargout]&#x02009;&#x0003D;&#x02009;&#x02026;</monospace>
<monospace>DiscreteComplexityMeasures(stringSample, varargin)</monospace>
<monospace><font color="#0072BC">%This function calculates Discrete Complexity Measures</font></monospace>
<monospace><font color="#0072BC">%for discrete samples.</font></monospace>
<monospace><font color="#0072BC">%First, we get the number of observations</font></monospace>
<monospace><font color="#0072BC">%contained in the sample.</font></monospace>
<monospace>measLen&#x02009;&#x0003D;&#x02009;<font color="#A4548A">length</font>(stringSample);</monospace>
<monospace><font color="#0072BC">%If the number of states of the PMF</font></monospace>
<monospace><font color="#A4548A">%is known beforehand</font></monospace>
<monospace><font color="#A4548A">if</font>(&#x0007E;<font color="#A4548A">isempty</font>(varargin))</monospace>
<monospace>&#x02003;&#x02003;%<font color="#0072BC">Calculate the marginal states probability</font></monospace>
<monospace>&#x02003;&#x02003;no_States&#x02009;&#x0003D;&#x02009;varargin{1};</monospace>
<monospace>&#x02003;&#x02003;margSttProb&#x02009;&#x0003D;&#x02009;(<font color="#A4548A">hist</font>(&#x02026;</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;stringSample,no_States)./measLen)&#x02019;;</monospace>
<monospace><font color="#A4548A">else</font><font color="#0072BC">%Use an heuristic to obtain the PMF</font></monospace>
<monospace>&#x02003;&#x02003;<font color="#0072BC">%Obtain the system&#x02019;s unique states.</font></monospace>
<monospace>&#x02003;&#x02003;sysStates&#x02009;&#x0003D;&#x02009;unique(stringSample);</monospace>
<monospace>&#x02003;&#x02003;<font color="#A4548A">if</font>(<font color="#A4548A">size</font>(sysStates,2)&#x02009;&#x0003E;&#x02009;1)</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;sysStates&#x02009;&#x0003D;&#x02009;sysStates&#x02019;;</monospace>
<monospace>&#x02003;&#x02003;<font color="#A4548A">end</font></monospace>
<monospace>&#x02003;&#x02003;<font color="#0072BC">%Get the length of the unique states.</font></monospace>
<monospace>&#x02003;&#x02003;<font color="#0072BC">%And calculate the marginal states probability</font></monospace>
<monospace>&#x02003;&#x02003;no_States&#x02009;&#x0003D;&#x02009;<font color="#A4548A">length</font>(sysStates);</monospace>
<monospace>&#x02003;&#x02003;margSttProb&#x02009;&#x0003D;&#x02009;<font color="#A4548A">zeros</font>(<font color="#A4548A">length</font>(sysStates), 1);</monospace>
<monospace>&#x02003;&#x02003;<font color="#A4548A">for</font> i&#x02009;&#x0003D;&#x02009;1:no_States</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;margSttProb(i,1)&#x02009;&#x0003D;&#x02009;(nnz(&#x02026;</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;ismember(&#x02026;</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;stringSample,sysStates(i))))/measLen;</monospace>
<monospace>&#x02003;&#x02003;<font color="#A4548A">end</font></monospace>
<monospace><font color="#A4548A">end</font></monospace>
<monospace><font color="#0072BC">%Define the normalizing constant k</font></monospace>
<monospace><font color="#A4548A">if</font>(no_States&#x02009;&#x0003D;&#x0003D;&#x02009;1)</monospace>
<monospace>&#x02003;&#x02003;kConst&#x02009;&#x0003D;&#x02009;1;</monospace>
<monospace><font color="#A4548A">else</font></monospace>
<monospace>&#x02003;&#x02003;kConst&#x02009;&#x0003D;&#x02009;1/log2(no_States);</monospace>
<monospace><font color="#A4548A">end</font></monospace>
<monospace><font color="#0072BC">%Then, calculate entropy for all elements</font></monospace>
<monospace><font color="#0072BC">%of the PMF with p(x)&#x02009;&#x0003E;&#x02009;0</font></monospace>
<monospace>ind&#x02009;&#x0003D;&#x02009;margSttProb&#x02009;&#x0003E;&#x02009;0;</monospace>
<monospace>entropy&#x02009;&#x0003D;&#x02009;<font color="#A4548A">sum</font>(margSttProb(ind,1).&#x0002A;<font color="#A4548A">log2</font>(margSttProb(ind,1)));</monospace>
<monospace><font color="#0072BC">%Calculate ESC measures</font></monospace>
<monospace>emergence&#x02009;&#x0003D;&#x02009;(-1)&#x0002A;kConst&#x0002A;entropy;</monospace>
<monospace>selfOrganization&#x02009;&#x0003D;&#x02009;1 - emergence;</monospace>
<monospace>complexity&#x02009;&#x0003D;&#x02009;4 &#x0002A; emergence &#x0002A; selfOrganization;</monospace>
<monospace>varargout{1}&#x02009;&#x0003D;&#x02009;entropy;</monospace>
<monospace><font color="#A4548A">end</font></monospace>
</preformat>
</sec>
<sec id="A2">
<title>B. Continuous Complexity Measures</title>
<preformat>
<monospace><font color="#A4548A">function</font> [</monospace>
<monospace>emergence, &#x02026;</monospace>
<monospace>selfOrganization, &#x02026;</monospace>
<monospace>complexity, &#x02026;</monospace>
<monospace>varargout]&#x02009;&#x0003D;&#x02009;&#x02026;</monospace>
<monospace>ContinuousComplexityMeasures(&#x02026;</monospace>
<monospace>pdfSample, varargin)</monospace>
<monospace>minVal&#x02009;&#x0003D;&#x02009;varargin{1};</monospace>
<monospace>maxVal&#x02009;&#x0003D;&#x02009;varargin{2};</monospace>
<monospace>distSampleSize&#x02009;&#x0003D;&#x02009;varargin{3};</monospace>
<monospace><font color="#0072BC">%Determine an integration interval Delta</font></monospace>
<monospace><font color="#0072BC">%Remember that by definition</font></monospace>
<monospace><font color="#0072BC">%for really small Deltas the entropy</font></monospace>
<monospace><font color="#0072BC">%is negative and can become -infinity</font></monospace>
<monospace>Delta&#x02009;&#x0003D;&#x02009;(maxVal-minVal)/(distSampleSize);</monospace>
<monospace><font color="#0072BC">%Use the provided Probability Density Function</font></monospace>
<monospace><font color="#0072BC">%to determine the non-zero elements of the PDF</font></monospace>
<monospace>tempPdf&#x02009;&#x0003D;&#x02009;pdfSample;</monospace>
<monospace>ind&#x02009;&#x0003D;&#x02009;tempPdf&#x02009;&#x0003E;&#x02009;0;</monospace>
<monospace>pdfNoZeros&#x02009;&#x0003D;&#x02009;<font color="#A4548A">sum</font>(ind);</monospace>
<monospace><font color="#0072BC">%Calculate Differential Entropy</font></monospace>
<monospace><font color="#0072BC">%for the non-zero elements of the PDF</font></monospace>
<monospace>rightHandSide&#x02009;&#x0003D;&#x02009;-1&#x0002A;<font color="#A4548A">log2</font>(Delta);</monospace>
<monospace>leftHandSide&#x02009;&#x0003D;&#x02009;(-1)&#x0002A;<font color="#A4548A">sum</font>((Delta&#x0002A;tempPdf(ind)).&#x0002A;<font color="#A4548A">log2</font>(tempPdf(ind)));</monospace>
<monospace>lmtEntrpy&#x02009;&#x0003D;&#x02009;rightHandSide&#x02009;&#x0002B;&#x02009;leftHandSide;</monospace>
<monospace>diffEntrop&#x02009;&#x0003D;&#x02009;lmtEntrpy&#x02009;&#x0002B;&#x02009;<font color="#A4548A">log2</font>(Delta);</monospace>
<monospace><font color="#0072BC">%K constant to be determined by 1) the number of</font></monospace>
<monospace><font color="#0072BC">%non-zero probability elements,</font></monospace>
<monospace><font color="#0072BC">%2) a large value (i.e. the sample size)</font></monospace>
<monospace><font color="#A4548A">if</font>(<font color="#A4548A">length</font>(varargin)&#x02009;&#x0003C;&#x02009;4)</monospace>
<monospace>&#x02003;&#x02003;<font color="#A4548A">if</font>(distSampleSize&#x02009;&#x0003C;&#x02009;pdfNoZeros)</monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;kConst&#x02009;&#x0003D;&#x02009;1/<font color="#A4548A">log2</font>(pdfNoZeros);</monospace>
<monospace>&#x02003;&#x02003;<font color="#A4548A">else</font></monospace>
<monospace>&#x02003;&#x02003;&#x02003;&#x02003;kConst&#x02009;&#x0003D;&#x02009;1/<font color="#A4548A">log2</font>(distSampleSize);</monospace>
<monospace>&#x02003;&#x02003;<font color="#A4548A">end</font></monospace>
<monospace><font color="#A4548A">else</font></monospace>
<monospace>&#x02003;&#x02003;kConst&#x02009;&#x0003D;&#x02009;1/<font color="#A4548A">log2</font>(varargin{4});</monospace>
<monospace><font color="#A4548A">end</font></monospace>
<monospace>modfDiffEntrop&#x02009;&#x0003D;&#x02009;diffEntrop.&#x0002A;kConst;</monospace>
<monospace><font color="#A4548A">if</font>(modfDiffEntrop&#x02009;&#x0003C;&#x02009;0)</monospace>
<monospace>&#x02003;&#x02003;emergence&#x02009;&#x0003D;&#x02009;0;</monospace>
<monospace>&#x02003;&#x02003;selfOrganization&#x02009;&#x0003D;&#x02009;1;</monospace>
<monospace>&#x02003;&#x02003;complexity&#x02009;&#x0003D;&#x02009;0;</monospace>
<monospace><font color="#A4548A">else</font></monospace>
<monospace>&#x02003;&#x02003;emergence&#x02009;&#x0003D;&#x02009;modfDiffEntrop;</monospace>
<monospace>&#x02003;&#x02003;selfOrganization&#x02009;&#x0003D;&#x02009;1 - emergence;</monospace>
<monospace>&#x02003;&#x02003;complexity&#x02009;&#x0003D;&#x02009;4 &#x0002A; (emergence &#x0002A; selfOrganization);</monospace>
<monospace><font color="#A4548A">end</font></monospace>
<monospace><font color="#A4548A">end</font></monospace>
</preformat>
</sec>
<sec id="A3">
<title>C. Experimental Description</title>
<p>In the following, experimental results are briefly described. To demonstrate the functionality of <italic>E, S</italic>, and <italic>C</italic>, two types of experiments were performed. On the one hand, discrete complexity measures using publicly available machine learning datasets were tested. On the other hand, continuous complexity measures using probability density functions were employed. For the discrete datasets, we used <italic>noOfStates</italic>&#x02009;&#x0003D;&#x02009;10, whereas for the continuous case, we used <italic>noOfStates</italic>&#x02009;&#x0003D;&#x02009;50. Only real-world datasets are described. For continuous complexity measures, Gaussian and Power-Law distributions were used; however, since these probability distributions have been well documented elsewhere (Santamar&#x000ED;a-Bonfil et al., <xref ref-type="bibr" rid="B17">2016</xref>), no further details are provided. Results of the discrete measures for the solar flares and the bike-sharing system datasets are shown in Figure <xref ref-type="fig" rid="F3">3</xref>, whereas those for the probability distributions are presented in Figure <xref ref-type="fig" rid="F4">4</xref>.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p><bold>(A)</bold> Solar flares <bold>(B)</bold> bike-sharing system datasets. For the first set, high self-organization is appreciated for each type of solar flare class. As solar flares become larger in magnitude, their distribution becomes more organized (around a single state). Also, the number of possible states is reduced for larger magnitudes. Thus, the largest <italic>C</italic> is displayed by the common class. For the BSS dataset, we can observe that hourly usage is more uniformly distributed between its states, thus a higher <italic>E</italic>, than daily. Even though hourly and daily usage have lower organization, the usage of the latter reaches a higher <italic>C</italic>&#x02009;&#x02248;&#x02009;0.79, since its distribution is highly concentrated around the mean value, with many lower but uniformly distributed states around it.</p></caption>
<graphic xlink:href="frobt-04-00010-g003.tif"/>
</fig>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p><bold>(A)</bold> Gaussian distribution <bold>(B)</bold> Power-law distribution. For the first, several SDs were tested <italic>&#x003C3;</italic>&#x02009;&#x0003D;&#x02009;1, &#x02026;, 10. Note that the highest balance between S and E is constrained between 1&#x02009;&#x02264;&#x02009;<italic>&#x003C3;</italic>&#x02009;&#x02264;&#x02009;3. As <italic>&#x003C3;</italic> becomes larger, distribution becomes more uniform, thus, less complex. For the second, parameters were <italic>&#x003B1;</italic>&#x02009;&#x0003D;&#x02009;5, and <italic>x<sub>min</sub></italic>&#x02009;&#x0003D;&#x02009;1, &#x02026;, 10. We can observe that as <italic>x<sub>min</sub></italic> value increases, so does the relation between a state with high probability and many others with lower one. It is known that for a system to be described as power-law its values must satisfy <italic>x<sub>i</sub></italic>&#x02009;&#x0003E;&#x02009;<italic>x<sub>min</sub></italic>. Thus, a high <italic>C</italic> may be a good proxy of the proper <italic>x<sub>min</sub></italic> value required (in this figure <italic>x<sub>min</sub></italic>&#x02009;&#x02265;&#x02009;4).</p></caption>
<graphic xlink:href="frobt-04-00010-g004.tif"/>
</fig>
<sec id="A3-1">
<title>C.1. Solar Flares</title>
<p>A solar flare occurs when magnetic energy that has built up in the solar atmosphere is suddenly released. UCI&#x02019;s dataset contains three types of classes categorized by their magnitude and frequency. For each class, the number of solar flares that occur in a 24-h period is counted.<xref ref-type="fn" rid="fn4"><sup>4</sup></xref> The variables analyzed are three types of solar flares: (a) C-class flares, which are common, (b) M-class flares, which are flares of moderate size, and (c) X-class flares, which constitute flares of a severe magnitude.</p>
</sec>
<sec id="A3-2">
<title>C.2. Bike-Sharing System</title>
<p>Bike-sharing systems (BSS) are a new generation of urban mobility systems, composed of bicycles that are rented to subscribers to travel short to medium distances. These types of systems can be scrutinized from a large-scale statistical point of view.<xref ref-type="fn" rid="fn5"><sup>5</sup></xref> In these experiments, BSS data consist of the total count of bicycle rentals per hour, including both casual and registered users. Further details can be obtained from Fanaee-T and Gama (<xref ref-type="bibr" rid="B3">2013</xref>) and UCI&#x02019;s repository (Lichman, <xref ref-type="bibr" rid="B11">2013</xref>).</p>
</sec>
<sec id="A3-3">
<title>C.3. Individual Household Electric Power Consumption Dataset</title>
<p>The need for a more efficient lifestyle requires to parametrize several aspects of human activities. Household electric consumption provides information not only to casual/conscious consumers but also to providers and grid managers. In these experiments, measurements of electric power consumption in one household with a 1-min sampling rate over a period of almost 4&#x02009;years were used.<xref ref-type="fn" rid="fn6"><sup>6</sup></xref> We employed &#x0007E;1 million observations of two variables: (a) the global household active power, which corresponds to global measures of the minute-averaged active power in kilowatts and (b) Kitchen energy sub-metering, which corresponds to measurements from a kitchen containing a dishwasher, an oven, and a microwave.</p>
</sec>
<sec id="A4">
<title>D. Example: Analyzing Timescales</title>
<p>In the previous section, numeric results of different phenomena and parameters of distributions were presented. In this section, we provide results for the analysis of multiple timescales. For such purposes, we employed the largest dataset available which is the household electric consumption. In the former example, only half of the dataset was employed. For this example, &#x0007E;2 million points were used. The different timescales that were analyzed are <italic>minute, hour, day, week</italic>, and <italic>month</italic>. Further, we added another variable to the analysis, <italic>indoor comfort</italic> which consists of an electric water-heater and an air-conditioner. It was considered because indoor comfort represents around 60% of a building energy consumption.</p>
<p>First, we remove missing data points. Then, data were averaged in accordance to the aforementioned timescales. Next, we calculate Emergence, Self-organization, and Complexity, for the house&#x02019;s global, kitchen, and indoor comfort active power. Results for the complexity measures are presented in Figure <xref ref-type="fig" rid="FA1">A1</xref>. The code for this example is provided in the Github repository (Santamar&#x000ED;a-Bonfil, <xref ref-type="bibr" rid="B16">2016</xref>) with the name <italic>Example2ComplexityMeasures.m</italic>.</p>
<fig position="float" id="FA1">
<label>Figure A1</label>
<caption><p><bold>E, S, and C for the global, kitchen, and indoor comfort electric consumption for several timescales</bold>. For the global consumption, note that as the scale of the measures turns coarser, the probability distribution becomes more uniform. The highest <italic>C</italic> (few states with high probability and many emergent new states) is given for the minute/hour consumption. Moreover, even while the consumption of energy presents many new patterns for coarser measurement scales (i.e., week), cyclical and seasonal components remain considerably high (<italic>C</italic>&#x02009;&#x02248;&#x02009;0.78). In fact, the same ESC behavior can be observed for the comfort energy usage, given its high correlation with the whole house consumption. On the other hand, the kitchen electricity usage is highly self-organized (i.e., regular) for the minute and hour scales; however, self-organization quickly decays for the day time scale. Thus, the kitchen&#x02019;s daily electric consumption, even while regular, presents rich new patterns of usage (e.g., dinner with old friends).</p></caption>
<graphic xlink:href="frobt-04-00010-a001.tif"/>
</fig>
</sec>
</sec>
</app>
</app-group>
<fn-group>
<fn id="fn1"><p><sup>1</sup><uri xlink:href="https://doi.org/10.5281/zenodo.166566">https://doi.org/10.5281/zenodo.166566</uri>.</p></fn>
<fn id="fn2"><p><sup>2</sup><uri xlink:href="http://archive.ics.uci.edu/ml">http://archive.ics.uci.edu/ml</uri>.</p></fn>
<fn id="fn3"><p><sup>3</sup><uri xlink:href="http://stackoverflow.com/questions/24180890/3d-histogram-with-gnuplot-or-octave">http://stackoverflow.com/questions/24180890/3d-histogram-with-gnuplot-or-octave</uri>, accessed 30/10/2016.</p></fn>
<fn id="fn4"><p><sup>4</sup><uri xlink:href="https://archive.ics.uci.edu/ml/datasets/Solar&#x0002B;Flare">https://archive.ics.uci.edu/ml/datasets/Solar&#x0002B;Flare</uri>.</p></fn>
<fn id="fn5"><p><sup>5</sup><uri xlink:href="https://archive.ics.uci.edu/ml/datasets/Bike&#x0002B;Sharing&#x0002B;Dataset">https://archive.ics.uci.edu/ml/datasets/Bike&#x0002B;Sharing&#x0002B;Dataset</uri>.</p></fn>
<fn id="fn6"><p><sup>6</sup><uri xlink:href="https://archive.ics.uci.edu/ml/datasets/Individual&#x0002B;household&#x0002B;electric&#x0002B;power&#x0002B;consumption&#x00023;">https://archive.ics.uci.edu/ml/datasets/Individual&#x0002B;household&#x0002B;electric&#x0002B;power&#x0002B;consumption&#x00023;</uri>.</p></fn>
</fn-group>
</back>
</article>